diff --git a/.github/instructions/api.instructions.md b/.github/instructions/api.instructions.md index 9ebbca39c..e29553d9b 100644 --- a/.github/instructions/api.instructions.md +++ b/.github/instructions/api.instructions.md @@ -109,32 +109,66 @@ responses=shared_responses # Defines 4xx/5xx response schemas ## Worker Integration -### Job Pipeline -Many operations chain through multiple worker jobs: -1. `create_variants_for_score_set` — Parse uploaded CSV, create variant records -2. `map_variants_for_score_set` — Map variants via DCD Mapping / VRS -3. `submit_score_set_mappings_to_*` — Submit to ClinGen services +### Pipeline System + +Most write operations trigger a multi-step pipeline via the worker: + +```python +from mavedb.lib.workflow.pipeline_factory import PipelineFactory + +# In a router endpoint: +pipeline, entrypoint_job_run = PipelineFactory.create_pipeline( + db=db, + name="validate_map_annotate_score_set", + pipeline_params={ + "score_set_id": score_set.id, + "updater_id": user_data.user.id, + "correlation_id": logging_context().get("correlation_id"), + }, +) +db.commit() + +await worker.enqueue_job("start_pipeline", entrypoint_job_run.id) +``` + +This creates a `Pipeline` with multiple `JobRun` records and `JobDependency` records, then enqueues the pipeline's `start_pipeline` entrypoint in ARQ. The worker coordinates the rest — each job runs after its dependencies complete. + +### Job Function Signature + +All job functions follow this signature (the decorator injects `job_manager`): -### Job Patterns ```python -async def create_variants_for_score_set(ctx: dict, score_set_id: int, correlation_id: str): - logging_context = setup_job_state(ctx, correlation_id) - db = ctx["db"] - - try: - # ... processing ... 
- pass - except Exception as e: - send_slack_error(e, logging_context) - raise +@with_pipeline_management +async def create_variants_for_score_set( + ctx: dict, job_id: int, job_manager: JobManager +) -> JobExecutionOutcome: + job = job_manager.get_job() + validate_job_params(["score_set_id", "correlation_id", "updater_id"], job) + # ... business logic using job_manager.db ... + return JobExecutionOutcome.succeeded(data={"variants_created": count}) ``` -### Backoff and Retry -Use `enqueue_job_with_backoff()` for jobs that may need retries (e.g., external service calls). +Callers pass only `ctx` and `job_id` when enqueueing. The decorator creates the `JobManager` from the `job_id`. + +### Correlation IDs + +Correlation IDs flow from the API request through the pipeline to each job: -## Correlation IDs -Every request gets a correlation ID via starlette-context middleware. Pass it to worker jobs for end-to-end request tracing: ```python -from mavedb.lib.logging.context import save_to_logging_context -correlation_id = save_to_logging_context({"score_set_urn": urn}) +# In the router — capture correlation ID from starlette-context +from mavedb.lib.logging.context import save_to_logging_context, logging_context + +save_to_logging_context({"score_set_urn": urn}) +correlation_id = logging_context().get("correlation_id") + +# Pass to pipeline via pipeline_params +pipeline, entrypoint = PipelineFactory.create_pipeline( + db=db, + name="validate_map_annotate_score_set", + pipeline_params={"correlation_id": correlation_id, ...}, +) ``` + +Each job retrieves the correlation ID from its `job_params` and uses `job_manager.save_to_context()` for structured logging. + +For detailed worker conventions, see `.github/instructions/worker.instructions.md` and `src/mavedb/worker/README.md`. 
diff --git a/.github/instructions/copilot-instructions.md b/.github/instructions/copilot-instructions.md index 10f50ab51..089220c66 100644 --- a/.github/instructions/copilot-instructions.md +++ b/.github/instructions/copilot-instructions.md @@ -38,9 +38,17 @@ src/mavedb/ ├── models/ # SQLAlchemy ORM models ├── view_models/ # Pydantic request/response models ├── routers/ # API endpoint handlers -├── worker/ # ARQ background jobs -│ ├── jobs.py # Job implementations -│ └── settings.py # Worker config, function registry, cron jobs +├── worker/ # ARQ background worker system +│ ├── jobs/ # Job function implementations (by category) +│ │ ├── registry.py # Central registry of all jobs, cron definitions +│ │ ├── variant_processing/ # Variant creation and mapping +│ │ ├── external_services/ # ClinGen, ClinVar, gnomAD, UniProt +│ │ ├── pipeline_management/ # Pipeline entrypoint (start_pipeline) +│ │ └── system/ # Cron maintenance (cleanup stalled jobs) +│ ├── lib/ # Infrastructure layer +│ │ ├── decorators/ # @with_pipeline_management, @with_job_management +│ │ └── managers/ # JobManager, PipelineManager state management +│ └── settings/ # ARQ worker config, lifecycle hooks ├── lib/ # Shared utilities │ ├── authentication.py # ORCID JWT + API key auth │ ├── authorization.py # Permission checks @@ -94,7 +102,7 @@ Do not comment obvious operations, variable assignments, or code that is self-ex - **Structured logging**: Use `logger` with `extra=logging_context()` for correlation IDs via starlette-context - **HTTP exceptions**: FastAPI `HTTPException` with appropriate status codes - **Domain exceptions**: `src/mavedb/lib/exceptions.py` — `MixedTargetError`, `NonexistentOrcidError`, etc. 
-- **Worker errors**: `send_slack_error()` + full logging context +- **Worker errors**: `send_slack_job_error()` or `send_slack_job_error()` + full logging context - **Validation errors**: Two distinct classes exist: - `src/mavedb/lib/validation/exceptions.py` — validation package exceptions - `src/mavedb/lib/exceptions.py` — legacy `ValidationError` (Django-style, used in some older code) @@ -140,3 +148,109 @@ poetry run python -m mavedb.scripts. - [server_main.py](src/mavedb/server_main.py) — App setup and dependency injection - [authentication.py](src/mavedb/lib/authentication.py) — Auth patterns - [conftest.py](tests/conftest.py) — Test fixtures and database setup + +### Naming Conventions +- **Variables & functions**: `snake_case` (e.g., `score_set_id`, `create_variants_for_score_set`) +- **Classes**: `PascalCase` (e.g., `ScoreSet`, `UserData`, `ProcessingState`) +- **Constants**: `UPPER_SNAKE_CASE` (e.g., `MAPPING_QUEUE_NAME`, `DEFAULT_LDH_SUBMISSION_BATCH_SIZE`) +- **Enum values**: `snake_case` (e.g., `ProcessingState.success`, `MappingState.incomplete`) +- **Database tables**: `snake_case` with descriptive association table names (e.g., `scoreset_contributors`, `experiment_set_doi_identifiers`) +- **API endpoints**: kebab-case paths (e.g., `/score-sets`, `/experiment-sets`) + +### Documentation Conventions +*For general Python documentation standards, see `.github/instructions/python.instructions.md`. The following are MaveDB-specific additions:* + +- **Algorithm explanations**: Include comments explaining complex logic, especially URN generation and bioinformatics operations +- **Design decisions**: Comment on why certain architectural choices were made +- **External dependencies**: Explain purpose of external bioinformatics libraries (HGVS, SeqRepo, etc.) +- **Bioinformatics context**: Document biological reasoning behind genomic data processing patterns + +### Commenting Guidelines +**Core Principle: Write self-explanatory code. 
Comment only to explain WHY, not WHAT.** + +**✅ WRITE Comments For:** +- **Complex bioinformatics algorithms**: Variant mapping algorithms, external service interactions +- **Business logic**: Why specific validation rules exist, regulatory requirements +- **External API constraints**: Rate limits, data format requirements +- **Non-obvious calculations**: Score normalization, statistical methods +- **Configuration values**: Why specific timeouts, batch sizes, or thresholds were chosen + +**❌ AVOID Comments For:** +- **Obvious operations**: Variable assignments, simple loops, basic conditionals +- **Redundant descriptions**: Comments that repeat what the code clearly shows +- **Outdated information**: Comments that don't match current implementation + +### Error Handling Conventions +- **Structured logging**: Always use `logger` with `extra=logging_context()` for correlation IDs +- **HTTP exceptions**: Use FastAPI `HTTPException` with appropriate status codes and descriptive messages +- **Custom exceptions**: Define domain-specific exceptions in `src/mavedb/lib/exceptions.py` +- **Worker job errors**: Send Slack notifications via `send_slack_job_error()` or `send_slack_job_failure()` and log with full context +- **Validation errors**: Use Pydantic validators and raise `ValueError` with clear messages + +### Code Style and Organization Conventions +*For general Python style conventions, see `.github/instructions/python.instructions.md`. 
The following are MaveDB-specific patterns:* + +- **Async patterns**: Use `async def` for I/O operations, regular functions for CPU-bound work +- **Database operations**: Use SQLAlchemy 2.0 style with `session.scalars(select(...)).one()` +- **Pydantic models**: Separate request/response models with clear inheritance hierarchies +- **Bioinformatics data flow**: Structure code to clearly show genomic data transformations + +### Testing Conventions +*For testing philosophy, mocking boundaries, and conventions see `.github/instructions/testing.instructions.md`. For general Python testing standards, see `.github/instructions/python.instructions.md`. The following are MaveDB-specific patterns:* + +- **Test function naming**: Use descriptive names that reflect bioinformatics operations (e.g., `test_cannot_publish_score_set_without_variants`) +- **Fixtures**: Use `conftest.py` for shared fixtures, especially database and worker setup +- **Mocking**: Mock only at system boundaries (external services, Redis/ARQ, Slack). 
Do not mock internal helpers or `update_progress` +- **Constants**: Define test data including genomic sequences and variants in `tests/helpers/constants.py` +- **Integration testing**: Test full bioinformatics workflows including external service interactions + +## Codebase Conventions + +### URN Validation +- Use regex patterns from `src/mavedb/lib/validation/urn_re.py` +- Validate URNs in Pydantic models with `@field_validator` +- URN generation logic in `src/mavedb/lib/urns.py` and `temp_urns.py` + +### Worker Jobs (ARQ/Redis) +- **Two-layer architecture**: Infrastructure (decorators + managers) handles lifecycle/state; business layer (jobs/) implements domain logic +- **Job registry**: All jobs registered in `src/mavedb/worker/jobs/registry.py` — `BACKGROUND_FUNCTIONS`, `BACKGROUND_CRONJOBS`, `STANDALONE_JOB_DEFINITIONS` +- **Job function signature**: `async def job_name(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome` — `job_manager` is injected by the decorator, not passed by callers +- **Decorators**: `@with_pipeline_management` (most jobs), `@with_job_management` (standalone), `@with_guaranteed_job_run_record` (cron/auto-created JobRun) +- **Pipeline system**: `PipelineFactory.create_pipeline()` creates Pipeline + JobRun + JobDependency records from definitions in `src/mavedb/lib/workflow/definitions.py` +- **Session management**: Task-local DB sessions via `ContextVar` prevent concurrent ARQ jobs from sharing sessions +- **Commit discipline**: Decorators commit lifecycle state changes; `update_progress()` commits as a checkpoint; job code should NOT commit +- **Key job types**: + - `create_variants_for_score_set` - Parse uploaded CSV, create variant records + - `map_variants_for_score_set` - Map variants via DCD Mapping / VRS + - `submit_score_set_mappings_to_car/ldh` - Submit to ClinGen services + - `cleanup_stalled_jobs` - Cron job for recovering stuck jobs +- **Enqueueing pipelines**: Routers call 
`PipelineFactory.create_pipeline()` then `ArqRedis.enqueue_job("start_pipeline", ...)` with the pipeline's entrypoint JobRun ID +- **Detailed documentation**: See `src/mavedb/worker/README.md` and `.github/instructions/worker.instructions.md` + +### View Models (Pydantic) +- **Base model** (`src/mavedb/view_models/base/base.py`) converts empty strings to None and uses camelCase aliases +- **Inheritance patterns**: `Base` → `Create` → `Modify` → `Saved` model hierarchy +- **Field validation**: Use `@field_validator` for single fields, `@model_validator(mode="after")` for cross-field validation +- **URN validation**: Validate URNs with regex patterns from `urn_re.py` in field validators +- **Transform functions**: Use functions in `validation/transform.py` for complex data transformations +- **Separate models**: Request (`Create`, `Modify`) vs response (`Saved`) models with different field requirements + +### External Integrations +- **HGVS/SeqRepo** for genomic sequence operations +- **DCD Mapping** for variant mapping and VRS transformation +- **CDOT** for transcript/genomic coordinate conversion +- **GA4GH VRS** for variant representation standardization +- **ClinGen services** for allele registry and linked data hub submissions + +## Key Files to Reference +- `src/mavedb/models/score_set.py` - Primary data model patterns +- `src/mavedb/routers/score_sets.py` - Complex router with worker integration +- `src/mavedb/worker/jobs/registry.py` - Job registration and available functions +- `src/mavedb/worker/jobs/variant_processing/creation.py` - Reference pipeline job implementation +- `src/mavedb/lib/workflow/definitions.py` - Pipeline and job definitions +- `src/mavedb/view_models/score_set.py` - Pydantic model hierarchy examples +- `src/mavedb/server_main.py` - Application setup and dependency injection +- `src/mavedb/data_providers/services.py` - External service integration patterns +- `src/mavedb/lib/authentication.py` - Authentication and authorization patterns 
+- `tests/conftest.py` - Test fixtures and database setup +- `docker-compose-dev.yml` - Service architecture and dependencies diff --git a/.github/instructions/testing.instructions.md b/.github/instructions/testing.instructions.md index ae8dfcf65..4fd1b1ce6 100644 --- a/.github/instructions/testing.instructions.md +++ b/.github/instructions/testing.instructions.md @@ -1,121 +1,88 @@ --- -description: 'MaveDB testing conventions — fixtures, mocking, test data patterns' +description: 'Testing philosophy and conventions for the MaveDB API' applyTo: 'tests/**/*.py' --- -# Testing Conventions for MaveDB +# Testing Conventions -## Test Infrastructure +## Outcome-Based Testing -### Database -- **pytest-postgresql** provides ephemeral PostgreSQL instances per test session -- Database schema is created from SQLAlchemy models via `Base.metadata.create_all()` -- Each test gets a clean transaction that rolls back after completion -- Core fixtures live in `tests/conftest.py` +Test what code does (return values, DB state, external boundary calls), not how it does it (internal method calls, message strings, call sequences). Tests should survive internal refactoring without changes. 
-### Network Isolation -- **pytest-socket** blocks real network calls in tests -- External services (HGVS, SeqRepo, DCD Mapping, ClinGen) must be mocked +**Assert on:** +- Return values and response objects +- DB state changes (query for created/updated/deleted records) +- External boundary calls (see Mocking Boundaries below) -## Fixtures +**Do not assert on:** +- Internal function invocations (e.g., that a helper was called with specific args) +- Call counts or call sequences on internal methods +- Log or progress message strings + +## Mocking Boundaries + +Only mock at system boundaries — the edges where your code talks to something external: +- External services (APIs, third-party clients) +- Infrastructure (Redis/ARQ, Slack, email) +- Network I/O (`run_in_executor`, HTTP clients) +- File I/O (S3, local filesystem in tests) + +Do NOT mock internal helpers, validators, or data transforms. Test through them. + +## Unit vs Integration Test Responsibilities + +**Unit tests:** Edge cases, error paths, invalid inputs, boundary conditions. Use mocked external services. + +**Integration tests:** Happy paths, end-to-end workflows, DB state verification. Use real DB with test fixtures. 
-### Two-Tier conftest -- `tests/conftest.py` — Core fixtures: database session, auth overrides, user contexts, API client -- `tests//conftest.py` — Module-specific fixtures for that test directory - -### Auth Fixtures -Four pre-configured user contexts: -- **Default user** — standard authenticated user (test ORCID) -- **Anonymous user** — unauthenticated -- **Extra user** — second authenticated user (for permission tests) -- **Admin user** — user with admin role - -### DependencyOverrider -Switch auth context mid-test using the `DependencyOverrider` context manager: -```python -with DependencyOverrider(app, {get_current_user: lambda: admin_user}): - response = client.get("/api/v1/score-sets/private-urn") - assert response.status_code == 200 -``` - -## Test Data Constants - -All test constants live in `tests/helpers/constants.py` with naming conventions: - -| Prefix | Purpose | Example | -|--------|---------|---------| -| `VALID_*` | Valid input values | `VALID_ACCESSION`, `VALID_GENE_NAME` | -| `TEST_*` | Complete test objects (dicts) | `TEST_SCORE_SET`, `TEST_EXPERIMENT` | -| `TEST_MINIMAL_*` | Minimal valid objects | `TEST_MINIMAL_SCORE_SET` | -| `SAVED_*` | Expected shapes after save | `SAVED_SCORE_SET` | -| `*_RESPONSE` | Expected API response shapes | `SCORE_SET_RESPONSE` | +## Assertion Best Practices + +- Use `session.refresh()` before asserting on modified ORM objects +- Add custom assertion messages to complex assertions where the failure message wouldn't immediately clarify what went wrong +- Include negative assertions where appropriate (verify unwanted records don't exist) +- Don't add messages to trivially clear assertions like `assert len(variants) == 0` ## Test Naming -Use descriptive names that reflect the operation and expected outcome: -```python -def test_cannot_publish_score_set_without_variants(): ... -def test_admin_can_view_private_score_set(): ... -def test_create_experiment_with_invalid_urn_returns_422(): ... 
-``` - -## Mocking External Services - -Always mock external bioinformatics services: -```python -from unittest.mock import patch - -@patch("mavedb.data_providers.services.cdot_rest") -@patch("mavedb.worker.jobs.map_variants_for_score_set") -def test_publish_enqueues_mapping(mock_map, mock_cdot, client, db): - ... -``` - -Common mock targets: -- `mavedb.data_providers.services.cdot_rest` -- `mavedb.worker.jobs.*` (individual job functions) -- `mavedb.lib.authentication.get_current_user` -- HGVS/SeqRepo data providers - -## Helper Factories - -Use factory functions in test helpers to create test objects: -```python -from tests.helpers.constants import TEST_SCORE_SET - -def create_score_set(client, payload=TEST_SCORE_SET): - response = client.post("/api/v1/score-sets/", json=payload) - assert response.status_code == 201 - return response.json() -``` - -## Testing Patterns - -### Permission Testing -Test both allowed and denied access for each role: -```python -def test_owner_can_update_draft(client, db): - ... 
- -def test_non_owner_cannot_update_draft(client, db): - with DependencyOverrider(app, {get_current_user: lambda: other_user}): - response = client.put(f"/api/v1/score-sets/{urn}", json=update_data) - assert response.status_code == 404 # 404, not 403 -``` - -### Worker Job Testing -Test job logic directly, not through the API: -```python -async def test_create_variants_processes_csv(db, score_set): - ctx = {"db": db} - await create_variants_for_score_set(ctx, score_set.id, "test-correlation-id") - assert score_set.num_variants > 0 -``` - -### Schema Validation -Verify that response shapes match view models: -```python -def test_score_set_response_has_record_type(client): - response = client.get(f"/api/v1/score-sets/{urn}") - assert response.json()["recordType"] == "score_set" -``` +Use the pattern: `test___` + +Examples: +- `test_submit_to_car_when_disabled_skips_submission` +- `test_create_score_set_returns_422_when_missing_target` + +Apply to tests being modified; don't rename all tests at once. + +## Parametrization + +Use `@pytest.mark.parametrize` with descriptive `ids` when the same logic is tested across multiple states. Prefer parametrization over copy-pasting near-identical tests. + +## Fixtures + +- Keep fixtures minimal and composable +- Define fixtures in the most specific `conftest.py` where they're needed +- Don't duplicate fixtures across test classes — lift shared ones to the nearest common conftest +- Use factory fixtures when tests need variants of the same object + +--- + +# Worker-Specific Conventions + +The following conventions apply specifically to `tests/worker/`. + +## Job Test Assertions + +- Assert on `JobExecutionOutcome.status` and `.data` for every job test +- Assert on DB state changes for the domain objects the job modifies +- For external service jobs: assert boundary calls (ClinGen CAR/LDH, UniProt, gnomAD/Athena, S3, ClinVar) + +## Let `update_progress` Run Unpatched + +`update_progress()` calls `session.commit()` as a checkpoint. 
This is production behavior and should execute in tests. Letting it run means tests verify that checkpoint commits don't break state or interfere with final outcomes. Don't patch it, don't mock it, don't assert on its messages. + +## TransactionSpy Usage + +**USE in manager/decorator tests** (e.g., `test_job_manager.py`, `test_pipeline_manager.py`): The commit/rollback boundary IS the contract here. If someone removes a commit, data silently won't persist in production. DB state checks alone can't catch this because the test session may auto-commit on teardown. + +**USE `mock_database_flush_failure` / `mock_database_rollback_failure`**: These simulate DB errors that are genuinely hard to reproduce otherwise. Valuable for testing error recovery paths in infrastructure code. + +**DO NOT USE in job-level tests** (e.g., `test_clingen.py`, `test_cleanup.py`, `test_creation.py`): The job's contract is "variants were created" or "stalled jobs were retried," not "session.commit() was called." Use DB state queries instead. diff --git a/.github/instructions/worker.instructions.md b/.github/instructions/worker.instructions.md new file mode 100644 index 000000000..16b6e19f0 --- /dev/null +++ b/.github/instructions/worker.instructions.md @@ -0,0 +1,191 @@ +--- +description: 'MaveDB worker patterns — jobs, decorators, managers, pipelines' +applyTo: 'src/mavedb/worker/**/*.py' +--- + +# Worker Conventions for MaveDB + +*For comprehensive documentation with walkthroughs and examples, see `src/mavedb/worker/README.md` and linked docs.* + +## Architecture + +The worker is a two-layer system: + +- **Infrastructure layer** (`lib/decorators/`, `lib/managers/`): Handles job lifecycle, state persistence, error recovery, pipeline coordination. Developers rarely modify this. +- **Business layer** (`jobs/`): Implements domain logic (variant creation, mapping, external service calls). This is where most new code goes. + +Decorators bridge the two layers. 
Job functions focus purely on business logic and return a `JobExecutionOutcome`. Decorators handle lifecycle state, commits, error recovery, and pipeline coordination automatically. + +## Job Function Contract + +Every job function follows this signature: + +```python +@with_pipeline_management +async def my_job(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: +``` + +- `ctx`: ARQ context dict containing `db`, `redis`, `hdp` (HGVS data provider), `pool`, `state` +- `job_id`: `JobRun.id` from the database (passed by the caller / ARQ) +- `job_manager`: Injected by the decorator — **NOT passed by the caller** +- Return: Always a `JobExecutionOutcome` via its factory methods + +**Callers enqueue jobs with only the job function name and `job_id`.** The decorator injects `job_manager` as a keyword argument before the function executes. + +## Decorator Rules + +| Decorator | Use For | Stacking | +|-----------|---------|----------| +| `@with_pipeline_management` | Jobs that belong (or may belong) to a pipeline | Use alone — it wraps `@with_job_management` internally | +| `@with_job_management` | Standalone jobs without pipeline coordination | Use alone or under `@with_guaranteed_job_run_record` | +| `@with_guaranteed_job_run_record(type)` | Cron/standalone jobs that need a `JobRun` record auto-created | Must be outermost; stack with `@with_job_management` only | + +**Most jobs use `@with_pipeline_management`** because it works for both pipeline and non-pipeline jobs. If the job has no pipeline association, the decorator simply skips coordination. + +`@with_guaranteed_job_run_record` is **NOT compatible** with `@with_pipeline_management`. It is only for standalone/cron jobs. + +### Decorator internals + +- All decorators become no-ops when `MAVEDB_TEST_MODE=1` (checked via `is_test_mode()`). This allows tests to call job functions directly with a pre-built `JobManager`. 
+- `ensure_session_ctx()` creates a task-local DB session via `ContextVar`, preventing concurrent ARQ jobs from sharing or closing each other's sessions. +- `with_pipeline_management` wraps `with_job_management` internally by calling `with_job_management(func)` inside `_execute_managed_pipeline`. Do not stack them manually. + +## JobManager API (What Job Code Uses) + +From within a job function, use `job_manager` for: + +```python +# Access the job's DB record and parameters +job = job_manager.get_job() # Returns JobRun ORM object +params = job.job_params # Dict of job parameters (JSONB) + +# Access the database session +score_set = job_manager.db.scalars(select(ScoreSet).where(...)).one() + +# Progress tracking (each call commits as a checkpoint by default) +job_manager.update_progress(current, total, message) +job_manager.update_progress(50, 100, "Halfway done", commit=False) # Skip checkpoint + +# Logging context +job_manager.save_to_context({"score_set_id": 123, "correlation_id": "abc"}) +logger.info("Processing", extra=job_manager.logging_context()) +``` + +**Do not call** `start_job()`, `succeed_job()`, `fail_job()`, `error_job()`, or `complete_job()` from job code. The decorator handles these based on the `JobExecutionOutcome` you return. + +## Session & Commit Discipline + +- **Decorators handle commits** for job lifecycle state transitions (start, complete, fail, retry) +- **`update_progress()` commits by default** as a checkpoint — this commits ALL pending session changes, so call it only at safe transaction boundaries. Pass `commit=False` to skip. 
+- **Job code should NOT call `db.commit()`** — use `db.flush()` if you need generated IDs before the decorator commits +- **PipelineManager commits before its async Redis enqueue loop** to release PostgreSQL row locks and prevent deadlocks (psycopg2 is synchronous, so a blocked UPDATE would freeze asyncio) + +## Return Values (JobExecutionOutcome) + +Always return using factory methods: + +```python +return JobExecutionOutcome.succeeded(data={"variants_created": count}) +return JobExecutionOutcome.failed(reason="No mapped variants found", data={...}) +return JobExecutionOutcome.failed(reason="HGVS parse error", failure_category=FailureCategory.DATA_ERROR) +return JobExecutionOutcome.skipped(data={"reason": "Feature disabled"}) +# For unhandled exceptions: let them propagate — the decorator catches and creates .errored() +``` + +The optional `failure_category` parameter on `.failed()` and `.errored()` controls retry eligibility. Categories in `RETRYABLE_FAILURE_CATEGORIES` (e.g., `NETWORK_ERROR`, `TIMEOUT`, `SERVICE_UNAVAILABLE`) enable automatic retries. When a job doesn't set an explicit category, the decorator classifies unhandled exceptions via `classify_exception()` (e.g., `ConnectionError` → `NETWORK_ERROR`). Unclassifiable exceptions default to `UNKNOWN` (not retryable). + +**Do not return `.errored()` from job code.** Let unhandled exceptions propagate; the decorator catches them, classifies the failure, marks the job as ERRORED, sends Slack alerts, and handles retry logic. + +## Parameter Access Pattern + +Job parameters live in `JobRun.job_params` (JSONB column), not in function arguments: + +```python +job = job_manager.get_job() + +_job_required_params = ["score_set_id", "correlation_id", "updater_id"] +validate_job_params(_job_required_params, job) + +score_set_id = job.job_params["score_set_id"] +correlation_id = job.job_params["correlation_id"] +``` + +Always call `validate_job_params()` (from `worker.jobs.utils.setup`) before accessing params. 
+ +Parameters with `None` values in pipeline definitions are filled at runtime from `pipeline_params` passed by the router/script when creating the pipeline. + +## Error Handling + +- **Business failures** (validation errors, missing data): Return `JobExecutionOutcome.failed(reason=..., failure_category=...)` with an explicit `FailureCategory` for retry control. +- **Unhandled exceptions**: Let them propagate. The decorator catches them, classifies the exception via `classify_exception()`, marks the job as ERRORED, sends a Slack alert, and evaluates retry eligibility. +- **External service disabled/unavailable**: Return `JobExecutionOutcome.skipped()` if a config check shows the service is disabled. Let connection errors propagate for retry handling. +- **Retry eligibility**: Determined by `should_retry()` which checks `retry_count < max_retries` and `failure_category in RETRYABLE_FAILURE_CATEGORIES`. +- **Failure classification**: `classify_exception()` in `utils.py` maps infrastructure exceptions to categories (`ConnectionError` → `NETWORK_ERROR`, `TimeoutError` → `TIMEOUT`, `OSError` → `NETWORK_ERROR`). Unmapped exceptions default to `UNKNOWN`. Job-level explicit `failure_category` on the outcome takes priority over automatic classification. +- **Slack safety**: `send_slack_job_failure()` and `send_slack_job_error()` catch their own exceptions internally (logging critical on failure), so Slack outages never interfere with job lifecycle management or error recovery in the decorators. +- **Stale RUNNING recovery**: `start_job()` accepts RUNNING as a startable status (alongside QUEUED and PENDING). When ARQ re-delivers a job after a worker crash, `start_job()` logs a warning and resets the start timestamp rather than raising `JobTransitionError`. +- **Concurrency limit**: `max_jobs = 2` in `ArqWorkerSettings` prevents event loop starvation from sync psycopg2 DB calls. 
With the default `max_jobs=10`, multiple concurrent jobs issuing blocking DB operations can starve the asyncio event loop. + +## Pipeline Lifecycle (Brief) + +1. Router calls `PipelineFactory.create_pipeline()` → creates `Pipeline`, `JobRun`, and `JobDependency` records +2. Router enqueues the `start_pipeline` entrypoint job in ARQ +3. `start_pipeline` runs → its `@with_pipeline_management` decorator starts the pipeline and calls `coordinate_pipeline()` +4. `coordinate_pipeline()` finds PENDING jobs whose dependencies are met → marks them QUEUED → enqueues in ARQ +5. Each job runs → after completion, its decorator calls `coordinate_pipeline()` again +6. Cycle repeats until all jobs complete or the pipeline fails/is cancelled + +Pipeline definitions live in `src/mavedb/lib/workflow/definitions.py`. The `PipelineFactory` (in `src/mavedb/lib/workflow/pipeline_factory.py`) reads these definitions and creates the DB records. + +*For full details, see `src/mavedb/worker/pipeline_management.md`.* + +## Adding a New Pipeline Job + +1. Create the job function in `src/mavedb/worker/jobs//.py` +2. Decorate with `@with_pipeline_management` +3. Follow the signature: `async def job_name(ctx, job_id, job_manager) -> JobExecutionOutcome` +4. Export from the category's `__init__.py` +5. Register in `src/mavedb/worker/jobs/registry.py` → add to `BACKGROUND_FUNCTIONS` +6. Add a `JobDefinition` entry to the relevant pipeline in `src/mavedb/lib/workflow/definitions.py` + +## Adding a Standalone/Cron Job + +1. Create the job function in `src/mavedb/worker/jobs//.py` +2. Stack `@with_guaranteed_job_run_record("job_type")` (outer) + `@with_job_management` (inner) +3. Export from the category's `__init__.py` +4. Register in `src/mavedb/worker/jobs/registry.py` → add to `BACKGROUND_FUNCTIONS` +5. For cron: also add to `BACKGROUND_CRONJOBS` with schedule +6. 
Optionally add to `STANDALONE_JOB_DEFINITIONS` if the job needs to be invoked via operational scripts + +## Testing + +- Decorators are no-ops in test mode (`MAVEDB_TEST_MODE=1`). Tests call job functions directly, passing a real `JobManager` instance. +- Assert on `JobExecutionOutcome.status` and `.data` for every job test. +- Assert on DB state changes (query for created/updated/deleted records). +- Let `update_progress()` run unpatched — its commit behavior is production behavior that should be tested. +- Mock only at system boundaries (external APIs, S3, Slack). Do not mock internal helpers. +- Use `TransactionSpy` in manager/decorator tests only, not in job-level tests. + +*For full testing conventions, see `.github/instructions/testing.instructions.md`.* + +## Key Reference Files + +| File | Purpose | +|------|---------| +| `jobs/registry.py` | All registered job functions, cron definitions, standalone definitions | +| `jobs/variant_processing/creation.py` | Reference pipeline job implementation | +| `jobs/system/cleanup.py` | Reference standalone cron job implementation | +| `lib/decorators/pipeline_management.py` | Pipeline decorator (coordinates after job completion) | +| `lib/decorators/job_management.py` | Job lifecycle decorator (start/complete/error handling) | +| `lib/decorators/utils.py` | Session management (`ensure_session_ctx`), test mode (`is_test_mode`) | +| `lib/managers/job_manager.py` | Job state management (used by decorators and job code) | +| `lib/managers/pipeline_manager.py` | Pipeline coordination, dependency resolution, job enqueueing | +| `lib/managers/constants.py` | Status groupings (`TERMINAL_JOB_STATUSES`, `STARTABLE_JOB_STATUSES`, etc.) | +| `lib/managers/exceptions.py` | Exception hierarchy (`JobStateError`, `PipelineCoordinationError`, etc.) 
| +| `settings/worker.py` | `ArqWorkerSettings` class (ARQ worker configuration) | +| `settings/lifecycle.py` | Worker startup/shutdown hooks, `standalone_ctx()` | +| `src/mavedb/lib/workflow/definitions.py` | Pipeline and job definitions (`PIPELINE_DEFINITIONS`) | +| `src/mavedb/lib/workflow/pipeline_factory.py` | Creates Pipeline + JobRun + JobDependency records | +| `src/mavedb/lib/types/workflow.py` | `JobExecutionOutcome`, `JobDefinition`, `PipelineDefinition` types | +| `src/mavedb/models/job_run.py` | `JobRun` ORM model | +| `src/mavedb/models/pipeline.py` | `Pipeline` ORM model | +| `src/mavedb/models/enums/job_pipeline.py` | `JobStatus`, `PipelineStatus`, `DependencyType`, `FailureCategory` enums | diff --git a/.github/workflows/run-tests-on-push.yml b/.github/workflows/run-tests-on-push.yml index 6cb7d18ec..f07da233d 100644 --- a/.github/workflows/run-tests-on-push.yml +++ b/.github/workflows/run-tests-on-push.yml @@ -1,6 +1,7 @@ -name: Run Tests (On Push) +name: Run Tests on: push: + # Run all tests on main, fast tests on other branches env: LOG_CONFIG: test @@ -50,7 +51,12 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev - - run: poetry run pytest tests/ + - name: Run fast tests on non-main branches + if: github.event_name == 'push' && github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" + - name: Run full tests on main + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + run: poetry run pytest tests/ run-tests-3_11: runs-on: ubuntu-latest @@ -66,7 +72,12 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev --extras server - - run: poetry run pytest tests/ --show-capture=stdout --cov=src + - name: Run fast tests on non-main branches + if: github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" --show-capture=stdout + - name: Run all tests with coverage on main 
branch + if: github.ref == 'refs/heads/main' + run: poetry run pytest tests/ --show-capture=stdout --cov=src run-tests-3_12-core-dependencies: runs-on: ubuntu-latest @@ -80,7 +91,12 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev - - run: poetry run pytest tests/ + - name: Run fast tests on non-main branches + if: github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" + - name: Run all tests on main branch + if: github.ref == 'refs/heads/main' + run: poetry run pytest tests/ run-tests-3_12: runs-on: ubuntu-latest @@ -96,4 +112,9 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev --extras server - - run: poetry run pytest tests/ --show-capture=stdout --cov=src + - name: Run fast tests on non-main branches + if: github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" --show-capture=stdout + - name: Run all tests with coverage on main branch + if: github.ref == 'refs/heads/main' + run: poetry run pytest tests/ --show-capture=stdout --cov=src diff --git a/alembic/versions/009570ae0cb0_rename_success_data_to_annotation_.py b/alembic/versions/009570ae0cb0_rename_success_data_to_annotation_.py new file mode 100644 index 000000000..7fa0de6d8 --- /dev/null +++ b/alembic/versions/009570ae0cb0_rename_success_data_to_annotation_.py @@ -0,0 +1,33 @@ +"""rename_success_data_to_annotation_metadata + +Revision ID: 009570ae0cb0 +Revises: 8de33cc35cd7 +Create Date: 2026-04-16 17:26:16.151395 + +""" + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "009570ae0cb0" +down_revision = "8de33cc35cd7" +branch_labels = None +depends_on = None + + +def upgrade(): + op.alter_column( + "variant_annotation_status", + "success_data", + new_column_name="annotation_metadata", + comment="Structured metadata for the annotation result", + ) + + +def downgrade(): + op.alter_column( + "variant_annotation_status", + "annotation_metadata", + new_column_name="success_data", + comment="Annotation results when successful", + ) diff --git a/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py new file mode 100644 index 000000000..9530868cc --- /dev/null +++ b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py @@ -0,0 +1,222 @@ +"""add pipeline and job tracking tables + +Revision ID: 8de33cc35cd7 +Revises: 659999dec5d9 +Create Date: 2026-01-28 10:08:36.906494 + +""" + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "8de33cc35cd7" +down_revision = "659999dec5d9" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "pipelines", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("urn", sa.String(length=255), nullable=True), + sa.Column("name", sa.String(length=500), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("status", sa.String(length=50), nullable=False), + sa.Column("correlation_id", sa.String(length=255), nullable=True), + sa.Column( + "metadata", + postgresql.JSONB(astext_type=sa.Text()), + server_default="{}", + nullable=False, + comment="Flexible metadata storage for pipeline-specific data", + ), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("finished_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("created_by_user_id", sa.Integer(), nullable=True), + sa.Column("mavedb_version", sa.String(length=50), nullable=True), + sa.CheckConstraint( + "status IN ('created', 'running', 'succeeded', 'failed', 'cancelled', 'paused', 'partial')", + name="ck_pipelines_status_valid", + ), + sa.ForeignKeyConstraint(["created_by_user_id"], ["users.id"], ondelete="SET NULL"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("urn"), + ) + op.create_index("ix_pipelines_correlation_id", "pipelines", ["correlation_id"], unique=False) + op.create_index("ix_pipelines_created_at", "pipelines", ["created_at"], unique=False) + op.create_index("ix_pipelines_created_by_user_id", "pipelines", ["created_by_user_id"], unique=False) + op.create_index("ix_pipelines_status", "pipelines", ["status"], unique=False) + op.create_table( + "job_runs", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("urn", sa.String(length=255), nullable=True), + sa.Column("job_type", sa.String(length=100), nullable=False), + sa.Column("job_function", sa.String(length=255), nullable=False), + sa.Column("job_params", 
postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("status", sa.String(length=50), nullable=False), + sa.Column("pipeline_id", sa.Integer(), nullable=True), + sa.Column("priority", sa.Integer(), nullable=False), + sa.Column("max_retries", sa.Integer(), nullable=False), + sa.Column("retry_count", sa.Integer(), nullable=False), + sa.Column("retry_delay_seconds", sa.Integer(), nullable=True), + sa.Column("scheduled_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("finished_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("error_message", sa.Text(), nullable=True), + sa.Column("error_traceback", sa.Text(), nullable=True), + sa.Column("failure_category", sa.String(length=100), nullable=True), + sa.Column("progress_current", sa.Integer(), nullable=True), + sa.Column("progress_total", sa.Integer(), nullable=True), + sa.Column("progress_message", sa.String(length=500), nullable=True), + sa.Column("correlation_id", sa.String(length=255), nullable=True), + sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), server_default="{}", nullable=False), + sa.Column("mavedb_version", sa.String(length=50), nullable=True), + sa.CheckConstraint( + "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'errored', 'cancelled', 'skipped')", + name="ck_job_runs_status_valid", + ), + sa.CheckConstraint("max_retries >= 0", name="ck_job_runs_max_retries_positive"), + sa.CheckConstraint("priority >= 0", name="ck_job_runs_priority_positive"), + sa.CheckConstraint("retry_count >= 0", name="ck_job_runs_retry_count_positive"), + sa.ForeignKeyConstraint(["pipeline_id"], ["pipelines.id"], ondelete="SET NULL"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("urn"), + ) + op.create_index("ix_job_runs_correlation_id", 
"job_runs", ["correlation_id"], unique=False) + op.create_index("ix_job_runs_created_at", "job_runs", ["created_at"], unique=False) + op.create_index("ix_job_runs_job_type", "job_runs", ["job_type"], unique=False) + op.create_index("ix_job_runs_pipeline_id", "job_runs", ["pipeline_id"], unique=False) + op.create_index("ix_job_runs_scheduled_at", "job_runs", ["scheduled_at"], unique=False) + op.create_index("ix_job_runs_status", "job_runs", ["status"], unique=False) + op.create_index("ix_job_runs_status_scheduled", "job_runs", ["status", "scheduled_at"], unique=False) + op.create_table( + "job_dependencies", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("depends_on_job_id", sa.Integer(), nullable=False), + sa.Column("dependency_type", sa.String(length=50), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.CheckConstraint( + "dependency_type IS NULL OR dependency_type IN ('success_required', 'completion_required')", + name="ck_job_dependencies_type_valid", + ), + sa.ForeignKeyConstraint(["depends_on_job_id"], ["job_runs.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["id"], ["job_runs.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id", "depends_on_job_id"), + ) + op.create_index("ix_job_dependencies_created_at", "job_dependencies", ["created_at"], unique=False) + op.create_index("ix_job_dependencies_depends_on_job_id", "job_dependencies", ["depends_on_job_id"], unique=False) + op.create_table( + "variant_annotation_status", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("variant_id", sa.Integer(), nullable=False), + sa.Column( + "annotation_type", + sa.String(length=50), + nullable=False, + comment="Type of annotation: vrs, clinvar, gnomad, etc.", + ), + sa.Column( + "version", + sa.String(length=50), + nullable=True, + comment="Version of the 
annotation source used (if applicable)", + ), + sa.Column("status", sa.String(length=50), nullable=False, comment="success, failed, skipped, pending"), + sa.Column("error_message", sa.Text(), nullable=True), + sa.Column("failure_category", sa.String(length=100), nullable=True), + sa.Column( + "success_data", + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + comment="Annotation results when successful", + ), + sa.Column( + "current", + sa.Boolean(), + server_default="true", + nullable=False, + comment="Whether this is the current status for the variant and annotation type", + ), + sa.Column("job_run_id", sa.Integer(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.CheckConstraint( + "annotation_type IN ('vrs_mapping', 'clingen_allele_id', 'mapped_hgvs', 'variant_translation', 'gnomad_allele_frequency', 'clinvar_control', 'vep_functional_consequence', 'ldh_submission')", + name="ck_variant_annotation_type_valid", + ), + sa.CheckConstraint("status IN ('success', 'failed', 'skipped')", name="ck_variant_annotation_status_valid"), + sa.ForeignKeyConstraint(["job_run_id"], ["job_runs.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint(["variant_id"], ["variants.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + "ix_variant_annotation_status_annotation_type", "variant_annotation_status", ["annotation_type"], unique=False + ) + op.create_index( + "ix_variant_annotation_status_created_at", "variant_annotation_status", ["created_at"], unique=False + ) + op.create_index("ix_variant_annotation_status_current", "variant_annotation_status", ["current"], unique=False) + op.create_index( + "ix_variant_annotation_status_job_run_id", "variant_annotation_status", ["job_run_id"], unique=False + ) + op.create_index("ix_variant_annotation_status_status", 
"variant_annotation_status", ["status"], unique=False) + op.create_index( + "ix_variant_annotation_status_variant_id", "variant_annotation_status", ["variant_id"], unique=False + ) + op.create_index( + "ix_variant_annotation_status_variant_type_version_current", + "variant_annotation_status", + ["variant_id", "annotation_type", "version", "current"], + unique=False, + ) + op.create_index("ix_variant_annotation_status_version", "variant_annotation_status", ["version"], unique=False) + op.create_index( + "ix_variant_annotation_type_status", "variant_annotation_status", ["annotation_type", "status"], unique=False + ) + op.create_index( + "ix_variant_annotation_variant_type_status", + "variant_annotation_status", + ["variant_id", "annotation_type", "status"], + unique=False, + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index("ix_variant_annotation_variant_type_status", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_type_status", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_version", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_variant_type_version_current", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_variant_id", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_status", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_job_run_id", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_current", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_created_at", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_annotation_type", table_name="variant_annotation_status") + op.drop_table("variant_annotation_status") + 
op.drop_index("ix_job_dependencies_depends_on_job_id", table_name="job_dependencies") + op.drop_index("ix_job_dependencies_created_at", table_name="job_dependencies") + op.drop_table("job_dependencies") + op.drop_index("ix_job_runs_status_scheduled", table_name="job_runs") + op.drop_index("ix_job_runs_status", table_name="job_runs") + op.drop_index("ix_job_runs_scheduled_at", table_name="job_runs") + op.drop_index("ix_job_runs_pipeline_id", table_name="job_runs") + op.drop_index("ix_job_runs_job_type", table_name="job_runs") + op.drop_index("ix_job_runs_created_at", table_name="job_runs") + op.drop_index("ix_job_runs_correlation_id", table_name="job_runs") + op.drop_table("job_runs") + op.drop_index("ix_pipelines_status", table_name="pipelines") + op.drop_index("ix_pipelines_created_by_user_id", table_name="pipelines") + op.drop_index("ix_pipelines_created_at", table_name="pipelines") + op.drop_index("ix_pipelines_correlation_id", table_name="pipelines") + op.drop_table("pipelines") + # ### end Alembic commands ### diff --git a/alembic/versions/a3b7c9d1e2f4_drop_redundant_variant_annotation_.py b/alembic/versions/a3b7c9d1e2f4_drop_redundant_variant_annotation_.py new file mode 100644 index 000000000..6647ab0df --- /dev/null +++ b/alembic/versions/a3b7c9d1e2f4_drop_redundant_variant_annotation_.py @@ -0,0 +1,61 @@ +"""drop_redundant_variant_annotation_status_indexes + +Revision ID: a3b7c9d1e2f4 +Revises: 009570ae0cb0 +Create Date: 2026-04-20 12:00:00.000000 + +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "a3b7c9d1e2f4" +down_revision = "009570ae0cb0" +branch_labels = None +depends_on = None + + +# The variant_annotation_status table is append-only and write-heavy. Every +# production query filters on (variant_id, annotation_type, [version], current), +# which is fully served by the composite index +# ix_variant_annotation_status_variant_type_version_current. 
+# +# The 8 indexes being dropped here are either: +# - single-column prefixes of that composite (redundant), +# - on low-selectivity columns (boolean, 3 enum values), or +# - on columns that are never filtered in any query (status, created_at). +# +# Keeping: the 4-column composite + the job_run_id FK index. + +INDEXES_TO_DROP = [ + "ix_variant_annotation_status_variant_id", + "ix_variant_annotation_status_annotation_type", + "ix_variant_annotation_status_status", + "ix_variant_annotation_status_created_at", + "ix_variant_annotation_variant_type_status", + "ix_variant_annotation_type_status", + "ix_variant_annotation_status_current", + "ix_variant_annotation_status_version", +] + +# Column definitions for downgrade (recreating dropped indexes) +INDEX_COLUMNS = { + "ix_variant_annotation_status_variant_id": ["variant_id"], + "ix_variant_annotation_status_annotation_type": ["annotation_type"], + "ix_variant_annotation_status_status": ["status"], + "ix_variant_annotation_status_created_at": ["created_at"], + "ix_variant_annotation_variant_type_status": ["variant_id", "annotation_type", "status"], + "ix_variant_annotation_type_status": ["annotation_type", "status"], + "ix_variant_annotation_status_current": ["current"], + "ix_variant_annotation_status_version": ["version"], +} + + +def upgrade() -> None: + for index_name in INDEXES_TO_DROP: + op.drop_index(index_name, table_name="variant_annotation_status") + + +def downgrade() -> None: + for index_name, columns in INDEX_COLUMNS.items(): + op.create_index(index_name, "variant_annotation_status", columns) diff --git a/alembic/versions/b5c8d2e4f6a7_simplify_variant_annotation_status_pk.py b/alembic/versions/b5c8d2e4f6a7_simplify_variant_annotation_status_pk.py new file mode 100644 index 000000000..875f63f60 --- /dev/null +++ b/alembic/versions/b5c8d2e4f6a7_simplify_variant_annotation_status_pk.py @@ -0,0 +1,35 @@ +"""simplify variant_annotation_status pk to id only + +Revision ID: b5c8d2e4f6a7 +Revises: a3b7c9d1e2f4 
+Create Date: 2026-04-20 + +The composite PK (id, variant_id, annotation_type) is unnecessary because `id` +is already unique (autoincrement serial). Keeping variant_id and annotation_type +in the PK just widens the B-tree on every INSERT with no benefit — no FK +references this composite key. + +This migration drops the composite PK and recreates it on `id` alone. +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "b5c8d2e4f6a7" +down_revision = "a3b7c9d1e2f4" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.drop_constraint("variant_annotation_status_pkey", "variant_annotation_status", type_="primary") + op.create_primary_key("variant_annotation_status_pkey", "variant_annotation_status", ["id"]) + + +def downgrade() -> None: + op.drop_constraint("variant_annotation_status_pkey", "variant_annotation_status", type_="primary") + op.create_primary_key( + "variant_annotation_status_pkey", + "variant_annotation_status", + ["id", "variant_id", "annotation_type"], + ) diff --git a/alembic/versions/c6d9e3f7a8b2_add_annotation_failure_category_check.py b/alembic/versions/c6d9e3f7a8b2_add_annotation_failure_category_check.py new file mode 100644 index 000000000..886a16ccf --- /dev/null +++ b/alembic/versions/c6d9e3f7a8b2_add_annotation_failure_category_check.py @@ -0,0 +1,69 @@ +"""add annotation failure category check constraint + +Revision ID: c6d9e3f7a8b2 +Revises: b5c8d2e4f6a7 +Create Date: 2026-04-20 + +Adds a CHECK constraint on variant_annotation_status.failure_category to enforce +the AnnotationFailureCategory enum values. Also migrates existing free-text +failure_category values to their corresponding enum values. +""" + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "c6d9e3f7a8b2" +down_revision = "b5c8d2e4f6a7" +branch_labels = None +depends_on = None + +# Mapping from old free-text values to new enum values +OLD_TO_NEW = { + "missing_clingen_allele_id": "missing_identifier", + "multi_variant_clingen_allele_id": "unsupported_identifier", + "invalid_allele_format": "unsupported_identifier", + "clingen_api_error": "external_api_error", + "not_found": "external_reference_not_found", + "clingen_allele_not_found": "external_reference_not_found", + "no_associated_clinvar_allele_id": "no_linked_allele", + "no_canonical_pa_ids": "no_linked_allele", + "no_registered_ca_ids": "no_linked_allele", +} + + +def upgrade() -> None: + # Migrate existing free-text values to enum values + for old_value, new_value in OLD_TO_NEW.items(): + op.execute( + f"UPDATE variant_annotation_status SET failure_category = '{new_value}' " + f"WHERE failure_category = '{old_value}'" + ) + + # Set any remaining non-null values that don't match known enum values to 'unknown' + valid_values = "', '".join( + [ + "missing_identifier", + "unsupported_identifier", + "external_api_error", + "external_reference_not_found", + "no_linked_allele", + "unknown", + ] + ) + op.execute( + f"UPDATE variant_annotation_status SET failure_category = 'unknown' " + f"WHERE failure_category IS NOT NULL AND failure_category NOT IN ('{valid_values}')" + ) + + # Add the check constraint + op.create_check_constraint( + "ck_variant_annotation_failure_category_valid", + "variant_annotation_status", + "failure_category IS NULL OR failure_category IN " + "('missing_identifier', 'unsupported_identifier', 'external_api_error', " + "'external_reference_not_found', 'no_linked_allele', 'unknown')", + ) + + +def downgrade() -> None: + op.drop_constraint("ck_variant_annotation_failure_category_valid", "variant_annotation_status", type_="check") diff --git a/alembic/versions/d1f4a2e9c05b_drop_job_run_priority_column.py b/alembic/versions/d1f4a2e9c05b_drop_job_run_priority_column.py new 
file mode 100644 index 000000000..413453d47 --- /dev/null +++ b/alembic/versions/d1f4a2e9c05b_drop_job_run_priority_column.py @@ -0,0 +1,28 @@ +"""drop job_run priority column + +Revision ID: d1f4a2e9c05b +Revises: c6d9e3f7a8b2 +Create Date: 2026-04-21 00:00:00.000000 + +""" + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "d1f4a2e9c05b" +down_revision = "c6d9e3f7a8b2" +branch_labels = None +depends_on = None + + +def upgrade(): + op.drop_constraint("ck_job_runs_priority_positive", "job_runs", type_="check") + op.drop_column("job_runs", "priority") + + +def downgrade(): + op.add_column("job_runs", sa.Column("priority", sa.Integer(), nullable=False, server_default="0")) + op.create_check_constraint("ck_job_runs_priority_positive", "job_runs", "priority >= 0") + op.alter_column("job_runs", "priority", server_default=None) diff --git a/alembic/versions/e3a7b9f1d2c5_add_external_service_rejected_failure_category.py b/alembic/versions/e3a7b9f1d2c5_add_external_service_rejected_failure_category.py new file mode 100644 index 000000000..25283981f --- /dev/null +++ b/alembic/versions/e3a7b9f1d2c5_add_external_service_rejected_failure_category.py @@ -0,0 +1,52 @@ +"""add external_service_rejected annotation failure category + +Revision ID: e3a7b9f1d2c5 +Revises: d1f4a2e9c05b +Create Date: 2026-04-24 + +Extends the failure_category CHECK constraint on variant_annotation_status to include +'external_service_rejected', which distinguishes explicit rejections by an external +service (e.g. CAR returning InvalidHGVS) from generic API errors (network failures, +timeouts, etc.). +""" + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "e3a7b9f1d2c5" +down_revision = "d1f4a2e9c05b" +branch_labels = None +depends_on = None + +OLD_VALID_VALUES = ( + "('missing_identifier', 'unsupported_identifier', 'external_api_error', " + "'external_reference_not_found', 'no_linked_allele', 'unknown')" +) +NEW_VALID_VALUES = ( + "('missing_identifier', 'unsupported_identifier', 'external_api_error', " + "'external_service_rejected', 'external_reference_not_found', 'no_linked_allele', 'unknown')" +) + + +def upgrade() -> None: + op.drop_constraint("ck_variant_annotation_failure_category_valid", "variant_annotation_status", type_="check") + op.create_check_constraint( + "ck_variant_annotation_failure_category_valid", + "variant_annotation_status", + f"failure_category IS NULL OR failure_category IN {NEW_VALID_VALUES}", + ) + + +def downgrade() -> None: + # Reclassify any 'external_service_rejected' rows back to 'external_api_error' before + # dropping the new value from the constraint. + op.execute( + "UPDATE variant_annotation_status SET failure_category = 'external_api_error' " + "WHERE failure_category = 'external_service_rejected'" + ) + op.drop_constraint("ck_variant_annotation_failure_category_valid", "variant_annotation_status", type_="check") + op.create_check_constraint( + "ck_variant_annotation_failure_category_valid", + "variant_annotation_status", + f"failure_category IS NULL OR failure_category IN {OLD_VALID_VALUES}", + ) diff --git a/bin/localstack-init.sh b/bin/localstack-init.sh new file mode 100755 index 000000000..1a00cfcbc --- /dev/null +++ b/bin/localstack-init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +echo "Initializing local S3 bucket..." +awslocal s3 mb s3://score-set-csv-uploads-dev +echo "S3 bucket 'score-set-csv-uploads-dev' created." 
\ No newline at end of file diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index d9d430afe..972eb4108 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -95,6 +95,18 @@ services: volumes: - mavedb-redis-dev:/data + localstack: + image: localstack/localstack:latest + ports: + - "4566:4566" + env_file: + - settings/.env.dev + environment: + - SERVICES=s3:4566 # We only need S3 for MaveDB + volumes: + - mavedb-localstack-dev:/var/lib/localstack + - "./bin/localstack-init.sh:/etc/localstack/init/ready.d/localstack-init.sh" + seqrepo: image: biocommons/seqrepo:2024-12-20 volumes: @@ -104,3 +116,4 @@ volumes: mavedb-data-dev: mavedb-redis-dev: mavedb-seqrepo-dev: + mavedb-localstack-dev: diff --git a/mypy_stubs/aiocache/__init__.pyi b/mypy_stubs/aiocache/__init__.pyi new file mode 100644 index 000000000..b25ca6883 --- /dev/null +++ b/mypy_stubs/aiocache/__init__.pyi @@ -0,0 +1,53 @@ +"""Type stubs for aiocache library. + +Provides type hints for the aiocache caching library functionality used in MaveDB. +""" + +from typing import Any, Awaitable, Callable, Optional, Type, TypeVar, Union + +from .base import BaseCache + +# Type variables for decorator +F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) +T = TypeVar("T") + +class Cache: + """Cache factory class for creating cache instances.""" + + # Cache backend constants + REDIS: Type[BaseCache] + MEMORY: Type[BaseCache] + + def __init__( + self, + cache_class: Type[BaseCache], + *, + endpoint: Optional[str] = None, + port: Optional[int] = None, + ssl: bool = False, + namespace: Optional[str] = None, + serializer: Optional[Any] = None, + plugins: Optional[Any] = None, + **kwargs: Any, + ) -> None: ... + async def get(self, key: str) -> Any: ... + async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool: ... + async def delete(self, key: str) -> bool: ... + async def clear(self, namespace: Optional[str] = None) -> bool: ... + async def close(self) -> None: ... 
+ +def cached( + ttl: Optional[int] = None, + key: Optional[str] = None, + key_builder: Optional[Callable[..., str]] = None, + cache: Union[Type[BaseCache], BaseCache, None] = None, + serializer: Optional[Any] = None, + plugins: Optional[Any] = None, + alias: Optional[str] = None, + namespace: Optional[str] = None, + noself: bool = False, + skip_cache_func: Optional[Callable[[Any], bool]] = None, + **kwargs: Any, +) -> Callable[[F], F]: ... + +__all__ = ["Cache", "cached"] diff --git a/mypy_stubs/aiocache/base.pyi b/mypy_stubs/aiocache/base.pyi new file mode 100644 index 000000000..dba95550f --- /dev/null +++ b/mypy_stubs/aiocache/base.pyi @@ -0,0 +1,25 @@ +"""Type stubs for aiocache.base module. + +Provides type hints for the base cache class used by aiocache backends. +""" + +from typing import Any, Optional + +class BaseCache: + """Base class for cache backends.""" + + def __init__( + self, + *, + namespace: Optional[str] = None, + serializer: Optional[Any] = None, + plugins: Optional[Any] = None, + **kwargs: Any, + ) -> None: ... + async def get(self, key: str) -> Any: ... + async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool: ... + async def delete(self, key: str) -> bool: ... + async def clear(self, namespace: Optional[str] = None) -> bool: ... + async def close(self) -> None: ... + +__all__ = ["BaseCache"] diff --git a/poetry.lock b/poetry.lock index 7167c418c..bb6dffcbd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,26 @@ # This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. 
+[[package]] +name = "aiocache" +version = "0.12.3" +description = "multi backend asyncio cache" +optional = true +python-versions = "*" +groups = ["main"] +markers = "extra == \"server\"" +files = [ + {file = "aiocache-0.12.3-py2.py3-none-any.whl", hash = "sha256:889086fc24710f431937b87ad3720a289f7fc31c4fd8b68e9f918b9bacd8270d"}, + {file = "aiocache-0.12.3.tar.gz", hash = "sha256:f528b27bf4d436b497a1d0d1a8f59a542c153ab1e37c3621713cb376d44c4713"}, +] + +[package.dependencies] +redis = {version = ">=4.2.0", optional = true, markers = "extra == \"redis\""} + +[package.extras] +memcached = ["aiomcache (>=0.5.2)"] +msgpack = ["msgpack (>=0.5.5)"] +redis = ["redis (>=4.2.0)"] + [[package]] name = "alembic" version = "1.14.1" @@ -72,14 +93,14 @@ files = [ [[package]] name = "anyio" -version = "4.12.1" +version = "4.13.0" description = "High-level concurrency and networking framework on top of asyncio or Trio" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] files = [ - {file = "anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c"}, - {file = "anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703"}, + {file = "anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708"}, + {file = "anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc"}, ] [package.dependencies] @@ -87,7 +108,7 @@ idna = ">=2.8" typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] -trio = ["trio (>=0.31.0) ; python_version < \"3.10\"", "trio (>=0.32.0) ; python_version >= \"3.10\""] +trio = ["trio (>=0.32.0)"] [[package]] name = "arq" @@ -140,29 +161,44 @@ files = [ {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, ] +[[package]] +name = 
"asyncclick" +version = "8.3.0.7" +description = "Composable command line interface toolkit, async fork" +optional = false +python-versions = ">=3.11" +groups = ["main"] +files = [ + {file = "asyncclick-8.3.0.7-py3-none-any.whl", hash = "sha256:7607046de39a3f315867cad818849f973e29d350c10d92f251db3ff7600c6c7d"}, + {file = "asyncclick-8.3.0.7.tar.gz", hash = "sha256:8a80d8ac613098ee6a9a8f0248f60c66c273e22402cf3f115ed7f071acfc71d3"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "attrs" -version = "25.4.0" +version = "26.1.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"}, - {file = "attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11"}, + {file = "attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309"}, + {file = "attrs-26.1.0.tar.gz", hash = "sha256:d03ceb89cb322a8fd706d4fb91940737b6642aa36998fe130a9bc96c985eff32"}, ] [[package]] name = "authlib" -version = "1.6.7" +version = "1.6.11" description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." 
optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "authlib-1.6.7-py2.py3-none-any.whl", hash = "sha256:c637340d9a02789d2efa1d003a7437d10d3e565237bcb5fcbc6c134c7b95bab0"}, - {file = "authlib-1.6.7.tar.gz", hash = "sha256:dbf10100011d1e1b34048c9d120e83f13b35d69a826ae762b93d2fb5aafc337b"}, + {file = "authlib-1.6.11-py2.py3-none-any.whl", hash = "sha256:c8687a9a26451c51a34a06fa17bb97cb15bba46a6a626755e2d7f50da8bff3e3"}, + {file = "authlib-1.6.11.tar.gz", hash = "sha256:64db35b9b01aeccb4715a6c9a6613a06f2bd7be2ab9d2eb89edd1dfc7580a38f"}, ] [package.dependencies] @@ -277,411 +313,450 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "boto3-stubs" -version = "1.34.162" -description = "Type annotations for boto3 1.34.162 generated with mypy-boto3-builder 7.26.0" +version = "1.42.97" +description = "Type annotations for boto3 1.42.97 generated with mypy-boto3-builder 8.12.0" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "boto3_stubs-1.34.162-py3-none-any.whl", hash = "sha256:47c651272782a2e894082087eeaeb87a7e809e7e282748560cf39c155031abef"}, - {file = "boto3_stubs-1.34.162.tar.gz", hash = "sha256:6d60b7b9652e1c99f3caba00779e1b94ba7062b0431147a00543af8b1f5252f4"}, + {file = "boto3_stubs-1.42.97-py3-none-any.whl", hash = "sha256:47539eaab612d63b5b828657ee0977237725f7608f19563a2ae7f784042411bc"}, + {file = "boto3_stubs-1.42.97.tar.gz", hash = "sha256:f7f4775b0851ff6db0e3fb097064af6437e4de31b797d874a737104998e028c6"}, ] [package.dependencies] botocore-stubs = "*" +mypy-boto3-s3 = {version = ">=1.42.0,<1.43.0", optional = true, markers = "extra == \"s3\""} types-s3transfer = "*" typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} [package.extras] -accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.34.0,<1.35.0)"] -account = ["mypy-boto3-account (>=1.34.0,<1.35.0)"] -acm = ["mypy-boto3-acm (>=1.34.0,<1.35.0)"] 
-acm-pca = ["mypy-boto3-acm-pca (>=1.34.0,<1.35.0)"] -all = ["mypy-boto3-accessanalyzer (>=1.34.0,<1.35.0)", "mypy-boto3-account (>=1.34.0,<1.35.0)", "mypy-boto3-acm (>=1.34.0,<1.35.0)", "mypy-boto3-acm-pca (>=1.34.0,<1.35.0)", "mypy-boto3-amp (>=1.34.0,<1.35.0)", "mypy-boto3-amplify (>=1.34.0,<1.35.0)", "mypy-boto3-amplifybackend (>=1.34.0,<1.35.0)", "mypy-boto3-amplifyuibuilder (>=1.34.0,<1.35.0)", "mypy-boto3-apigateway (>=1.34.0,<1.35.0)", "mypy-boto3-apigatewaymanagementapi (>=1.34.0,<1.35.0)", "mypy-boto3-apigatewayv2 (>=1.34.0,<1.35.0)", "mypy-boto3-appconfig (>=1.34.0,<1.35.0)", "mypy-boto3-appconfigdata (>=1.34.0,<1.35.0)", "mypy-boto3-appfabric (>=1.34.0,<1.35.0)", "mypy-boto3-appflow (>=1.34.0,<1.35.0)", "mypy-boto3-appintegrations (>=1.34.0,<1.35.0)", "mypy-boto3-application-autoscaling (>=1.34.0,<1.35.0)", "mypy-boto3-application-insights (>=1.34.0,<1.35.0)", "mypy-boto3-application-signals (>=1.34.0,<1.35.0)", "mypy-boto3-applicationcostprofiler (>=1.34.0,<1.35.0)", "mypy-boto3-appmesh (>=1.34.0,<1.35.0)", "mypy-boto3-apprunner (>=1.34.0,<1.35.0)", "mypy-boto3-appstream (>=1.34.0,<1.35.0)", "mypy-boto3-appsync (>=1.34.0,<1.35.0)", "mypy-boto3-apptest (>=1.34.0,<1.35.0)", "mypy-boto3-arc-zonal-shift (>=1.34.0,<1.35.0)", "mypy-boto3-artifact (>=1.34.0,<1.35.0)", "mypy-boto3-athena (>=1.34.0,<1.35.0)", "mypy-boto3-auditmanager (>=1.34.0,<1.35.0)", "mypy-boto3-autoscaling (>=1.34.0,<1.35.0)", "mypy-boto3-autoscaling-plans (>=1.34.0,<1.35.0)", "mypy-boto3-b2bi (>=1.34.0,<1.35.0)", "mypy-boto3-backup (>=1.34.0,<1.35.0)", "mypy-boto3-backup-gateway (>=1.34.0,<1.35.0)", "mypy-boto3-batch (>=1.34.0,<1.35.0)", "mypy-boto3-bcm-data-exports (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock-agent (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock-agent-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-billingconductor (>=1.34.0,<1.35.0)", "mypy-boto3-braket (>=1.34.0,<1.35.0)", "mypy-boto3-budgets 
(>=1.34.0,<1.35.0)", "mypy-boto3-ce (>=1.34.0,<1.35.0)", "mypy-boto3-chatbot (>=1.34.0,<1.35.0)", "mypy-boto3-chime (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-identity (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-meetings (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-messaging (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-voice (>=1.34.0,<1.35.0)", "mypy-boto3-cleanrooms (>=1.34.0,<1.35.0)", "mypy-boto3-cleanroomsml (>=1.34.0,<1.35.0)", "mypy-boto3-cloud9 (>=1.34.0,<1.35.0)", "mypy-boto3-cloudcontrol (>=1.34.0,<1.35.0)", "mypy-boto3-clouddirectory (>=1.34.0,<1.35.0)", "mypy-boto3-cloudformation (>=1.34.0,<1.35.0)", "mypy-boto3-cloudfront (>=1.34.0,<1.35.0)", "mypy-boto3-cloudfront-keyvaluestore (>=1.34.0,<1.35.0)", "mypy-boto3-cloudhsm (>=1.34.0,<1.35.0)", "mypy-boto3-cloudhsmv2 (>=1.34.0,<1.35.0)", "mypy-boto3-cloudsearch (>=1.34.0,<1.35.0)", "mypy-boto3-cloudsearchdomain (>=1.34.0,<1.35.0)", "mypy-boto3-cloudtrail (>=1.34.0,<1.35.0)", "mypy-boto3-cloudtrail-data (>=1.34.0,<1.35.0)", "mypy-boto3-cloudwatch (>=1.34.0,<1.35.0)", "mypy-boto3-codeartifact (>=1.34.0,<1.35.0)", "mypy-boto3-codebuild (>=1.34.0,<1.35.0)", "mypy-boto3-codecatalyst (>=1.34.0,<1.35.0)", "mypy-boto3-codecommit (>=1.34.0,<1.35.0)", "mypy-boto3-codeconnections (>=1.34.0,<1.35.0)", "mypy-boto3-codedeploy (>=1.34.0,<1.35.0)", "mypy-boto3-codeguru-reviewer (>=1.34.0,<1.35.0)", "mypy-boto3-codeguru-security (>=1.34.0,<1.35.0)", "mypy-boto3-codeguruprofiler (>=1.34.0,<1.35.0)", "mypy-boto3-codepipeline (>=1.34.0,<1.35.0)", "mypy-boto3-codestar (>=1.34.0,<1.35.0)", "mypy-boto3-codestar-connections (>=1.34.0,<1.35.0)", "mypy-boto3-codestar-notifications (>=1.34.0,<1.35.0)", "mypy-boto3-cognito-identity (>=1.34.0,<1.35.0)", "mypy-boto3-cognito-idp (>=1.34.0,<1.35.0)", "mypy-boto3-cognito-sync (>=1.34.0,<1.35.0)", "mypy-boto3-comprehend (>=1.34.0,<1.35.0)", "mypy-boto3-comprehendmedical (>=1.34.0,<1.35.0)", "mypy-boto3-compute-optimizer 
(>=1.34.0,<1.35.0)", "mypy-boto3-config (>=1.34.0,<1.35.0)", "mypy-boto3-connect (>=1.34.0,<1.35.0)", "mypy-boto3-connect-contact-lens (>=1.34.0,<1.35.0)", "mypy-boto3-connectcampaigns (>=1.34.0,<1.35.0)", "mypy-boto3-connectcases (>=1.34.0,<1.35.0)", "mypy-boto3-connectparticipant (>=1.34.0,<1.35.0)", "mypy-boto3-controlcatalog (>=1.34.0,<1.35.0)", "mypy-boto3-controltower (>=1.34.0,<1.35.0)", "mypy-boto3-cost-optimization-hub (>=1.34.0,<1.35.0)", "mypy-boto3-cur (>=1.34.0,<1.35.0)", "mypy-boto3-customer-profiles (>=1.34.0,<1.35.0)", "mypy-boto3-databrew (>=1.34.0,<1.35.0)", "mypy-boto3-dataexchange (>=1.34.0,<1.35.0)", "mypy-boto3-datapipeline (>=1.34.0,<1.35.0)", "mypy-boto3-datasync (>=1.34.0,<1.35.0)", "mypy-boto3-datazone (>=1.34.0,<1.35.0)", "mypy-boto3-dax (>=1.34.0,<1.35.0)", "mypy-boto3-deadline (>=1.34.0,<1.35.0)", "mypy-boto3-detective (>=1.34.0,<1.35.0)", "mypy-boto3-devicefarm (>=1.34.0,<1.35.0)", "mypy-boto3-devops-guru (>=1.34.0,<1.35.0)", "mypy-boto3-directconnect (>=1.34.0,<1.35.0)", "mypy-boto3-discovery (>=1.34.0,<1.35.0)", "mypy-boto3-dlm (>=1.34.0,<1.35.0)", "mypy-boto3-dms (>=1.34.0,<1.35.0)", "mypy-boto3-docdb (>=1.34.0,<1.35.0)", "mypy-boto3-docdb-elastic (>=1.34.0,<1.35.0)", "mypy-boto3-drs (>=1.34.0,<1.35.0)", "mypy-boto3-ds (>=1.34.0,<1.35.0)", "mypy-boto3-dynamodb (>=1.34.0,<1.35.0)", "mypy-boto3-dynamodbstreams (>=1.34.0,<1.35.0)", "mypy-boto3-ebs (>=1.34.0,<1.35.0)", "mypy-boto3-ec2 (>=1.34.0,<1.35.0)", "mypy-boto3-ec2-instance-connect (>=1.34.0,<1.35.0)", "mypy-boto3-ecr (>=1.34.0,<1.35.0)", "mypy-boto3-ecr-public (>=1.34.0,<1.35.0)", "mypy-boto3-ecs (>=1.34.0,<1.35.0)", "mypy-boto3-efs (>=1.34.0,<1.35.0)", "mypy-boto3-eks (>=1.34.0,<1.35.0)", "mypy-boto3-eks-auth (>=1.34.0,<1.35.0)", "mypy-boto3-elastic-inference (>=1.34.0,<1.35.0)", "mypy-boto3-elasticache (>=1.34.0,<1.35.0)", "mypy-boto3-elasticbeanstalk (>=1.34.0,<1.35.0)", "mypy-boto3-elastictranscoder (>=1.34.0,<1.35.0)", "mypy-boto3-elb (>=1.34.0,<1.35.0)", "mypy-boto3-elbv2 
(>=1.34.0,<1.35.0)", "mypy-boto3-emr (>=1.34.0,<1.35.0)", "mypy-boto3-emr-containers (>=1.34.0,<1.35.0)", "mypy-boto3-emr-serverless (>=1.34.0,<1.35.0)", "mypy-boto3-entityresolution (>=1.34.0,<1.35.0)", "mypy-boto3-es (>=1.34.0,<1.35.0)", "mypy-boto3-events (>=1.34.0,<1.35.0)", "mypy-boto3-evidently (>=1.34.0,<1.35.0)", "mypy-boto3-finspace (>=1.34.0,<1.35.0)", "mypy-boto3-finspace-data (>=1.34.0,<1.35.0)", "mypy-boto3-firehose (>=1.34.0,<1.35.0)", "mypy-boto3-fis (>=1.34.0,<1.35.0)", "mypy-boto3-fms (>=1.34.0,<1.35.0)", "mypy-boto3-forecast (>=1.34.0,<1.35.0)", "mypy-boto3-forecastquery (>=1.34.0,<1.35.0)", "mypy-boto3-frauddetector (>=1.34.0,<1.35.0)", "mypy-boto3-freetier (>=1.34.0,<1.35.0)", "mypy-boto3-fsx (>=1.34.0,<1.35.0)", "mypy-boto3-gamelift (>=1.34.0,<1.35.0)", "mypy-boto3-glacier (>=1.34.0,<1.35.0)", "mypy-boto3-globalaccelerator (>=1.34.0,<1.35.0)", "mypy-boto3-glue (>=1.34.0,<1.35.0)", "mypy-boto3-grafana (>=1.34.0,<1.35.0)", "mypy-boto3-greengrass (>=1.34.0,<1.35.0)", "mypy-boto3-greengrassv2 (>=1.34.0,<1.35.0)", "mypy-boto3-groundstation (>=1.34.0,<1.35.0)", "mypy-boto3-guardduty (>=1.34.0,<1.35.0)", "mypy-boto3-health (>=1.34.0,<1.35.0)", "mypy-boto3-healthlake (>=1.34.0,<1.35.0)", "mypy-boto3-iam (>=1.34.0,<1.35.0)", "mypy-boto3-identitystore (>=1.34.0,<1.35.0)", "mypy-boto3-imagebuilder (>=1.34.0,<1.35.0)", "mypy-boto3-importexport (>=1.34.0,<1.35.0)", "mypy-boto3-inspector (>=1.34.0,<1.35.0)", "mypy-boto3-inspector-scan (>=1.34.0,<1.35.0)", "mypy-boto3-inspector2 (>=1.34.0,<1.35.0)", "mypy-boto3-internetmonitor (>=1.34.0,<1.35.0)", "mypy-boto3-iot (>=1.34.0,<1.35.0)", "mypy-boto3-iot-data (>=1.34.0,<1.35.0)", "mypy-boto3-iot-jobs-data (>=1.34.0,<1.35.0)", "mypy-boto3-iot1click-devices (>=1.34.0,<1.35.0)", "mypy-boto3-iot1click-projects (>=1.34.0,<1.35.0)", "mypy-boto3-iotanalytics (>=1.34.0,<1.35.0)", "mypy-boto3-iotdeviceadvisor (>=1.34.0,<1.35.0)", "mypy-boto3-iotevents (>=1.34.0,<1.35.0)", "mypy-boto3-iotevents-data (>=1.34.0,<1.35.0)", 
"mypy-boto3-iotfleethub (>=1.34.0,<1.35.0)", "mypy-boto3-iotfleetwise (>=1.34.0,<1.35.0)", "mypy-boto3-iotsecuretunneling (>=1.34.0,<1.35.0)", "mypy-boto3-iotsitewise (>=1.34.0,<1.35.0)", "mypy-boto3-iotthingsgraph (>=1.34.0,<1.35.0)", "mypy-boto3-iottwinmaker (>=1.34.0,<1.35.0)", "mypy-boto3-iotwireless (>=1.34.0,<1.35.0)", "mypy-boto3-ivs (>=1.34.0,<1.35.0)", "mypy-boto3-ivs-realtime (>=1.34.0,<1.35.0)", "mypy-boto3-ivschat (>=1.34.0,<1.35.0)", "mypy-boto3-kafka (>=1.34.0,<1.35.0)", "mypy-boto3-kafkaconnect (>=1.34.0,<1.35.0)", "mypy-boto3-kendra (>=1.34.0,<1.35.0)", "mypy-boto3-kendra-ranking (>=1.34.0,<1.35.0)", "mypy-boto3-keyspaces (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-archived-media (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-media (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-signaling (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-webrtc-storage (>=1.34.0,<1.35.0)", "mypy-boto3-kinesisanalytics (>=1.34.0,<1.35.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.34.0,<1.35.0)", "mypy-boto3-kinesisvideo (>=1.34.0,<1.35.0)", "mypy-boto3-kms (>=1.34.0,<1.35.0)", "mypy-boto3-lakeformation (>=1.34.0,<1.35.0)", "mypy-boto3-lambda (>=1.34.0,<1.35.0)", "mypy-boto3-launch-wizard (>=1.34.0,<1.35.0)", "mypy-boto3-lex-models (>=1.34.0,<1.35.0)", "mypy-boto3-lex-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-lexv2-models (>=1.34.0,<1.35.0)", "mypy-boto3-lexv2-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-license-manager (>=1.34.0,<1.35.0)", "mypy-boto3-license-manager-linux-subscriptions (>=1.34.0,<1.35.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.34.0,<1.35.0)", "mypy-boto3-lightsail (>=1.34.0,<1.35.0)", "mypy-boto3-location (>=1.34.0,<1.35.0)", "mypy-boto3-logs (>=1.34.0,<1.35.0)", "mypy-boto3-lookoutequipment (>=1.34.0,<1.35.0)", "mypy-boto3-lookoutmetrics (>=1.34.0,<1.35.0)", "mypy-boto3-lookoutvision (>=1.34.0,<1.35.0)", "mypy-boto3-m2 (>=1.34.0,<1.35.0)", "mypy-boto3-machinelearning (>=1.34.0,<1.35.0)", 
"mypy-boto3-macie2 (>=1.34.0,<1.35.0)", "mypy-boto3-mailmanager (>=1.34.0,<1.35.0)", "mypy-boto3-managedblockchain (>=1.34.0,<1.35.0)", "mypy-boto3-managedblockchain-query (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-agreement (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-catalog (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-deployment (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-entitlement (>=1.34.0,<1.35.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.34.0,<1.35.0)", "mypy-boto3-mediaconnect (>=1.34.0,<1.35.0)", "mypy-boto3-mediaconvert (>=1.34.0,<1.35.0)", "mypy-boto3-medialive (>=1.34.0,<1.35.0)", "mypy-boto3-mediapackage (>=1.34.0,<1.35.0)", "mypy-boto3-mediapackage-vod (>=1.34.0,<1.35.0)", "mypy-boto3-mediapackagev2 (>=1.34.0,<1.35.0)", "mypy-boto3-mediastore (>=1.34.0,<1.35.0)", "mypy-boto3-mediastore-data (>=1.34.0,<1.35.0)", "mypy-boto3-mediatailor (>=1.34.0,<1.35.0)", "mypy-boto3-medical-imaging (>=1.34.0,<1.35.0)", "mypy-boto3-memorydb (>=1.34.0,<1.35.0)", "mypy-boto3-meteringmarketplace (>=1.34.0,<1.35.0)", "mypy-boto3-mgh (>=1.34.0,<1.35.0)", "mypy-boto3-mgn (>=1.34.0,<1.35.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.34.0,<1.35.0)", "mypy-boto3-migrationhub-config (>=1.34.0,<1.35.0)", "mypy-boto3-migrationhuborchestrator (>=1.34.0,<1.35.0)", "mypy-boto3-migrationhubstrategy (>=1.34.0,<1.35.0)", "mypy-boto3-mq (>=1.34.0,<1.35.0)", "mypy-boto3-mturk (>=1.34.0,<1.35.0)", "mypy-boto3-mwaa (>=1.34.0,<1.35.0)", "mypy-boto3-neptune (>=1.34.0,<1.35.0)", "mypy-boto3-neptune-graph (>=1.34.0,<1.35.0)", "mypy-boto3-neptunedata (>=1.34.0,<1.35.0)", "mypy-boto3-network-firewall (>=1.34.0,<1.35.0)", "mypy-boto3-networkmanager (>=1.34.0,<1.35.0)", "mypy-boto3-networkmonitor (>=1.34.0,<1.35.0)", "mypy-boto3-nimble (>=1.34.0,<1.35.0)", "mypy-boto3-oam (>=1.34.0,<1.35.0)", "mypy-boto3-omics (>=1.34.0,<1.35.0)", "mypy-boto3-opensearch (>=1.34.0,<1.35.0)", "mypy-boto3-opensearchserverless (>=1.34.0,<1.35.0)", "mypy-boto3-opsworks (>=1.34.0,<1.35.0)", 
"mypy-boto3-opsworkscm (>=1.34.0,<1.35.0)", "mypy-boto3-organizations (>=1.34.0,<1.35.0)", "mypy-boto3-osis (>=1.34.0,<1.35.0)", "mypy-boto3-outposts (>=1.34.0,<1.35.0)", "mypy-boto3-panorama (>=1.34.0,<1.35.0)", "mypy-boto3-payment-cryptography (>=1.34.0,<1.35.0)", "mypy-boto3-payment-cryptography-data (>=1.34.0,<1.35.0)", "mypy-boto3-pca-connector-ad (>=1.34.0,<1.35.0)", "mypy-boto3-pca-connector-scep (>=1.34.0,<1.35.0)", "mypy-boto3-personalize (>=1.34.0,<1.35.0)", "mypy-boto3-personalize-events (>=1.34.0,<1.35.0)", "mypy-boto3-personalize-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-pi (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint-email (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint-sms-voice (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.34.0,<1.35.0)", "mypy-boto3-pipes (>=1.34.0,<1.35.0)", "mypy-boto3-polly (>=1.34.0,<1.35.0)", "mypy-boto3-pricing (>=1.34.0,<1.35.0)", "mypy-boto3-privatenetworks (>=1.34.0,<1.35.0)", "mypy-boto3-proton (>=1.34.0,<1.35.0)", "mypy-boto3-qapps (>=1.34.0,<1.35.0)", "mypy-boto3-qbusiness (>=1.34.0,<1.35.0)", "mypy-boto3-qconnect (>=1.34.0,<1.35.0)", "mypy-boto3-qldb (>=1.34.0,<1.35.0)", "mypy-boto3-qldb-session (>=1.34.0,<1.35.0)", "mypy-boto3-quicksight (>=1.34.0,<1.35.0)", "mypy-boto3-ram (>=1.34.0,<1.35.0)", "mypy-boto3-rbin (>=1.34.0,<1.35.0)", "mypy-boto3-rds (>=1.34.0,<1.35.0)", "mypy-boto3-rds-data (>=1.34.0,<1.35.0)", "mypy-boto3-redshift (>=1.34.0,<1.35.0)", "mypy-boto3-redshift-data (>=1.34.0,<1.35.0)", "mypy-boto3-redshift-serverless (>=1.34.0,<1.35.0)", "mypy-boto3-rekognition (>=1.34.0,<1.35.0)", "mypy-boto3-repostspace (>=1.34.0,<1.35.0)", "mypy-boto3-resiliencehub (>=1.34.0,<1.35.0)", "mypy-boto3-resource-explorer-2 (>=1.34.0,<1.35.0)", "mypy-boto3-resource-groups (>=1.34.0,<1.35.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.34.0,<1.35.0)", "mypy-boto3-robomaker (>=1.34.0,<1.35.0)", "mypy-boto3-rolesanywhere (>=1.34.0,<1.35.0)", "mypy-boto3-route53 (>=1.34.0,<1.35.0)", 
"mypy-boto3-route53-recovery-cluster (>=1.34.0,<1.35.0)", "mypy-boto3-route53-recovery-control-config (>=1.34.0,<1.35.0)", "mypy-boto3-route53-recovery-readiness (>=1.34.0,<1.35.0)", "mypy-boto3-route53domains (>=1.34.0,<1.35.0)", "mypy-boto3-route53profiles (>=1.34.0,<1.35.0)", "mypy-boto3-route53resolver (>=1.34.0,<1.35.0)", "mypy-boto3-rum (>=1.34.0,<1.35.0)", "mypy-boto3-s3 (>=1.34.0,<1.35.0)", "mypy-boto3-s3control (>=1.34.0,<1.35.0)", "mypy-boto3-s3outposts (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-edge (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-geospatial (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-metrics (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-savingsplans (>=1.34.0,<1.35.0)", "mypy-boto3-scheduler (>=1.34.0,<1.35.0)", "mypy-boto3-schemas (>=1.34.0,<1.35.0)", "mypy-boto3-sdb (>=1.34.0,<1.35.0)", "mypy-boto3-secretsmanager (>=1.34.0,<1.35.0)", "mypy-boto3-securityhub (>=1.34.0,<1.35.0)", "mypy-boto3-securitylake (>=1.34.0,<1.35.0)", "mypy-boto3-serverlessrepo (>=1.34.0,<1.35.0)", "mypy-boto3-service-quotas (>=1.34.0,<1.35.0)", "mypy-boto3-servicecatalog (>=1.34.0,<1.35.0)", "mypy-boto3-servicecatalog-appregistry (>=1.34.0,<1.35.0)", "mypy-boto3-servicediscovery (>=1.34.0,<1.35.0)", "mypy-boto3-ses (>=1.34.0,<1.35.0)", "mypy-boto3-sesv2 (>=1.34.0,<1.35.0)", "mypy-boto3-shield (>=1.34.0,<1.35.0)", "mypy-boto3-signer (>=1.34.0,<1.35.0)", "mypy-boto3-simspaceweaver (>=1.34.0,<1.35.0)", "mypy-boto3-sms (>=1.34.0,<1.35.0)", "mypy-boto3-sms-voice (>=1.34.0,<1.35.0)", "mypy-boto3-snow-device-management (>=1.34.0,<1.35.0)", "mypy-boto3-snowball (>=1.34.0,<1.35.0)", "mypy-boto3-sns (>=1.34.0,<1.35.0)", "mypy-boto3-sqs (>=1.34.0,<1.35.0)", "mypy-boto3-ssm (>=1.34.0,<1.35.0)", "mypy-boto3-ssm-contacts (>=1.34.0,<1.35.0)", "mypy-boto3-ssm-incidents 
(>=1.34.0,<1.35.0)", "mypy-boto3-ssm-quicksetup (>=1.34.0,<1.35.0)", "mypy-boto3-ssm-sap (>=1.34.0,<1.35.0)", "mypy-boto3-sso (>=1.34.0,<1.35.0)", "mypy-boto3-sso-admin (>=1.34.0,<1.35.0)", "mypy-boto3-sso-oidc (>=1.34.0,<1.35.0)", "mypy-boto3-stepfunctions (>=1.34.0,<1.35.0)", "mypy-boto3-storagegateway (>=1.34.0,<1.35.0)", "mypy-boto3-sts (>=1.34.0,<1.35.0)", "mypy-boto3-supplychain (>=1.34.0,<1.35.0)", "mypy-boto3-support (>=1.34.0,<1.35.0)", "mypy-boto3-support-app (>=1.34.0,<1.35.0)", "mypy-boto3-swf (>=1.34.0,<1.35.0)", "mypy-boto3-synthetics (>=1.34.0,<1.35.0)", "mypy-boto3-taxsettings (>=1.34.0,<1.35.0)", "mypy-boto3-textract (>=1.34.0,<1.35.0)", "mypy-boto3-timestream-influxdb (>=1.34.0,<1.35.0)", "mypy-boto3-timestream-query (>=1.34.0,<1.35.0)", "mypy-boto3-timestream-write (>=1.34.0,<1.35.0)", "mypy-boto3-tnb (>=1.34.0,<1.35.0)", "mypy-boto3-transcribe (>=1.34.0,<1.35.0)", "mypy-boto3-transfer (>=1.34.0,<1.35.0)", "mypy-boto3-translate (>=1.34.0,<1.35.0)", "mypy-boto3-trustedadvisor (>=1.34.0,<1.35.0)", "mypy-boto3-verifiedpermissions (>=1.34.0,<1.35.0)", "mypy-boto3-voice-id (>=1.34.0,<1.35.0)", "mypy-boto3-vpc-lattice (>=1.34.0,<1.35.0)", "mypy-boto3-waf (>=1.34.0,<1.35.0)", "mypy-boto3-waf-regional (>=1.34.0,<1.35.0)", "mypy-boto3-wafv2 (>=1.34.0,<1.35.0)", "mypy-boto3-wellarchitected (>=1.34.0,<1.35.0)", "mypy-boto3-wisdom (>=1.34.0,<1.35.0)", "mypy-boto3-workdocs (>=1.34.0,<1.35.0)", "mypy-boto3-worklink (>=1.34.0,<1.35.0)", "mypy-boto3-workmail (>=1.34.0,<1.35.0)", "mypy-boto3-workmailmessageflow (>=1.34.0,<1.35.0)", "mypy-boto3-workspaces (>=1.34.0,<1.35.0)", "mypy-boto3-workspaces-thin-client (>=1.34.0,<1.35.0)", "mypy-boto3-workspaces-web (>=1.34.0,<1.35.0)", "mypy-boto3-xray (>=1.34.0,<1.35.0)"] -amp = ["mypy-boto3-amp (>=1.34.0,<1.35.0)"] -amplify = ["mypy-boto3-amplify (>=1.34.0,<1.35.0)"] -amplifybackend = ["mypy-boto3-amplifybackend (>=1.34.0,<1.35.0)"] -amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.34.0,<1.35.0)"] -apigateway = 
["mypy-boto3-apigateway (>=1.34.0,<1.35.0)"] -apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.34.0,<1.35.0)"] -apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.34.0,<1.35.0)"] -appconfig = ["mypy-boto3-appconfig (>=1.34.0,<1.35.0)"] -appconfigdata = ["mypy-boto3-appconfigdata (>=1.34.0,<1.35.0)"] -appfabric = ["mypy-boto3-appfabric (>=1.34.0,<1.35.0)"] -appflow = ["mypy-boto3-appflow (>=1.34.0,<1.35.0)"] -appintegrations = ["mypy-boto3-appintegrations (>=1.34.0,<1.35.0)"] -application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.34.0,<1.35.0)"] -application-insights = ["mypy-boto3-application-insights (>=1.34.0,<1.35.0)"] -application-signals = ["mypy-boto3-application-signals (>=1.34.0,<1.35.0)"] -applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.34.0,<1.35.0)"] -appmesh = ["mypy-boto3-appmesh (>=1.34.0,<1.35.0)"] -apprunner = ["mypy-boto3-apprunner (>=1.34.0,<1.35.0)"] -appstream = ["mypy-boto3-appstream (>=1.34.0,<1.35.0)"] -appsync = ["mypy-boto3-appsync (>=1.34.0,<1.35.0)"] -apptest = ["mypy-boto3-apptest (>=1.34.0,<1.35.0)"] -arc-zonal-shift = ["mypy-boto3-arc-zonal-shift (>=1.34.0,<1.35.0)"] -artifact = ["mypy-boto3-artifact (>=1.34.0,<1.35.0)"] -athena = ["mypy-boto3-athena (>=1.34.0,<1.35.0)"] -auditmanager = ["mypy-boto3-auditmanager (>=1.34.0,<1.35.0)"] -autoscaling = ["mypy-boto3-autoscaling (>=1.34.0,<1.35.0)"] -autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.34.0,<1.35.0)"] -b2bi = ["mypy-boto3-b2bi (>=1.34.0,<1.35.0)"] -backup = ["mypy-boto3-backup (>=1.34.0,<1.35.0)"] -backup-gateway = ["mypy-boto3-backup-gateway (>=1.34.0,<1.35.0)"] -batch = ["mypy-boto3-batch (>=1.34.0,<1.35.0)"] -bcm-data-exports = ["mypy-boto3-bcm-data-exports (>=1.34.0,<1.35.0)"] -bedrock = ["mypy-boto3-bedrock (>=1.34.0,<1.35.0)"] -bedrock-agent = ["mypy-boto3-bedrock-agent (>=1.34.0,<1.35.0)"] -bedrock-agent-runtime = ["mypy-boto3-bedrock-agent-runtime (>=1.34.0,<1.35.0)"] -bedrock-runtime = ["mypy-boto3-bedrock-runtime 
(>=1.34.0,<1.35.0)"] -billingconductor = ["mypy-boto3-billingconductor (>=1.34.0,<1.35.0)"] -boto3 = ["boto3 (==1.34.162)", "botocore (==1.34.162)"] -braket = ["mypy-boto3-braket (>=1.34.0,<1.35.0)"] -budgets = ["mypy-boto3-budgets (>=1.34.0,<1.35.0)"] -ce = ["mypy-boto3-ce (>=1.34.0,<1.35.0)"] -chatbot = ["mypy-boto3-chatbot (>=1.34.0,<1.35.0)"] -chime = ["mypy-boto3-chime (>=1.34.0,<1.35.0)"] -chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.34.0,<1.35.0)"] -chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.34.0,<1.35.0)"] -chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.34.0,<1.35.0)"] -chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.34.0,<1.35.0)"] -chime-sdk-voice = ["mypy-boto3-chime-sdk-voice (>=1.34.0,<1.35.0)"] -cleanrooms = ["mypy-boto3-cleanrooms (>=1.34.0,<1.35.0)"] -cleanroomsml = ["mypy-boto3-cleanroomsml (>=1.34.0,<1.35.0)"] -cloud9 = ["mypy-boto3-cloud9 (>=1.34.0,<1.35.0)"] -cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.34.0,<1.35.0)"] -clouddirectory = ["mypy-boto3-clouddirectory (>=1.34.0,<1.35.0)"] -cloudformation = ["mypy-boto3-cloudformation (>=1.34.0,<1.35.0)"] -cloudfront = ["mypy-boto3-cloudfront (>=1.34.0,<1.35.0)"] -cloudfront-keyvaluestore = ["mypy-boto3-cloudfront-keyvaluestore (>=1.34.0,<1.35.0)"] -cloudhsm = ["mypy-boto3-cloudhsm (>=1.34.0,<1.35.0)"] -cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.34.0,<1.35.0)"] -cloudsearch = ["mypy-boto3-cloudsearch (>=1.34.0,<1.35.0)"] -cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.34.0,<1.35.0)"] -cloudtrail = ["mypy-boto3-cloudtrail (>=1.34.0,<1.35.0)"] -cloudtrail-data = ["mypy-boto3-cloudtrail-data (>=1.34.0,<1.35.0)"] -cloudwatch = ["mypy-boto3-cloudwatch (>=1.34.0,<1.35.0)"] -codeartifact = ["mypy-boto3-codeartifact (>=1.34.0,<1.35.0)"] -codebuild = ["mypy-boto3-codebuild (>=1.34.0,<1.35.0)"] -codecatalyst = ["mypy-boto3-codecatalyst (>=1.34.0,<1.35.0)"] -codecommit = ["mypy-boto3-codecommit (>=1.34.0,<1.35.0)"] -codeconnections = 
["mypy-boto3-codeconnections (>=1.34.0,<1.35.0)"] -codedeploy = ["mypy-boto3-codedeploy (>=1.34.0,<1.35.0)"] -codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.34.0,<1.35.0)"] -codeguru-security = ["mypy-boto3-codeguru-security (>=1.34.0,<1.35.0)"] -codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.34.0,<1.35.0)"] -codepipeline = ["mypy-boto3-codepipeline (>=1.34.0,<1.35.0)"] -codestar = ["mypy-boto3-codestar (>=1.34.0,<1.35.0)"] -codestar-connections = ["mypy-boto3-codestar-connections (>=1.34.0,<1.35.0)"] -codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.34.0,<1.35.0)"] -cognito-identity = ["mypy-boto3-cognito-identity (>=1.34.0,<1.35.0)"] -cognito-idp = ["mypy-boto3-cognito-idp (>=1.34.0,<1.35.0)"] -cognito-sync = ["mypy-boto3-cognito-sync (>=1.34.0,<1.35.0)"] -comprehend = ["mypy-boto3-comprehend (>=1.34.0,<1.35.0)"] -comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.34.0,<1.35.0)"] -compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.34.0,<1.35.0)"] -config = ["mypy-boto3-config (>=1.34.0,<1.35.0)"] -connect = ["mypy-boto3-connect (>=1.34.0,<1.35.0)"] -connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.34.0,<1.35.0)"] -connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.34.0,<1.35.0)"] -connectcases = ["mypy-boto3-connectcases (>=1.34.0,<1.35.0)"] -connectparticipant = ["mypy-boto3-connectparticipant (>=1.34.0,<1.35.0)"] -controlcatalog = ["mypy-boto3-controlcatalog (>=1.34.0,<1.35.0)"] -controltower = ["mypy-boto3-controltower (>=1.34.0,<1.35.0)"] -cost-optimization-hub = ["mypy-boto3-cost-optimization-hub (>=1.34.0,<1.35.0)"] -cur = ["mypy-boto3-cur (>=1.34.0,<1.35.0)"] -customer-profiles = ["mypy-boto3-customer-profiles (>=1.34.0,<1.35.0)"] -databrew = ["mypy-boto3-databrew (>=1.34.0,<1.35.0)"] -dataexchange = ["mypy-boto3-dataexchange (>=1.34.0,<1.35.0)"] -datapipeline = ["mypy-boto3-datapipeline (>=1.34.0,<1.35.0)"] -datasync = ["mypy-boto3-datasync (>=1.34.0,<1.35.0)"] -datazone = ["mypy-boto3-datazone 
(>=1.34.0,<1.35.0)"] -dax = ["mypy-boto3-dax (>=1.34.0,<1.35.0)"] -deadline = ["mypy-boto3-deadline (>=1.34.0,<1.35.0)"] -detective = ["mypy-boto3-detective (>=1.34.0,<1.35.0)"] -devicefarm = ["mypy-boto3-devicefarm (>=1.34.0,<1.35.0)"] -devops-guru = ["mypy-boto3-devops-guru (>=1.34.0,<1.35.0)"] -directconnect = ["mypy-boto3-directconnect (>=1.34.0,<1.35.0)"] -discovery = ["mypy-boto3-discovery (>=1.34.0,<1.35.0)"] -dlm = ["mypy-boto3-dlm (>=1.34.0,<1.35.0)"] -dms = ["mypy-boto3-dms (>=1.34.0,<1.35.0)"] -docdb = ["mypy-boto3-docdb (>=1.34.0,<1.35.0)"] -docdb-elastic = ["mypy-boto3-docdb-elastic (>=1.34.0,<1.35.0)"] -drs = ["mypy-boto3-drs (>=1.34.0,<1.35.0)"] -ds = ["mypy-boto3-ds (>=1.34.0,<1.35.0)"] -dynamodb = ["mypy-boto3-dynamodb (>=1.34.0,<1.35.0)"] -dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.34.0,<1.35.0)"] -ebs = ["mypy-boto3-ebs (>=1.34.0,<1.35.0)"] -ec2 = ["mypy-boto3-ec2 (>=1.34.0,<1.35.0)"] -ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.34.0,<1.35.0)"] -ecr = ["mypy-boto3-ecr (>=1.34.0,<1.35.0)"] -ecr-public = ["mypy-boto3-ecr-public (>=1.34.0,<1.35.0)"] -ecs = ["mypy-boto3-ecs (>=1.34.0,<1.35.0)"] -efs = ["mypy-boto3-efs (>=1.34.0,<1.35.0)"] -eks = ["mypy-boto3-eks (>=1.34.0,<1.35.0)"] -eks-auth = ["mypy-boto3-eks-auth (>=1.34.0,<1.35.0)"] -elastic-inference = ["mypy-boto3-elastic-inference (>=1.34.0,<1.35.0)"] -elasticache = ["mypy-boto3-elasticache (>=1.34.0,<1.35.0)"] -elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.34.0,<1.35.0)"] -elastictranscoder = ["mypy-boto3-elastictranscoder (>=1.34.0,<1.35.0)"] -elb = ["mypy-boto3-elb (>=1.34.0,<1.35.0)"] -elbv2 = ["mypy-boto3-elbv2 (>=1.34.0,<1.35.0)"] -emr = ["mypy-boto3-emr (>=1.34.0,<1.35.0)"] -emr-containers = ["mypy-boto3-emr-containers (>=1.34.0,<1.35.0)"] -emr-serverless = ["mypy-boto3-emr-serverless (>=1.34.0,<1.35.0)"] -entityresolution = ["mypy-boto3-entityresolution (>=1.34.0,<1.35.0)"] -es = ["mypy-boto3-es (>=1.34.0,<1.35.0)"] -essential = 
["mypy-boto3-cloudformation (>=1.34.0,<1.35.0)", "mypy-boto3-dynamodb (>=1.34.0,<1.35.0)", "mypy-boto3-ec2 (>=1.34.0,<1.35.0)", "mypy-boto3-lambda (>=1.34.0,<1.35.0)", "mypy-boto3-rds (>=1.34.0,<1.35.0)", "mypy-boto3-s3 (>=1.34.0,<1.35.0)", "mypy-boto3-sqs (>=1.34.0,<1.35.0)"] -events = ["mypy-boto3-events (>=1.34.0,<1.35.0)"] -evidently = ["mypy-boto3-evidently (>=1.34.0,<1.35.0)"] -finspace = ["mypy-boto3-finspace (>=1.34.0,<1.35.0)"] -finspace-data = ["mypy-boto3-finspace-data (>=1.34.0,<1.35.0)"] -firehose = ["mypy-boto3-firehose (>=1.34.0,<1.35.0)"] -fis = ["mypy-boto3-fis (>=1.34.0,<1.35.0)"] -fms = ["mypy-boto3-fms (>=1.34.0,<1.35.0)"] -forecast = ["mypy-boto3-forecast (>=1.34.0,<1.35.0)"] -forecastquery = ["mypy-boto3-forecastquery (>=1.34.0,<1.35.0)"] -frauddetector = ["mypy-boto3-frauddetector (>=1.34.0,<1.35.0)"] -freetier = ["mypy-boto3-freetier (>=1.34.0,<1.35.0)"] -fsx = ["mypy-boto3-fsx (>=1.34.0,<1.35.0)"] -gamelift = ["mypy-boto3-gamelift (>=1.34.0,<1.35.0)"] -glacier = ["mypy-boto3-glacier (>=1.34.0,<1.35.0)"] -globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.34.0,<1.35.0)"] -glue = ["mypy-boto3-glue (>=1.34.0,<1.35.0)"] -grafana = ["mypy-boto3-grafana (>=1.34.0,<1.35.0)"] -greengrass = ["mypy-boto3-greengrass (>=1.34.0,<1.35.0)"] -greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.34.0,<1.35.0)"] -groundstation = ["mypy-boto3-groundstation (>=1.34.0,<1.35.0)"] -guardduty = ["mypy-boto3-guardduty (>=1.34.0,<1.35.0)"] -health = ["mypy-boto3-health (>=1.34.0,<1.35.0)"] -healthlake = ["mypy-boto3-healthlake (>=1.34.0,<1.35.0)"] -iam = ["mypy-boto3-iam (>=1.34.0,<1.35.0)"] -identitystore = ["mypy-boto3-identitystore (>=1.34.0,<1.35.0)"] -imagebuilder = ["mypy-boto3-imagebuilder (>=1.34.0,<1.35.0)"] -importexport = ["mypy-boto3-importexport (>=1.34.0,<1.35.0)"] -inspector = ["mypy-boto3-inspector (>=1.34.0,<1.35.0)"] -inspector-scan = ["mypy-boto3-inspector-scan (>=1.34.0,<1.35.0)"] -inspector2 = ["mypy-boto3-inspector2 (>=1.34.0,<1.35.0)"] 
-internetmonitor = ["mypy-boto3-internetmonitor (>=1.34.0,<1.35.0)"] -iot = ["mypy-boto3-iot (>=1.34.0,<1.35.0)"] -iot-data = ["mypy-boto3-iot-data (>=1.34.0,<1.35.0)"] -iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.34.0,<1.35.0)"] -iot1click-devices = ["mypy-boto3-iot1click-devices (>=1.34.0,<1.35.0)"] -iot1click-projects = ["mypy-boto3-iot1click-projects (>=1.34.0,<1.35.0)"] -iotanalytics = ["mypy-boto3-iotanalytics (>=1.34.0,<1.35.0)"] -iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.34.0,<1.35.0)"] -iotevents = ["mypy-boto3-iotevents (>=1.34.0,<1.35.0)"] -iotevents-data = ["mypy-boto3-iotevents-data (>=1.34.0,<1.35.0)"] -iotfleethub = ["mypy-boto3-iotfleethub (>=1.34.0,<1.35.0)"] -iotfleetwise = ["mypy-boto3-iotfleetwise (>=1.34.0,<1.35.0)"] -iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.34.0,<1.35.0)"] -iotsitewise = ["mypy-boto3-iotsitewise (>=1.34.0,<1.35.0)"] -iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.34.0,<1.35.0)"] -iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.34.0,<1.35.0)"] -iotwireless = ["mypy-boto3-iotwireless (>=1.34.0,<1.35.0)"] -ivs = ["mypy-boto3-ivs (>=1.34.0,<1.35.0)"] -ivs-realtime = ["mypy-boto3-ivs-realtime (>=1.34.0,<1.35.0)"] -ivschat = ["mypy-boto3-ivschat (>=1.34.0,<1.35.0)"] -kafka = ["mypy-boto3-kafka (>=1.34.0,<1.35.0)"] -kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.34.0,<1.35.0)"] -kendra = ["mypy-boto3-kendra (>=1.34.0,<1.35.0)"] -kendra-ranking = ["mypy-boto3-kendra-ranking (>=1.34.0,<1.35.0)"] -keyspaces = ["mypy-boto3-keyspaces (>=1.34.0,<1.35.0)"] -kinesis = ["mypy-boto3-kinesis (>=1.34.0,<1.35.0)"] -kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.34.0,<1.35.0)"] -kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.34.0,<1.35.0)"] -kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.34.0,<1.35.0)"] -kinesis-video-webrtc-storage = ["mypy-boto3-kinesis-video-webrtc-storage (>=1.34.0,<1.35.0)"] -kinesisanalytics = ["mypy-boto3-kinesisanalytics 
(>=1.34.0,<1.35.0)"] -kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.34.0,<1.35.0)"] -kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.34.0,<1.35.0)"] -kms = ["mypy-boto3-kms (>=1.34.0,<1.35.0)"] -lakeformation = ["mypy-boto3-lakeformation (>=1.34.0,<1.35.0)"] -lambda = ["mypy-boto3-lambda (>=1.34.0,<1.35.0)"] -launch-wizard = ["mypy-boto3-launch-wizard (>=1.34.0,<1.35.0)"] -lex-models = ["mypy-boto3-lex-models (>=1.34.0,<1.35.0)"] -lex-runtime = ["mypy-boto3-lex-runtime (>=1.34.0,<1.35.0)"] -lexv2-models = ["mypy-boto3-lexv2-models (>=1.34.0,<1.35.0)"] -lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.34.0,<1.35.0)"] -license-manager = ["mypy-boto3-license-manager (>=1.34.0,<1.35.0)"] -license-manager-linux-subscriptions = ["mypy-boto3-license-manager-linux-subscriptions (>=1.34.0,<1.35.0)"] -license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.34.0,<1.35.0)"] -lightsail = ["mypy-boto3-lightsail (>=1.34.0,<1.35.0)"] -location = ["mypy-boto3-location (>=1.34.0,<1.35.0)"] -logs = ["mypy-boto3-logs (>=1.34.0,<1.35.0)"] -lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.34.0,<1.35.0)"] -lookoutmetrics = ["mypy-boto3-lookoutmetrics (>=1.34.0,<1.35.0)"] -lookoutvision = ["mypy-boto3-lookoutvision (>=1.34.0,<1.35.0)"] -m2 = ["mypy-boto3-m2 (>=1.34.0,<1.35.0)"] -machinelearning = ["mypy-boto3-machinelearning (>=1.34.0,<1.35.0)"] -macie2 = ["mypy-boto3-macie2 (>=1.34.0,<1.35.0)"] -mailmanager = ["mypy-boto3-mailmanager (>=1.34.0,<1.35.0)"] -managedblockchain = ["mypy-boto3-managedblockchain (>=1.34.0,<1.35.0)"] -managedblockchain-query = ["mypy-boto3-managedblockchain-query (>=1.34.0,<1.35.0)"] -marketplace-agreement = ["mypy-boto3-marketplace-agreement (>=1.34.0,<1.35.0)"] -marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.34.0,<1.35.0)"] -marketplace-deployment = ["mypy-boto3-marketplace-deployment (>=1.34.0,<1.35.0)"] -marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.34.0,<1.35.0)"] 
-marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.34.0,<1.35.0)"] -mediaconnect = ["mypy-boto3-mediaconnect (>=1.34.0,<1.35.0)"] -mediaconvert = ["mypy-boto3-mediaconvert (>=1.34.0,<1.35.0)"] -medialive = ["mypy-boto3-medialive (>=1.34.0,<1.35.0)"] -mediapackage = ["mypy-boto3-mediapackage (>=1.34.0,<1.35.0)"] -mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.34.0,<1.35.0)"] -mediapackagev2 = ["mypy-boto3-mediapackagev2 (>=1.34.0,<1.35.0)"] -mediastore = ["mypy-boto3-mediastore (>=1.34.0,<1.35.0)"] -mediastore-data = ["mypy-boto3-mediastore-data (>=1.34.0,<1.35.0)"] -mediatailor = ["mypy-boto3-mediatailor (>=1.34.0,<1.35.0)"] -medical-imaging = ["mypy-boto3-medical-imaging (>=1.34.0,<1.35.0)"] -memorydb = ["mypy-boto3-memorydb (>=1.34.0,<1.35.0)"] -meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.34.0,<1.35.0)"] -mgh = ["mypy-boto3-mgh (>=1.34.0,<1.35.0)"] -mgn = ["mypy-boto3-mgn (>=1.34.0,<1.35.0)"] -migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.34.0,<1.35.0)"] -migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.34.0,<1.35.0)"] -migrationhuborchestrator = ["mypy-boto3-migrationhuborchestrator (>=1.34.0,<1.35.0)"] -migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.34.0,<1.35.0)"] -mq = ["mypy-boto3-mq (>=1.34.0,<1.35.0)"] -mturk = ["mypy-boto3-mturk (>=1.34.0,<1.35.0)"] -mwaa = ["mypy-boto3-mwaa (>=1.34.0,<1.35.0)"] -neptune = ["mypy-boto3-neptune (>=1.34.0,<1.35.0)"] -neptune-graph = ["mypy-boto3-neptune-graph (>=1.34.0,<1.35.0)"] -neptunedata = ["mypy-boto3-neptunedata (>=1.34.0,<1.35.0)"] -network-firewall = ["mypy-boto3-network-firewall (>=1.34.0,<1.35.0)"] -networkmanager = ["mypy-boto3-networkmanager (>=1.34.0,<1.35.0)"] -networkmonitor = ["mypy-boto3-networkmonitor (>=1.34.0,<1.35.0)"] -nimble = ["mypy-boto3-nimble (>=1.34.0,<1.35.0)"] -oam = ["mypy-boto3-oam (>=1.34.0,<1.35.0)"] -omics = ["mypy-boto3-omics (>=1.34.0,<1.35.0)"] -opensearch = 
["mypy-boto3-opensearch (>=1.34.0,<1.35.0)"] -opensearchserverless = ["mypy-boto3-opensearchserverless (>=1.34.0,<1.35.0)"] -opsworks = ["mypy-boto3-opsworks (>=1.34.0,<1.35.0)"] -opsworkscm = ["mypy-boto3-opsworkscm (>=1.34.0,<1.35.0)"] -organizations = ["mypy-boto3-organizations (>=1.34.0,<1.35.0)"] -osis = ["mypy-boto3-osis (>=1.34.0,<1.35.0)"] -outposts = ["mypy-boto3-outposts (>=1.34.0,<1.35.0)"] -panorama = ["mypy-boto3-panorama (>=1.34.0,<1.35.0)"] -payment-cryptography = ["mypy-boto3-payment-cryptography (>=1.34.0,<1.35.0)"] -payment-cryptography-data = ["mypy-boto3-payment-cryptography-data (>=1.34.0,<1.35.0)"] -pca-connector-ad = ["mypy-boto3-pca-connector-ad (>=1.34.0,<1.35.0)"] -pca-connector-scep = ["mypy-boto3-pca-connector-scep (>=1.34.0,<1.35.0)"] -personalize = ["mypy-boto3-personalize (>=1.34.0,<1.35.0)"] -personalize-events = ["mypy-boto3-personalize-events (>=1.34.0,<1.35.0)"] -personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.34.0,<1.35.0)"] -pi = ["mypy-boto3-pi (>=1.34.0,<1.35.0)"] -pinpoint = ["mypy-boto3-pinpoint (>=1.34.0,<1.35.0)"] -pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.34.0,<1.35.0)"] -pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.34.0,<1.35.0)"] -pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.34.0,<1.35.0)"] -pipes = ["mypy-boto3-pipes (>=1.34.0,<1.35.0)"] -polly = ["mypy-boto3-polly (>=1.34.0,<1.35.0)"] -pricing = ["mypy-boto3-pricing (>=1.34.0,<1.35.0)"] -privatenetworks = ["mypy-boto3-privatenetworks (>=1.34.0,<1.35.0)"] -proton = ["mypy-boto3-proton (>=1.34.0,<1.35.0)"] -qapps = ["mypy-boto3-qapps (>=1.34.0,<1.35.0)"] -qbusiness = ["mypy-boto3-qbusiness (>=1.34.0,<1.35.0)"] -qconnect = ["mypy-boto3-qconnect (>=1.34.0,<1.35.0)"] -qldb = ["mypy-boto3-qldb (>=1.34.0,<1.35.0)"] -qldb-session = ["mypy-boto3-qldb-session (>=1.34.0,<1.35.0)"] -quicksight = ["mypy-boto3-quicksight (>=1.34.0,<1.35.0)"] -ram = ["mypy-boto3-ram (>=1.34.0,<1.35.0)"] -rbin = ["mypy-boto3-rbin 
(>=1.34.0,<1.35.0)"] -rds = ["mypy-boto3-rds (>=1.34.0,<1.35.0)"] -rds-data = ["mypy-boto3-rds-data (>=1.34.0,<1.35.0)"] -redshift = ["mypy-boto3-redshift (>=1.34.0,<1.35.0)"] -redshift-data = ["mypy-boto3-redshift-data (>=1.34.0,<1.35.0)"] -redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.34.0,<1.35.0)"] -rekognition = ["mypy-boto3-rekognition (>=1.34.0,<1.35.0)"] -repostspace = ["mypy-boto3-repostspace (>=1.34.0,<1.35.0)"] -resiliencehub = ["mypy-boto3-resiliencehub (>=1.34.0,<1.35.0)"] -resource-explorer-2 = ["mypy-boto3-resource-explorer-2 (>=1.34.0,<1.35.0)"] -resource-groups = ["mypy-boto3-resource-groups (>=1.34.0,<1.35.0)"] -resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.34.0,<1.35.0)"] -robomaker = ["mypy-boto3-robomaker (>=1.34.0,<1.35.0)"] -rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.34.0,<1.35.0)"] -route53 = ["mypy-boto3-route53 (>=1.34.0,<1.35.0)"] -route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.34.0,<1.35.0)"] -route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.34.0,<1.35.0)"] -route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.34.0,<1.35.0)"] -route53domains = ["mypy-boto3-route53domains (>=1.34.0,<1.35.0)"] -route53profiles = ["mypy-boto3-route53profiles (>=1.34.0,<1.35.0)"] -route53resolver = ["mypy-boto3-route53resolver (>=1.34.0,<1.35.0)"] -rum = ["mypy-boto3-rum (>=1.34.0,<1.35.0)"] -s3 = ["mypy-boto3-s3 (>=1.34.0,<1.35.0)"] -s3control = ["mypy-boto3-s3control (>=1.34.0,<1.35.0)"] -s3outposts = ["mypy-boto3-s3outposts (>=1.34.0,<1.35.0)"] -sagemaker = ["mypy-boto3-sagemaker (>=1.34.0,<1.35.0)"] -sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.34.0,<1.35.0)"] -sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.34.0,<1.35.0)"] -sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.34.0,<1.35.0)"] -sagemaker-geospatial = ["mypy-boto3-sagemaker-geospatial (>=1.34.0,<1.35.0)"] 
-sagemaker-metrics = ["mypy-boto3-sagemaker-metrics (>=1.34.0,<1.35.0)"] -sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.34.0,<1.35.0)"] -savingsplans = ["mypy-boto3-savingsplans (>=1.34.0,<1.35.0)"] -scheduler = ["mypy-boto3-scheduler (>=1.34.0,<1.35.0)"] -schemas = ["mypy-boto3-schemas (>=1.34.0,<1.35.0)"] -sdb = ["mypy-boto3-sdb (>=1.34.0,<1.35.0)"] -secretsmanager = ["mypy-boto3-secretsmanager (>=1.34.0,<1.35.0)"] -securityhub = ["mypy-boto3-securityhub (>=1.34.0,<1.35.0)"] -securitylake = ["mypy-boto3-securitylake (>=1.34.0,<1.35.0)"] -serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.34.0,<1.35.0)"] -service-quotas = ["mypy-boto3-service-quotas (>=1.34.0,<1.35.0)"] -servicecatalog = ["mypy-boto3-servicecatalog (>=1.34.0,<1.35.0)"] -servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.34.0,<1.35.0)"] -servicediscovery = ["mypy-boto3-servicediscovery (>=1.34.0,<1.35.0)"] -ses = ["mypy-boto3-ses (>=1.34.0,<1.35.0)"] -sesv2 = ["mypy-boto3-sesv2 (>=1.34.0,<1.35.0)"] -shield = ["mypy-boto3-shield (>=1.34.0,<1.35.0)"] -signer = ["mypy-boto3-signer (>=1.34.0,<1.35.0)"] -simspaceweaver = ["mypy-boto3-simspaceweaver (>=1.34.0,<1.35.0)"] -sms = ["mypy-boto3-sms (>=1.34.0,<1.35.0)"] -sms-voice = ["mypy-boto3-sms-voice (>=1.34.0,<1.35.0)"] -snow-device-management = ["mypy-boto3-snow-device-management (>=1.34.0,<1.35.0)"] -snowball = ["mypy-boto3-snowball (>=1.34.0,<1.35.0)"] -sns = ["mypy-boto3-sns (>=1.34.0,<1.35.0)"] -sqs = ["mypy-boto3-sqs (>=1.34.0,<1.35.0)"] -ssm = ["mypy-boto3-ssm (>=1.34.0,<1.35.0)"] -ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.34.0,<1.35.0)"] -ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.34.0,<1.35.0)"] -ssm-quicksetup = ["mypy-boto3-ssm-quicksetup (>=1.34.0,<1.35.0)"] -ssm-sap = ["mypy-boto3-ssm-sap (>=1.34.0,<1.35.0)"] -sso = ["mypy-boto3-sso (>=1.34.0,<1.35.0)"] -sso-admin = ["mypy-boto3-sso-admin (>=1.34.0,<1.35.0)"] -sso-oidc = ["mypy-boto3-sso-oidc (>=1.34.0,<1.35.0)"] -stepfunctions = 
["mypy-boto3-stepfunctions (>=1.34.0,<1.35.0)"] -storagegateway = ["mypy-boto3-storagegateway (>=1.34.0,<1.35.0)"] -sts = ["mypy-boto3-sts (>=1.34.0,<1.35.0)"] -supplychain = ["mypy-boto3-supplychain (>=1.34.0,<1.35.0)"] -support = ["mypy-boto3-support (>=1.34.0,<1.35.0)"] -support-app = ["mypy-boto3-support-app (>=1.34.0,<1.35.0)"] -swf = ["mypy-boto3-swf (>=1.34.0,<1.35.0)"] -synthetics = ["mypy-boto3-synthetics (>=1.34.0,<1.35.0)"] -taxsettings = ["mypy-boto3-taxsettings (>=1.34.0,<1.35.0)"] -textract = ["mypy-boto3-textract (>=1.34.0,<1.35.0)"] -timestream-influxdb = ["mypy-boto3-timestream-influxdb (>=1.34.0,<1.35.0)"] -timestream-query = ["mypy-boto3-timestream-query (>=1.34.0,<1.35.0)"] -timestream-write = ["mypy-boto3-timestream-write (>=1.34.0,<1.35.0)"] -tnb = ["mypy-boto3-tnb (>=1.34.0,<1.35.0)"] -transcribe = ["mypy-boto3-transcribe (>=1.34.0,<1.35.0)"] -transfer = ["mypy-boto3-transfer (>=1.34.0,<1.35.0)"] -translate = ["mypy-boto3-translate (>=1.34.0,<1.35.0)"] -trustedadvisor = ["mypy-boto3-trustedadvisor (>=1.34.0,<1.35.0)"] -verifiedpermissions = ["mypy-boto3-verifiedpermissions (>=1.34.0,<1.35.0)"] -voice-id = ["mypy-boto3-voice-id (>=1.34.0,<1.35.0)"] -vpc-lattice = ["mypy-boto3-vpc-lattice (>=1.34.0,<1.35.0)"] -waf = ["mypy-boto3-waf (>=1.34.0,<1.35.0)"] -waf-regional = ["mypy-boto3-waf-regional (>=1.34.0,<1.35.0)"] -wafv2 = ["mypy-boto3-wafv2 (>=1.34.0,<1.35.0)"] -wellarchitected = ["mypy-boto3-wellarchitected (>=1.34.0,<1.35.0)"] -wisdom = ["mypy-boto3-wisdom (>=1.34.0,<1.35.0)"] -workdocs = ["mypy-boto3-workdocs (>=1.34.0,<1.35.0)"] -worklink = ["mypy-boto3-worklink (>=1.34.0,<1.35.0)"] -workmail = ["mypy-boto3-workmail (>=1.34.0,<1.35.0)"] -workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.34.0,<1.35.0)"] -workspaces = ["mypy-boto3-workspaces (>=1.34.0,<1.35.0)"] -workspaces-thin-client = ["mypy-boto3-workspaces-thin-client (>=1.34.0,<1.35.0)"] -workspaces-web = ["mypy-boto3-workspaces-web (>=1.34.0,<1.35.0)"] -xray = 
["mypy-boto3-xray (>=1.34.0,<1.35.0)"] +accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.42.0,<1.43.0)"] +account = ["mypy-boto3-account (>=1.42.0,<1.43.0)"] +acm = ["mypy-boto3-acm (>=1.42.0,<1.43.0)"] +acm-pca = ["mypy-boto3-acm-pca (>=1.42.0,<1.43.0)"] +aiops = ["mypy-boto3-aiops (>=1.42.0,<1.43.0)"] +all = ["mypy-boto3-accessanalyzer (>=1.42.0,<1.43.0)", "mypy-boto3-account (>=1.42.0,<1.43.0)", "mypy-boto3-acm (>=1.42.0,<1.43.0)", "mypy-boto3-acm-pca (>=1.42.0,<1.43.0)", "mypy-boto3-aiops (>=1.42.0,<1.43.0)", "mypy-boto3-amp (>=1.42.0,<1.43.0)", "mypy-boto3-amplify (>=1.42.0,<1.43.0)", "mypy-boto3-amplifybackend (>=1.42.0,<1.43.0)", "mypy-boto3-amplifyuibuilder (>=1.42.0,<1.43.0)", "mypy-boto3-apigateway (>=1.42.0,<1.43.0)", "mypy-boto3-apigatewaymanagementapi (>=1.42.0,<1.43.0)", "mypy-boto3-apigatewayv2 (>=1.42.0,<1.43.0)", "mypy-boto3-appconfig (>=1.42.0,<1.43.0)", "mypy-boto3-appconfigdata (>=1.42.0,<1.43.0)", "mypy-boto3-appfabric (>=1.42.0,<1.43.0)", "mypy-boto3-appflow (>=1.42.0,<1.43.0)", "mypy-boto3-appintegrations (>=1.42.0,<1.43.0)", "mypy-boto3-application-autoscaling (>=1.42.0,<1.43.0)", "mypy-boto3-application-insights (>=1.42.0,<1.43.0)", "mypy-boto3-application-signals (>=1.42.0,<1.43.0)", "mypy-boto3-applicationcostprofiler (>=1.42.0,<1.43.0)", "mypy-boto3-appmesh (>=1.42.0,<1.43.0)", "mypy-boto3-apprunner (>=1.42.0,<1.43.0)", "mypy-boto3-appstream (>=1.42.0,<1.43.0)", "mypy-boto3-appsync (>=1.42.0,<1.43.0)", "mypy-boto3-arc-region-switch (>=1.42.0,<1.43.0)", "mypy-boto3-arc-zonal-shift (>=1.42.0,<1.43.0)", "mypy-boto3-artifact (>=1.42.0,<1.43.0)", "mypy-boto3-athena (>=1.42.0,<1.43.0)", "mypy-boto3-auditmanager (>=1.42.0,<1.43.0)", "mypy-boto3-autoscaling (>=1.42.0,<1.43.0)", "mypy-boto3-autoscaling-plans (>=1.42.0,<1.43.0)", "mypy-boto3-b2bi (>=1.42.0,<1.43.0)", "mypy-boto3-backup (>=1.42.0,<1.43.0)", "mypy-boto3-backup-gateway (>=1.42.0,<1.43.0)", "mypy-boto3-backupsearch (>=1.42.0,<1.43.0)", "mypy-boto3-batch (>=1.42.0,<1.43.0)", 
"mypy-boto3-bcm-dashboards (>=1.42.0,<1.43.0)", "mypy-boto3-bcm-data-exports (>=1.42.0,<1.43.0)", "mypy-boto3-bcm-pricing-calculator (>=1.42.0,<1.43.0)", "mypy-boto3-bcm-recommended-actions (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agent (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agent-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agentcore (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agentcore-control (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-data-automation (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-data-automation-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-billing (>=1.42.0,<1.43.0)", "mypy-boto3-billingconductor (>=1.42.0,<1.43.0)", "mypy-boto3-braket (>=1.42.0,<1.43.0)", "mypy-boto3-budgets (>=1.42.0,<1.43.0)", "mypy-boto3-ce (>=1.42.0,<1.43.0)", "mypy-boto3-chatbot (>=1.42.0,<1.43.0)", "mypy-boto3-chime (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-identity (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-meetings (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-messaging (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-voice (>=1.42.0,<1.43.0)", "mypy-boto3-cleanrooms (>=1.42.0,<1.43.0)", "mypy-boto3-cleanroomsml (>=1.42.0,<1.43.0)", "mypy-boto3-cloud9 (>=1.42.0,<1.43.0)", "mypy-boto3-cloudcontrol (>=1.42.0,<1.43.0)", "mypy-boto3-clouddirectory (>=1.42.0,<1.43.0)", "mypy-boto3-cloudformation (>=1.42.0,<1.43.0)", "mypy-boto3-cloudfront (>=1.42.0,<1.43.0)", "mypy-boto3-cloudfront-keyvaluestore (>=1.42.0,<1.43.0)", "mypy-boto3-cloudhsm (>=1.42.0,<1.43.0)", "mypy-boto3-cloudhsmv2 (>=1.42.0,<1.43.0)", "mypy-boto3-cloudsearch (>=1.42.0,<1.43.0)", "mypy-boto3-cloudsearchdomain (>=1.42.0,<1.43.0)", "mypy-boto3-cloudtrail (>=1.42.0,<1.43.0)", "mypy-boto3-cloudtrail-data (>=1.42.0,<1.43.0)", "mypy-boto3-cloudwatch (>=1.42.0,<1.43.0)", "mypy-boto3-codeartifact (>=1.42.0,<1.43.0)", "mypy-boto3-codebuild (>=1.42.0,<1.43.0)", "mypy-boto3-codecatalyst 
(>=1.42.0,<1.43.0)", "mypy-boto3-codecommit (>=1.42.0,<1.43.0)", "mypy-boto3-codeconnections (>=1.42.0,<1.43.0)", "mypy-boto3-codedeploy (>=1.42.0,<1.43.0)", "mypy-boto3-codeguru-reviewer (>=1.42.0,<1.43.0)", "mypy-boto3-codeguru-security (>=1.42.0,<1.43.0)", "mypy-boto3-codeguruprofiler (>=1.42.0,<1.43.0)", "mypy-boto3-codepipeline (>=1.42.0,<1.43.0)", "mypy-boto3-codestar-connections (>=1.42.0,<1.43.0)", "mypy-boto3-codestar-notifications (>=1.42.0,<1.43.0)", "mypy-boto3-cognito-identity (>=1.42.0,<1.43.0)", "mypy-boto3-cognito-idp (>=1.42.0,<1.43.0)", "mypy-boto3-cognito-sync (>=1.42.0,<1.43.0)", "mypy-boto3-comprehend (>=1.42.0,<1.43.0)", "mypy-boto3-comprehendmedical (>=1.42.0,<1.43.0)", "mypy-boto3-compute-optimizer (>=1.42.0,<1.43.0)", "mypy-boto3-compute-optimizer-automation (>=1.42.0,<1.43.0)", "mypy-boto3-config (>=1.42.0,<1.43.0)", "mypy-boto3-connect (>=1.42.0,<1.43.0)", "mypy-boto3-connect-contact-lens (>=1.42.0,<1.43.0)", "mypy-boto3-connectcampaigns (>=1.42.0,<1.43.0)", "mypy-boto3-connectcampaignsv2 (>=1.42.0,<1.43.0)", "mypy-boto3-connectcases (>=1.42.0,<1.43.0)", "mypy-boto3-connecthealth (>=1.42.0,<1.43.0)", "mypy-boto3-connectparticipant (>=1.42.0,<1.43.0)", "mypy-boto3-controlcatalog (>=1.42.0,<1.43.0)", "mypy-boto3-controltower (>=1.42.0,<1.43.0)", "mypy-boto3-cost-optimization-hub (>=1.42.0,<1.43.0)", "mypy-boto3-cur (>=1.42.0,<1.43.0)", "mypy-boto3-customer-profiles (>=1.42.0,<1.43.0)", "mypy-boto3-databrew (>=1.42.0,<1.43.0)", "mypy-boto3-dataexchange (>=1.42.0,<1.43.0)", "mypy-boto3-datapipeline (>=1.42.0,<1.43.0)", "mypy-boto3-datasync (>=1.42.0,<1.43.0)", "mypy-boto3-datazone (>=1.42.0,<1.43.0)", "mypy-boto3-dax (>=1.42.0,<1.43.0)", "mypy-boto3-deadline (>=1.42.0,<1.43.0)", "mypy-boto3-detective (>=1.42.0,<1.43.0)", "mypy-boto3-devicefarm (>=1.42.0,<1.43.0)", "mypy-boto3-devops-agent (>=1.42.0,<1.43.0)", "mypy-boto3-devops-guru (>=1.42.0,<1.43.0)", "mypy-boto3-directconnect (>=1.42.0,<1.43.0)", "mypy-boto3-discovery (>=1.42.0,<1.43.0)", 
"mypy-boto3-dlm (>=1.42.0,<1.43.0)", "mypy-boto3-dms (>=1.42.0,<1.43.0)", "mypy-boto3-docdb (>=1.42.0,<1.43.0)", "mypy-boto3-docdb-elastic (>=1.42.0,<1.43.0)", "mypy-boto3-drs (>=1.42.0,<1.43.0)", "mypy-boto3-ds (>=1.42.0,<1.43.0)", "mypy-boto3-ds-data (>=1.42.0,<1.43.0)", "mypy-boto3-dsql (>=1.42.0,<1.43.0)", "mypy-boto3-dynamodb (>=1.42.0,<1.43.0)", "mypy-boto3-dynamodbstreams (>=1.42.0,<1.43.0)", "mypy-boto3-ebs (>=1.42.0,<1.43.0)", "mypy-boto3-ec2 (>=1.42.0,<1.43.0)", "mypy-boto3-ec2-instance-connect (>=1.42.0,<1.43.0)", "mypy-boto3-ecr (>=1.42.0,<1.43.0)", "mypy-boto3-ecr-public (>=1.42.0,<1.43.0)", "mypy-boto3-ecs (>=1.42.0,<1.43.0)", "mypy-boto3-efs (>=1.42.0,<1.43.0)", "mypy-boto3-eks (>=1.42.0,<1.43.0)", "mypy-boto3-eks-auth (>=1.42.0,<1.43.0)", "mypy-boto3-elasticache (>=1.42.0,<1.43.0)", "mypy-boto3-elasticbeanstalk (>=1.42.0,<1.43.0)", "mypy-boto3-elb (>=1.42.0,<1.43.0)", "mypy-boto3-elbv2 (>=1.42.0,<1.43.0)", "mypy-boto3-elementalinference (>=1.42.0,<1.43.0)", "mypy-boto3-emr (>=1.42.0,<1.43.0)", "mypy-boto3-emr-containers (>=1.42.0,<1.43.0)", "mypy-boto3-emr-serverless (>=1.42.0,<1.43.0)", "mypy-boto3-entityresolution (>=1.42.0,<1.43.0)", "mypy-boto3-es (>=1.42.0,<1.43.0)", "mypy-boto3-events (>=1.42.0,<1.43.0)", "mypy-boto3-evs (>=1.42.0,<1.43.0)", "mypy-boto3-finspace (>=1.42.0,<1.43.0)", "mypy-boto3-finspace-data (>=1.42.0,<1.43.0)", "mypy-boto3-firehose (>=1.42.0,<1.43.0)", "mypy-boto3-fis (>=1.42.0,<1.43.0)", "mypy-boto3-fms (>=1.42.0,<1.43.0)", "mypy-boto3-forecast (>=1.42.0,<1.43.0)", "mypy-boto3-forecastquery (>=1.42.0,<1.43.0)", "mypy-boto3-frauddetector (>=1.42.0,<1.43.0)", "mypy-boto3-freetier (>=1.42.0,<1.43.0)", "mypy-boto3-fsx (>=1.42.0,<1.43.0)", "mypy-boto3-gamelift (>=1.42.0,<1.43.0)", "mypy-boto3-gameliftstreams (>=1.42.0,<1.43.0)", "mypy-boto3-geo-maps (>=1.42.0,<1.43.0)", "mypy-boto3-geo-places (>=1.42.0,<1.43.0)", "mypy-boto3-geo-routes (>=1.42.0,<1.43.0)", "mypy-boto3-glacier (>=1.42.0,<1.43.0)", "mypy-boto3-globalaccelerator 
(>=1.42.0,<1.43.0)", "mypy-boto3-glue (>=1.42.0,<1.43.0)", "mypy-boto3-grafana (>=1.42.0,<1.43.0)", "mypy-boto3-greengrass (>=1.42.0,<1.43.0)", "mypy-boto3-greengrassv2 (>=1.42.0,<1.43.0)", "mypy-boto3-groundstation (>=1.42.0,<1.43.0)", "mypy-boto3-guardduty (>=1.42.0,<1.43.0)", "mypy-boto3-health (>=1.42.0,<1.43.0)", "mypy-boto3-healthlake (>=1.42.0,<1.43.0)", "mypy-boto3-iam (>=1.42.0,<1.43.0)", "mypy-boto3-identitystore (>=1.42.0,<1.43.0)", "mypy-boto3-imagebuilder (>=1.42.0,<1.43.0)", "mypy-boto3-importexport (>=1.42.0,<1.43.0)", "mypy-boto3-inspector (>=1.42.0,<1.43.0)", "mypy-boto3-inspector-scan (>=1.42.0,<1.43.0)", "mypy-boto3-inspector2 (>=1.42.0,<1.43.0)", "mypy-boto3-interconnect (>=1.42.0,<1.43.0)", "mypy-boto3-internetmonitor (>=1.42.0,<1.43.0)", "mypy-boto3-invoicing (>=1.42.0,<1.43.0)", "mypy-boto3-iot (>=1.42.0,<1.43.0)", "mypy-boto3-iot-data (>=1.42.0,<1.43.0)", "mypy-boto3-iot-jobs-data (>=1.42.0,<1.43.0)", "mypy-boto3-iot-managed-integrations (>=1.42.0,<1.43.0)", "mypy-boto3-iotdeviceadvisor (>=1.42.0,<1.43.0)", "mypy-boto3-iotevents (>=1.42.0,<1.43.0)", "mypy-boto3-iotevents-data (>=1.42.0,<1.43.0)", "mypy-boto3-iotfleetwise (>=1.42.0,<1.43.0)", "mypy-boto3-iotsecuretunneling (>=1.42.0,<1.43.0)", "mypy-boto3-iotsitewise (>=1.42.0,<1.43.0)", "mypy-boto3-iotthingsgraph (>=1.42.0,<1.43.0)", "mypy-boto3-iottwinmaker (>=1.42.0,<1.43.0)", "mypy-boto3-iotwireless (>=1.42.0,<1.43.0)", "mypy-boto3-ivs (>=1.42.0,<1.43.0)", "mypy-boto3-ivs-realtime (>=1.42.0,<1.43.0)", "mypy-boto3-ivschat (>=1.42.0,<1.43.0)", "mypy-boto3-kafka (>=1.42.0,<1.43.0)", "mypy-boto3-kafkaconnect (>=1.42.0,<1.43.0)", "mypy-boto3-kendra (>=1.42.0,<1.43.0)", "mypy-boto3-kendra-ranking (>=1.42.0,<1.43.0)", "mypy-boto3-keyspaces (>=1.42.0,<1.43.0)", "mypy-boto3-keyspacesstreams (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis-video-archived-media (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis-video-media (>=1.42.0,<1.43.0)", 
"mypy-boto3-kinesis-video-signaling (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis-video-webrtc-storage (>=1.42.0,<1.43.0)", "mypy-boto3-kinesisanalytics (>=1.42.0,<1.43.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.42.0,<1.43.0)", "mypy-boto3-kinesisvideo (>=1.42.0,<1.43.0)", "mypy-boto3-kms (>=1.42.0,<1.43.0)", "mypy-boto3-lakeformation (>=1.42.0,<1.43.0)", "mypy-boto3-lambda (>=1.42.0,<1.43.0)", "mypy-boto3-launch-wizard (>=1.42.0,<1.43.0)", "mypy-boto3-lex-models (>=1.42.0,<1.43.0)", "mypy-boto3-lex-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-lexv2-models (>=1.42.0,<1.43.0)", "mypy-boto3-lexv2-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-license-manager (>=1.42.0,<1.43.0)", "mypy-boto3-license-manager-linux-subscriptions (>=1.42.0,<1.43.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.42.0,<1.43.0)", "mypy-boto3-lightsail (>=1.42.0,<1.43.0)", "mypy-boto3-location (>=1.42.0,<1.43.0)", "mypy-boto3-logs (>=1.42.0,<1.43.0)", "mypy-boto3-lookoutequipment (>=1.42.0,<1.43.0)", "mypy-boto3-m2 (>=1.42.0,<1.43.0)", "mypy-boto3-machinelearning (>=1.42.0,<1.43.0)", "mypy-boto3-macie2 (>=1.42.0,<1.43.0)", "mypy-boto3-mailmanager (>=1.42.0,<1.43.0)", "mypy-boto3-managedblockchain (>=1.42.0,<1.43.0)", "mypy-boto3-managedblockchain-query (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-agreement (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-catalog (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-deployment (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-discovery (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-entitlement (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-reporting (>=1.42.0,<1.43.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.42.0,<1.43.0)", "mypy-boto3-mediaconnect (>=1.42.0,<1.43.0)", "mypy-boto3-mediaconvert (>=1.42.0,<1.43.0)", "mypy-boto3-medialive (>=1.42.0,<1.43.0)", "mypy-boto3-mediapackage (>=1.42.0,<1.43.0)", "mypy-boto3-mediapackage-vod (>=1.42.0,<1.43.0)", "mypy-boto3-mediapackagev2 (>=1.42.0,<1.43.0)", "mypy-boto3-mediastore (>=1.42.0,<1.43.0)", 
"mypy-boto3-mediastore-data (>=1.42.0,<1.43.0)", "mypy-boto3-mediatailor (>=1.42.0,<1.43.0)", "mypy-boto3-medical-imaging (>=1.42.0,<1.43.0)", "mypy-boto3-memorydb (>=1.42.0,<1.43.0)", "mypy-boto3-meteringmarketplace (>=1.42.0,<1.43.0)", "mypy-boto3-mgh (>=1.42.0,<1.43.0)", "mypy-boto3-mgn (>=1.42.0,<1.43.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.42.0,<1.43.0)", "mypy-boto3-migrationhub-config (>=1.42.0,<1.43.0)", "mypy-boto3-migrationhuborchestrator (>=1.42.0,<1.43.0)", "mypy-boto3-migrationhubstrategy (>=1.42.0,<1.43.0)", "mypy-boto3-mpa (>=1.42.0,<1.43.0)", "mypy-boto3-mq (>=1.42.0,<1.43.0)", "mypy-boto3-mturk (>=1.42.0,<1.43.0)", "mypy-boto3-mwaa (>=1.42.0,<1.43.0)", "mypy-boto3-mwaa-serverless (>=1.42.0,<1.43.0)", "mypy-boto3-neptune (>=1.42.0,<1.43.0)", "mypy-boto3-neptune-graph (>=1.42.0,<1.43.0)", "mypy-boto3-neptunedata (>=1.42.0,<1.43.0)", "mypy-boto3-network-firewall (>=1.42.0,<1.43.0)", "mypy-boto3-networkflowmonitor (>=1.42.0,<1.43.0)", "mypy-boto3-networkmanager (>=1.42.0,<1.43.0)", "mypy-boto3-networkmonitor (>=1.42.0,<1.43.0)", "mypy-boto3-notifications (>=1.42.0,<1.43.0)", "mypy-boto3-notificationscontacts (>=1.42.0,<1.43.0)", "mypy-boto3-nova-act (>=1.42.0,<1.43.0)", "mypy-boto3-oam (>=1.42.0,<1.43.0)", "mypy-boto3-observabilityadmin (>=1.42.0,<1.43.0)", "mypy-boto3-odb (>=1.42.0,<1.43.0)", "mypy-boto3-omics (>=1.42.0,<1.43.0)", "mypy-boto3-opensearch (>=1.42.0,<1.43.0)", "mypy-boto3-opensearchserverless (>=1.42.0,<1.43.0)", "mypy-boto3-organizations (>=1.42.0,<1.43.0)", "mypy-boto3-osis (>=1.42.0,<1.43.0)", "mypy-boto3-outposts (>=1.42.0,<1.43.0)", "mypy-boto3-panorama (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-account (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-benefits (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-channel (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-selling (>=1.42.0,<1.43.0)", "mypy-boto3-payment-cryptography (>=1.42.0,<1.43.0)", "mypy-boto3-payment-cryptography-data (>=1.42.0,<1.43.0)", 
"mypy-boto3-pca-connector-ad (>=1.42.0,<1.43.0)", "mypy-boto3-pca-connector-scep (>=1.42.0,<1.43.0)", "mypy-boto3-pcs (>=1.42.0,<1.43.0)", "mypy-boto3-personalize (>=1.42.0,<1.43.0)", "mypy-boto3-personalize-events (>=1.42.0,<1.43.0)", "mypy-boto3-personalize-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-pi (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint-email (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint-sms-voice (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.42.0,<1.43.0)", "mypy-boto3-pipes (>=1.42.0,<1.43.0)", "mypy-boto3-polly (>=1.42.0,<1.43.0)", "mypy-boto3-pricing (>=1.42.0,<1.43.0)", "mypy-boto3-proton (>=1.42.0,<1.43.0)", "mypy-boto3-qapps (>=1.42.0,<1.43.0)", "mypy-boto3-qbusiness (>=1.42.0,<1.43.0)", "mypy-boto3-qconnect (>=1.42.0,<1.43.0)", "mypy-boto3-quicksight (>=1.42.0,<1.43.0)", "mypy-boto3-ram (>=1.42.0,<1.43.0)", "mypy-boto3-rbin (>=1.42.0,<1.43.0)", "mypy-boto3-rds (>=1.42.0,<1.43.0)", "mypy-boto3-rds-data (>=1.42.0,<1.43.0)", "mypy-boto3-redshift (>=1.42.0,<1.43.0)", "mypy-boto3-redshift-data (>=1.42.0,<1.43.0)", "mypy-boto3-redshift-serverless (>=1.42.0,<1.43.0)", "mypy-boto3-rekognition (>=1.42.0,<1.43.0)", "mypy-boto3-repostspace (>=1.42.0,<1.43.0)", "mypy-boto3-resiliencehub (>=1.42.0,<1.43.0)", "mypy-boto3-resource-explorer-2 (>=1.42.0,<1.43.0)", "mypy-boto3-resource-groups (>=1.42.0,<1.43.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.42.0,<1.43.0)", "mypy-boto3-rolesanywhere (>=1.42.0,<1.43.0)", "mypy-boto3-route53 (>=1.42.0,<1.43.0)", "mypy-boto3-route53-recovery-cluster (>=1.42.0,<1.43.0)", "mypy-boto3-route53-recovery-control-config (>=1.42.0,<1.43.0)", "mypy-boto3-route53-recovery-readiness (>=1.42.0,<1.43.0)", "mypy-boto3-route53domains (>=1.42.0,<1.43.0)", "mypy-boto3-route53globalresolver (>=1.42.0,<1.43.0)", "mypy-boto3-route53profiles (>=1.42.0,<1.43.0)", "mypy-boto3-route53resolver (>=1.42.0,<1.43.0)", "mypy-boto3-rtbfabric (>=1.42.0,<1.43.0)", "mypy-boto3-rum (>=1.42.0,<1.43.0)", 
"mypy-boto3-s3 (>=1.42.0,<1.43.0)", "mypy-boto3-s3control (>=1.42.0,<1.43.0)", "mypy-boto3-s3files (>=1.42.0,<1.43.0)", "mypy-boto3-s3outposts (>=1.42.0,<1.43.0)", "mypy-boto3-s3tables (>=1.42.0,<1.43.0)", "mypy-boto3-s3vectors (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-edge (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-geospatial (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-metrics (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-savingsplans (>=1.42.0,<1.43.0)", "mypy-boto3-scheduler (>=1.42.0,<1.43.0)", "mypy-boto3-schemas (>=1.42.0,<1.43.0)", "mypy-boto3-sdb (>=1.42.0,<1.43.0)", "mypy-boto3-secretsmanager (>=1.42.0,<1.43.0)", "mypy-boto3-security-ir (>=1.42.0,<1.43.0)", "mypy-boto3-securityagent (>=1.42.0,<1.43.0)", "mypy-boto3-securityhub (>=1.42.0,<1.43.0)", "mypy-boto3-securitylake (>=1.42.0,<1.43.0)", "mypy-boto3-serverlessrepo (>=1.42.0,<1.43.0)", "mypy-boto3-service-quotas (>=1.42.0,<1.43.0)", "mypy-boto3-servicecatalog (>=1.42.0,<1.43.0)", "mypy-boto3-servicecatalog-appregistry (>=1.42.0,<1.43.0)", "mypy-boto3-servicediscovery (>=1.42.0,<1.43.0)", "mypy-boto3-ses (>=1.42.0,<1.43.0)", "mypy-boto3-sesv2 (>=1.42.0,<1.43.0)", "mypy-boto3-shield (>=1.42.0,<1.43.0)", "mypy-boto3-signer (>=1.42.0,<1.43.0)", "mypy-boto3-signer-data (>=1.42.0,<1.43.0)", "mypy-boto3-signin (>=1.42.0,<1.43.0)", "mypy-boto3-simpledbv2 (>=1.42.0,<1.43.0)", "mypy-boto3-simspaceweaver (>=1.42.0,<1.43.0)", "mypy-boto3-snow-device-management (>=1.42.0,<1.43.0)", "mypy-boto3-snowball (>=1.42.0,<1.43.0)", "mypy-boto3-sns (>=1.42.0,<1.43.0)", "mypy-boto3-socialmessaging (>=1.42.0,<1.43.0)", "mypy-boto3-sqs (>=1.42.0,<1.43.0)", "mypy-boto3-ssm (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-contacts (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-guiconnect (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-incidents (>=1.42.0,<1.43.0)", 
"mypy-boto3-ssm-quicksetup (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-sap (>=1.42.0,<1.43.0)", "mypy-boto3-sso (>=1.42.0,<1.43.0)", "mypy-boto3-sso-admin (>=1.42.0,<1.43.0)", "mypy-boto3-sso-oidc (>=1.42.0,<1.43.0)", "mypy-boto3-stepfunctions (>=1.42.0,<1.43.0)", "mypy-boto3-storagegateway (>=1.42.0,<1.43.0)", "mypy-boto3-sts (>=1.42.0,<1.43.0)", "mypy-boto3-supplychain (>=1.42.0,<1.43.0)", "mypy-boto3-support (>=1.42.0,<1.43.0)", "mypy-boto3-support-app (>=1.42.0,<1.43.0)", "mypy-boto3-sustainability (>=1.42.0,<1.43.0)", "mypy-boto3-swf (>=1.42.0,<1.43.0)", "mypy-boto3-synthetics (>=1.42.0,<1.43.0)", "mypy-boto3-taxsettings (>=1.42.0,<1.43.0)", "mypy-boto3-textract (>=1.42.0,<1.43.0)", "mypy-boto3-timestream-influxdb (>=1.42.0,<1.43.0)", "mypy-boto3-timestream-query (>=1.42.0,<1.43.0)", "mypy-boto3-timestream-write (>=1.42.0,<1.43.0)", "mypy-boto3-tnb (>=1.42.0,<1.43.0)", "mypy-boto3-transcribe (>=1.42.0,<1.43.0)", "mypy-boto3-transfer (>=1.42.0,<1.43.0)", "mypy-boto3-translate (>=1.42.0,<1.43.0)", "mypy-boto3-trustedadvisor (>=1.42.0,<1.43.0)", "mypy-boto3-uxc (>=1.42.0,<1.43.0)", "mypy-boto3-verifiedpermissions (>=1.42.0,<1.43.0)", "mypy-boto3-voice-id (>=1.42.0,<1.43.0)", "mypy-boto3-vpc-lattice (>=1.42.0,<1.43.0)", "mypy-boto3-waf (>=1.42.0,<1.43.0)", "mypy-boto3-waf-regional (>=1.42.0,<1.43.0)", "mypy-boto3-wafv2 (>=1.42.0,<1.43.0)", "mypy-boto3-wellarchitected (>=1.42.0,<1.43.0)", "mypy-boto3-wickr (>=1.42.0,<1.43.0)", "mypy-boto3-wisdom (>=1.42.0,<1.43.0)", "mypy-boto3-workdocs (>=1.42.0,<1.43.0)", "mypy-boto3-workmail (>=1.42.0,<1.43.0)", "mypy-boto3-workmailmessageflow (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces-instances (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces-thin-client (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces-web (>=1.42.0,<1.43.0)", "mypy-boto3-xray (>=1.42.0,<1.43.0)"] +amp = ["mypy-boto3-amp (>=1.42.0,<1.43.0)"] +amplify = ["mypy-boto3-amplify (>=1.42.0,<1.43.0)"] +amplifybackend = 
["mypy-boto3-amplifybackend (>=1.42.0,<1.43.0)"] +amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.42.0,<1.43.0)"] +apigateway = ["mypy-boto3-apigateway (>=1.42.0,<1.43.0)"] +apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.42.0,<1.43.0)"] +apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.42.0,<1.43.0)"] +appconfig = ["mypy-boto3-appconfig (>=1.42.0,<1.43.0)"] +appconfigdata = ["mypy-boto3-appconfigdata (>=1.42.0,<1.43.0)"] +appfabric = ["mypy-boto3-appfabric (>=1.42.0,<1.43.0)"] +appflow = ["mypy-boto3-appflow (>=1.42.0,<1.43.0)"] +appintegrations = ["mypy-boto3-appintegrations (>=1.42.0,<1.43.0)"] +application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.42.0,<1.43.0)"] +application-insights = ["mypy-boto3-application-insights (>=1.42.0,<1.43.0)"] +application-signals = ["mypy-boto3-application-signals (>=1.42.0,<1.43.0)"] +applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.42.0,<1.43.0)"] +appmesh = ["mypy-boto3-appmesh (>=1.42.0,<1.43.0)"] +apprunner = ["mypy-boto3-apprunner (>=1.42.0,<1.43.0)"] +appstream = ["mypy-boto3-appstream (>=1.42.0,<1.43.0)"] +appsync = ["mypy-boto3-appsync (>=1.42.0,<1.43.0)"] +arc-region-switch = ["mypy-boto3-arc-region-switch (>=1.42.0,<1.43.0)"] +arc-zonal-shift = ["mypy-boto3-arc-zonal-shift (>=1.42.0,<1.43.0)"] +artifact = ["mypy-boto3-artifact (>=1.42.0,<1.43.0)"] +athena = ["mypy-boto3-athena (>=1.42.0,<1.43.0)"] +auditmanager = ["mypy-boto3-auditmanager (>=1.42.0,<1.43.0)"] +autoscaling = ["mypy-boto3-autoscaling (>=1.42.0,<1.43.0)"] +autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.42.0,<1.43.0)"] +b2bi = ["mypy-boto3-b2bi (>=1.42.0,<1.43.0)"] +backup = ["mypy-boto3-backup (>=1.42.0,<1.43.0)"] +backup-gateway = ["mypy-boto3-backup-gateway (>=1.42.0,<1.43.0)"] +backupsearch = ["mypy-boto3-backupsearch (>=1.42.0,<1.43.0)"] +batch = ["mypy-boto3-batch (>=1.42.0,<1.43.0)"] +bcm-dashboards = ["mypy-boto3-bcm-dashboards (>=1.42.0,<1.43.0)"] +bcm-data-exports = 
["mypy-boto3-bcm-data-exports (>=1.42.0,<1.43.0)"] +bcm-pricing-calculator = ["mypy-boto3-bcm-pricing-calculator (>=1.42.0,<1.43.0)"] +bcm-recommended-actions = ["mypy-boto3-bcm-recommended-actions (>=1.42.0,<1.43.0)"] +bedrock = ["mypy-boto3-bedrock (>=1.42.0,<1.43.0)"] +bedrock-agent = ["mypy-boto3-bedrock-agent (>=1.42.0,<1.43.0)"] +bedrock-agent-runtime = ["mypy-boto3-bedrock-agent-runtime (>=1.42.0,<1.43.0)"] +bedrock-agentcore = ["mypy-boto3-bedrock-agentcore (>=1.42.0,<1.43.0)"] +bedrock-agentcore-control = ["mypy-boto3-bedrock-agentcore-control (>=1.42.0,<1.43.0)"] +bedrock-data-automation = ["mypy-boto3-bedrock-data-automation (>=1.42.0,<1.43.0)"] +bedrock-data-automation-runtime = ["mypy-boto3-bedrock-data-automation-runtime (>=1.42.0,<1.43.0)"] +bedrock-runtime = ["mypy-boto3-bedrock-runtime (>=1.42.0,<1.43.0)"] +billing = ["mypy-boto3-billing (>=1.42.0,<1.43.0)"] +billingconductor = ["mypy-boto3-billingconductor (>=1.42.0,<1.43.0)"] +boto3 = ["boto3 (==1.42.97)"] +braket = ["mypy-boto3-braket (>=1.42.0,<1.43.0)"] +budgets = ["mypy-boto3-budgets (>=1.42.0,<1.43.0)"] +ce = ["mypy-boto3-ce (>=1.42.0,<1.43.0)"] +chatbot = ["mypy-boto3-chatbot (>=1.42.0,<1.43.0)"] +chime = ["mypy-boto3-chime (>=1.42.0,<1.43.0)"] +chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.42.0,<1.43.0)"] +chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.42.0,<1.43.0)"] +chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.42.0,<1.43.0)"] +chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.42.0,<1.43.0)"] +chime-sdk-voice = ["mypy-boto3-chime-sdk-voice (>=1.42.0,<1.43.0)"] +cleanrooms = ["mypy-boto3-cleanrooms (>=1.42.0,<1.43.0)"] +cleanroomsml = ["mypy-boto3-cleanroomsml (>=1.42.0,<1.43.0)"] +cloud9 = ["mypy-boto3-cloud9 (>=1.42.0,<1.43.0)"] +cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.42.0,<1.43.0)"] +clouddirectory = ["mypy-boto3-clouddirectory (>=1.42.0,<1.43.0)"] +cloudformation = ["mypy-boto3-cloudformation (>=1.42.0,<1.43.0)"] 
+cloudfront = ["mypy-boto3-cloudfront (>=1.42.0,<1.43.0)"] +cloudfront-keyvaluestore = ["mypy-boto3-cloudfront-keyvaluestore (>=1.42.0,<1.43.0)"] +cloudhsm = ["mypy-boto3-cloudhsm (>=1.42.0,<1.43.0)"] +cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.42.0,<1.43.0)"] +cloudsearch = ["mypy-boto3-cloudsearch (>=1.42.0,<1.43.0)"] +cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.42.0,<1.43.0)"] +cloudtrail = ["mypy-boto3-cloudtrail (>=1.42.0,<1.43.0)"] +cloudtrail-data = ["mypy-boto3-cloudtrail-data (>=1.42.0,<1.43.0)"] +cloudwatch = ["mypy-boto3-cloudwatch (>=1.42.0,<1.43.0)"] +codeartifact = ["mypy-boto3-codeartifact (>=1.42.0,<1.43.0)"] +codebuild = ["mypy-boto3-codebuild (>=1.42.0,<1.43.0)"] +codecatalyst = ["mypy-boto3-codecatalyst (>=1.42.0,<1.43.0)"] +codecommit = ["mypy-boto3-codecommit (>=1.42.0,<1.43.0)"] +codeconnections = ["mypy-boto3-codeconnections (>=1.42.0,<1.43.0)"] +codedeploy = ["mypy-boto3-codedeploy (>=1.42.0,<1.43.0)"] +codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.42.0,<1.43.0)"] +codeguru-security = ["mypy-boto3-codeguru-security (>=1.42.0,<1.43.0)"] +codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.42.0,<1.43.0)"] +codepipeline = ["mypy-boto3-codepipeline (>=1.42.0,<1.43.0)"] +codestar-connections = ["mypy-boto3-codestar-connections (>=1.42.0,<1.43.0)"] +codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.42.0,<1.43.0)"] +cognito-identity = ["mypy-boto3-cognito-identity (>=1.42.0,<1.43.0)"] +cognito-idp = ["mypy-boto3-cognito-idp (>=1.42.0,<1.43.0)"] +cognito-sync = ["mypy-boto3-cognito-sync (>=1.42.0,<1.43.0)"] +comprehend = ["mypy-boto3-comprehend (>=1.42.0,<1.43.0)"] +comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.42.0,<1.43.0)"] +compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.42.0,<1.43.0)"] +compute-optimizer-automation = ["mypy-boto3-compute-optimizer-automation (>=1.42.0,<1.43.0)"] +config = ["mypy-boto3-config (>=1.42.0,<1.43.0)"] +connect = ["mypy-boto3-connect (>=1.42.0,<1.43.0)"] 
+connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.42.0,<1.43.0)"] +connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.42.0,<1.43.0)"] +connectcampaignsv2 = ["mypy-boto3-connectcampaignsv2 (>=1.42.0,<1.43.0)"] +connectcases = ["mypy-boto3-connectcases (>=1.42.0,<1.43.0)"] +connecthealth = ["mypy-boto3-connecthealth (>=1.42.0,<1.43.0)"] +connectparticipant = ["mypy-boto3-connectparticipant (>=1.42.0,<1.43.0)"] +controlcatalog = ["mypy-boto3-controlcatalog (>=1.42.0,<1.43.0)"] +controltower = ["mypy-boto3-controltower (>=1.42.0,<1.43.0)"] +cost-optimization-hub = ["mypy-boto3-cost-optimization-hub (>=1.42.0,<1.43.0)"] +cur = ["mypy-boto3-cur (>=1.42.0,<1.43.0)"] +customer-profiles = ["mypy-boto3-customer-profiles (>=1.42.0,<1.43.0)"] +databrew = ["mypy-boto3-databrew (>=1.42.0,<1.43.0)"] +dataexchange = ["mypy-boto3-dataexchange (>=1.42.0,<1.43.0)"] +datapipeline = ["mypy-boto3-datapipeline (>=1.42.0,<1.43.0)"] +datasync = ["mypy-boto3-datasync (>=1.42.0,<1.43.0)"] +datazone = ["mypy-boto3-datazone (>=1.42.0,<1.43.0)"] +dax = ["mypy-boto3-dax (>=1.42.0,<1.43.0)"] +deadline = ["mypy-boto3-deadline (>=1.42.0,<1.43.0)"] +detective = ["mypy-boto3-detective (>=1.42.0,<1.43.0)"] +devicefarm = ["mypy-boto3-devicefarm (>=1.42.0,<1.43.0)"] +devops-agent = ["mypy-boto3-devops-agent (>=1.42.0,<1.43.0)"] +devops-guru = ["mypy-boto3-devops-guru (>=1.42.0,<1.43.0)"] +directconnect = ["mypy-boto3-directconnect (>=1.42.0,<1.43.0)"] +discovery = ["mypy-boto3-discovery (>=1.42.0,<1.43.0)"] +dlm = ["mypy-boto3-dlm (>=1.42.0,<1.43.0)"] +dms = ["mypy-boto3-dms (>=1.42.0,<1.43.0)"] +docdb = ["mypy-boto3-docdb (>=1.42.0,<1.43.0)"] +docdb-elastic = ["mypy-boto3-docdb-elastic (>=1.42.0,<1.43.0)"] +drs = ["mypy-boto3-drs (>=1.42.0,<1.43.0)"] +ds = ["mypy-boto3-ds (>=1.42.0,<1.43.0)"] +ds-data = ["mypy-boto3-ds-data (>=1.42.0,<1.43.0)"] +dsql = ["mypy-boto3-dsql (>=1.42.0,<1.43.0)"] +dynamodb = ["mypy-boto3-dynamodb (>=1.42.0,<1.43.0)"] +dynamodbstreams = 
["mypy-boto3-dynamodbstreams (>=1.42.0,<1.43.0)"] +ebs = ["mypy-boto3-ebs (>=1.42.0,<1.43.0)"] +ec2 = ["mypy-boto3-ec2 (>=1.42.0,<1.43.0)"] +ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.42.0,<1.43.0)"] +ecr = ["mypy-boto3-ecr (>=1.42.0,<1.43.0)"] +ecr-public = ["mypy-boto3-ecr-public (>=1.42.0,<1.43.0)"] +ecs = ["mypy-boto3-ecs (>=1.42.0,<1.43.0)"] +efs = ["mypy-boto3-efs (>=1.42.0,<1.43.0)"] +eks = ["mypy-boto3-eks (>=1.42.0,<1.43.0)"] +eks-auth = ["mypy-boto3-eks-auth (>=1.42.0,<1.43.0)"] +elasticache = ["mypy-boto3-elasticache (>=1.42.0,<1.43.0)"] +elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.42.0,<1.43.0)"] +elb = ["mypy-boto3-elb (>=1.42.0,<1.43.0)"] +elbv2 = ["mypy-boto3-elbv2 (>=1.42.0,<1.43.0)"] +elementalinference = ["mypy-boto3-elementalinference (>=1.42.0,<1.43.0)"] +emr = ["mypy-boto3-emr (>=1.42.0,<1.43.0)"] +emr-containers = ["mypy-boto3-emr-containers (>=1.42.0,<1.43.0)"] +emr-serverless = ["mypy-boto3-emr-serverless (>=1.42.0,<1.43.0)"] +entityresolution = ["mypy-boto3-entityresolution (>=1.42.0,<1.43.0)"] +es = ["mypy-boto3-es (>=1.42.0,<1.43.0)"] +essential = ["mypy-boto3-cloudformation (>=1.42.0,<1.43.0)", "mypy-boto3-dynamodb (>=1.42.0,<1.43.0)", "mypy-boto3-ec2 (>=1.42.0,<1.43.0)", "mypy-boto3-lambda (>=1.42.0,<1.43.0)", "mypy-boto3-rds (>=1.42.0,<1.43.0)", "mypy-boto3-s3 (>=1.42.0,<1.43.0)", "mypy-boto3-sqs (>=1.42.0,<1.43.0)"] +events = ["mypy-boto3-events (>=1.42.0,<1.43.0)"] +evs = ["mypy-boto3-evs (>=1.42.0,<1.43.0)"] +finspace = ["mypy-boto3-finspace (>=1.42.0,<1.43.0)"] +finspace-data = ["mypy-boto3-finspace-data (>=1.42.0,<1.43.0)"] +firehose = ["mypy-boto3-firehose (>=1.42.0,<1.43.0)"] +fis = ["mypy-boto3-fis (>=1.42.0,<1.43.0)"] +fms = ["mypy-boto3-fms (>=1.42.0,<1.43.0)"] +forecast = ["mypy-boto3-forecast (>=1.42.0,<1.43.0)"] +forecastquery = ["mypy-boto3-forecastquery (>=1.42.0,<1.43.0)"] +frauddetector = ["mypy-boto3-frauddetector (>=1.42.0,<1.43.0)"] +freetier = ["mypy-boto3-freetier (>=1.42.0,<1.43.0)"] 
+fsx = ["mypy-boto3-fsx (>=1.42.0,<1.43.0)"] +full = ["boto3-stubs-full (>=1.42.0,<1.43.0)"] +gamelift = ["mypy-boto3-gamelift (>=1.42.0,<1.43.0)"] +gameliftstreams = ["mypy-boto3-gameliftstreams (>=1.42.0,<1.43.0)"] +geo-maps = ["mypy-boto3-geo-maps (>=1.42.0,<1.43.0)"] +geo-places = ["mypy-boto3-geo-places (>=1.42.0,<1.43.0)"] +geo-routes = ["mypy-boto3-geo-routes (>=1.42.0,<1.43.0)"] +glacier = ["mypy-boto3-glacier (>=1.42.0,<1.43.0)"] +globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.42.0,<1.43.0)"] +glue = ["mypy-boto3-glue (>=1.42.0,<1.43.0)"] +grafana = ["mypy-boto3-grafana (>=1.42.0,<1.43.0)"] +greengrass = ["mypy-boto3-greengrass (>=1.42.0,<1.43.0)"] +greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.42.0,<1.43.0)"] +groundstation = ["mypy-boto3-groundstation (>=1.42.0,<1.43.0)"] +guardduty = ["mypy-boto3-guardduty (>=1.42.0,<1.43.0)"] +health = ["mypy-boto3-health (>=1.42.0,<1.43.0)"] +healthlake = ["mypy-boto3-healthlake (>=1.42.0,<1.43.0)"] +iam = ["mypy-boto3-iam (>=1.42.0,<1.43.0)"] +identitystore = ["mypy-boto3-identitystore (>=1.42.0,<1.43.0)"] +imagebuilder = ["mypy-boto3-imagebuilder (>=1.42.0,<1.43.0)"] +importexport = ["mypy-boto3-importexport (>=1.42.0,<1.43.0)"] +inspector = ["mypy-boto3-inspector (>=1.42.0,<1.43.0)"] +inspector-scan = ["mypy-boto3-inspector-scan (>=1.42.0,<1.43.0)"] +inspector2 = ["mypy-boto3-inspector2 (>=1.42.0,<1.43.0)"] +interconnect = ["mypy-boto3-interconnect (>=1.42.0,<1.43.0)"] +internetmonitor = ["mypy-boto3-internetmonitor (>=1.42.0,<1.43.0)"] +invoicing = ["mypy-boto3-invoicing (>=1.42.0,<1.43.0)"] +iot = ["mypy-boto3-iot (>=1.42.0,<1.43.0)"] +iot-data = ["mypy-boto3-iot-data (>=1.42.0,<1.43.0)"] +iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.42.0,<1.43.0)"] +iot-managed-integrations = ["mypy-boto3-iot-managed-integrations (>=1.42.0,<1.43.0)"] +iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.42.0,<1.43.0)"] +iotevents = ["mypy-boto3-iotevents (>=1.42.0,<1.43.0)"] +iotevents-data = 
["mypy-boto3-iotevents-data (>=1.42.0,<1.43.0)"] +iotfleetwise = ["mypy-boto3-iotfleetwise (>=1.42.0,<1.43.0)"] +iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.42.0,<1.43.0)"] +iotsitewise = ["mypy-boto3-iotsitewise (>=1.42.0,<1.43.0)"] +iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.42.0,<1.43.0)"] +iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.42.0,<1.43.0)"] +iotwireless = ["mypy-boto3-iotwireless (>=1.42.0,<1.43.0)"] +ivs = ["mypy-boto3-ivs (>=1.42.0,<1.43.0)"] +ivs-realtime = ["mypy-boto3-ivs-realtime (>=1.42.0,<1.43.0)"] +ivschat = ["mypy-boto3-ivschat (>=1.42.0,<1.43.0)"] +kafka = ["mypy-boto3-kafka (>=1.42.0,<1.43.0)"] +kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.42.0,<1.43.0)"] +kendra = ["mypy-boto3-kendra (>=1.42.0,<1.43.0)"] +kendra-ranking = ["mypy-boto3-kendra-ranking (>=1.42.0,<1.43.0)"] +keyspaces = ["mypy-boto3-keyspaces (>=1.42.0,<1.43.0)"] +keyspacesstreams = ["mypy-boto3-keyspacesstreams (>=1.42.0,<1.43.0)"] +kinesis = ["mypy-boto3-kinesis (>=1.42.0,<1.43.0)"] +kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.42.0,<1.43.0)"] +kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.42.0,<1.43.0)"] +kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.42.0,<1.43.0)"] +kinesis-video-webrtc-storage = ["mypy-boto3-kinesis-video-webrtc-storage (>=1.42.0,<1.43.0)"] +kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.42.0,<1.43.0)"] +kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.42.0,<1.43.0)"] +kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.42.0,<1.43.0)"] +kms = ["mypy-boto3-kms (>=1.42.0,<1.43.0)"] +lakeformation = ["mypy-boto3-lakeformation (>=1.42.0,<1.43.0)"] +lambda = ["mypy-boto3-lambda (>=1.42.0,<1.43.0)"] +launch-wizard = ["mypy-boto3-launch-wizard (>=1.42.0,<1.43.0)"] +lex-models = ["mypy-boto3-lex-models (>=1.42.0,<1.43.0)"] +lex-runtime = ["mypy-boto3-lex-runtime (>=1.42.0,<1.43.0)"] +lexv2-models = ["mypy-boto3-lexv2-models (>=1.42.0,<1.43.0)"] +lexv2-runtime = 
["mypy-boto3-lexv2-runtime (>=1.42.0,<1.43.0)"] +license-manager = ["mypy-boto3-license-manager (>=1.42.0,<1.43.0)"] +license-manager-linux-subscriptions = ["mypy-boto3-license-manager-linux-subscriptions (>=1.42.0,<1.43.0)"] +license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.42.0,<1.43.0)"] +lightsail = ["mypy-boto3-lightsail (>=1.42.0,<1.43.0)"] +location = ["mypy-boto3-location (>=1.42.0,<1.43.0)"] +logs = ["mypy-boto3-logs (>=1.42.0,<1.43.0)"] +lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.42.0,<1.43.0)"] +m2 = ["mypy-boto3-m2 (>=1.42.0,<1.43.0)"] +machinelearning = ["mypy-boto3-machinelearning (>=1.42.0,<1.43.0)"] +macie2 = ["mypy-boto3-macie2 (>=1.42.0,<1.43.0)"] +mailmanager = ["mypy-boto3-mailmanager (>=1.42.0,<1.43.0)"] +managedblockchain = ["mypy-boto3-managedblockchain (>=1.42.0,<1.43.0)"] +managedblockchain-query = ["mypy-boto3-managedblockchain-query (>=1.42.0,<1.43.0)"] +marketplace-agreement = ["mypy-boto3-marketplace-agreement (>=1.42.0,<1.43.0)"] +marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.42.0,<1.43.0)"] +marketplace-deployment = ["mypy-boto3-marketplace-deployment (>=1.42.0,<1.43.0)"] +marketplace-discovery = ["mypy-boto3-marketplace-discovery (>=1.42.0,<1.43.0)"] +marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.42.0,<1.43.0)"] +marketplace-reporting = ["mypy-boto3-marketplace-reporting (>=1.42.0,<1.43.0)"] +marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.42.0,<1.43.0)"] +mediaconnect = ["mypy-boto3-mediaconnect (>=1.42.0,<1.43.0)"] +mediaconvert = ["mypy-boto3-mediaconvert (>=1.42.0,<1.43.0)"] +medialive = ["mypy-boto3-medialive (>=1.42.0,<1.43.0)"] +mediapackage = ["mypy-boto3-mediapackage (>=1.42.0,<1.43.0)"] +mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.42.0,<1.43.0)"] +mediapackagev2 = ["mypy-boto3-mediapackagev2 (>=1.42.0,<1.43.0)"] +mediastore = ["mypy-boto3-mediastore (>=1.42.0,<1.43.0)"] +mediastore-data = 
["mypy-boto3-mediastore-data (>=1.42.0,<1.43.0)"] +mediatailor = ["mypy-boto3-mediatailor (>=1.42.0,<1.43.0)"] +medical-imaging = ["mypy-boto3-medical-imaging (>=1.42.0,<1.43.0)"] +memorydb = ["mypy-boto3-memorydb (>=1.42.0,<1.43.0)"] +meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.42.0,<1.43.0)"] +mgh = ["mypy-boto3-mgh (>=1.42.0,<1.43.0)"] +mgn = ["mypy-boto3-mgn (>=1.42.0,<1.43.0)"] +migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.42.0,<1.43.0)"] +migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.42.0,<1.43.0)"] +migrationhuborchestrator = ["mypy-boto3-migrationhuborchestrator (>=1.42.0,<1.43.0)"] +migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.42.0,<1.43.0)"] +mpa = ["mypy-boto3-mpa (>=1.42.0,<1.43.0)"] +mq = ["mypy-boto3-mq (>=1.42.0,<1.43.0)"] +mturk = ["mypy-boto3-mturk (>=1.42.0,<1.43.0)"] +mwaa = ["mypy-boto3-mwaa (>=1.42.0,<1.43.0)"] +mwaa-serverless = ["mypy-boto3-mwaa-serverless (>=1.42.0,<1.43.0)"] +neptune = ["mypy-boto3-neptune (>=1.42.0,<1.43.0)"] +neptune-graph = ["mypy-boto3-neptune-graph (>=1.42.0,<1.43.0)"] +neptunedata = ["mypy-boto3-neptunedata (>=1.42.0,<1.43.0)"] +network-firewall = ["mypy-boto3-network-firewall (>=1.42.0,<1.43.0)"] +networkflowmonitor = ["mypy-boto3-networkflowmonitor (>=1.42.0,<1.43.0)"] +networkmanager = ["mypy-boto3-networkmanager (>=1.42.0,<1.43.0)"] +networkmonitor = ["mypy-boto3-networkmonitor (>=1.42.0,<1.43.0)"] +notifications = ["mypy-boto3-notifications (>=1.42.0,<1.43.0)"] +notificationscontacts = ["mypy-boto3-notificationscontacts (>=1.42.0,<1.43.0)"] +nova-act = ["mypy-boto3-nova-act (>=1.42.0,<1.43.0)"] +oam = ["mypy-boto3-oam (>=1.42.0,<1.43.0)"] +observabilityadmin = ["mypy-boto3-observabilityadmin (>=1.42.0,<1.43.0)"] +odb = ["mypy-boto3-odb (>=1.42.0,<1.43.0)"] +omics = ["mypy-boto3-omics (>=1.42.0,<1.43.0)"] +opensearch = ["mypy-boto3-opensearch (>=1.42.0,<1.43.0)"] +opensearchserverless = ["mypy-boto3-opensearchserverless 
(>=1.42.0,<1.43.0)"] +organizations = ["mypy-boto3-organizations (>=1.42.0,<1.43.0)"] +osis = ["mypy-boto3-osis (>=1.42.0,<1.43.0)"] +outposts = ["mypy-boto3-outposts (>=1.42.0,<1.43.0)"] +panorama = ["mypy-boto3-panorama (>=1.42.0,<1.43.0)"] +partnercentral-account = ["mypy-boto3-partnercentral-account (>=1.42.0,<1.43.0)"] +partnercentral-benefits = ["mypy-boto3-partnercentral-benefits (>=1.42.0,<1.43.0)"] +partnercentral-channel = ["mypy-boto3-partnercentral-channel (>=1.42.0,<1.43.0)"] +partnercentral-selling = ["mypy-boto3-partnercentral-selling (>=1.42.0,<1.43.0)"] +payment-cryptography = ["mypy-boto3-payment-cryptography (>=1.42.0,<1.43.0)"] +payment-cryptography-data = ["mypy-boto3-payment-cryptography-data (>=1.42.0,<1.43.0)"] +pca-connector-ad = ["mypy-boto3-pca-connector-ad (>=1.42.0,<1.43.0)"] +pca-connector-scep = ["mypy-boto3-pca-connector-scep (>=1.42.0,<1.43.0)"] +pcs = ["mypy-boto3-pcs (>=1.42.0,<1.43.0)"] +personalize = ["mypy-boto3-personalize (>=1.42.0,<1.43.0)"] +personalize-events = ["mypy-boto3-personalize-events (>=1.42.0,<1.43.0)"] +personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.42.0,<1.43.0)"] +pi = ["mypy-boto3-pi (>=1.42.0,<1.43.0)"] +pinpoint = ["mypy-boto3-pinpoint (>=1.42.0,<1.43.0)"] +pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.42.0,<1.43.0)"] +pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.42.0,<1.43.0)"] +pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.42.0,<1.43.0)"] +pipes = ["mypy-boto3-pipes (>=1.42.0,<1.43.0)"] +polly = ["mypy-boto3-polly (>=1.42.0,<1.43.0)"] +pricing = ["mypy-boto3-pricing (>=1.42.0,<1.43.0)"] +proton = ["mypy-boto3-proton (>=1.42.0,<1.43.0)"] +qapps = ["mypy-boto3-qapps (>=1.42.0,<1.43.0)"] +qbusiness = ["mypy-boto3-qbusiness (>=1.42.0,<1.43.0)"] +qconnect = ["mypy-boto3-qconnect (>=1.42.0,<1.43.0)"] +quicksight = ["mypy-boto3-quicksight (>=1.42.0,<1.43.0)"] +ram = ["mypy-boto3-ram (>=1.42.0,<1.43.0)"] +rbin = ["mypy-boto3-rbin (>=1.42.0,<1.43.0)"] +rds = 
["mypy-boto3-rds (>=1.42.0,<1.43.0)"] +rds-data = ["mypy-boto3-rds-data (>=1.42.0,<1.43.0)"] +redshift = ["mypy-boto3-redshift (>=1.42.0,<1.43.0)"] +redshift-data = ["mypy-boto3-redshift-data (>=1.42.0,<1.43.0)"] +redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.42.0,<1.43.0)"] +rekognition = ["mypy-boto3-rekognition (>=1.42.0,<1.43.0)"] +repostspace = ["mypy-boto3-repostspace (>=1.42.0,<1.43.0)"] +resiliencehub = ["mypy-boto3-resiliencehub (>=1.42.0,<1.43.0)"] +resource-explorer-2 = ["mypy-boto3-resource-explorer-2 (>=1.42.0,<1.43.0)"] +resource-groups = ["mypy-boto3-resource-groups (>=1.42.0,<1.43.0)"] +resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.42.0,<1.43.0)"] +rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.42.0,<1.43.0)"] +route53 = ["mypy-boto3-route53 (>=1.42.0,<1.43.0)"] +route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.42.0,<1.43.0)"] +route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.42.0,<1.43.0)"] +route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.42.0,<1.43.0)"] +route53domains = ["mypy-boto3-route53domains (>=1.42.0,<1.43.0)"] +route53globalresolver = ["mypy-boto3-route53globalresolver (>=1.42.0,<1.43.0)"] +route53profiles = ["mypy-boto3-route53profiles (>=1.42.0,<1.43.0)"] +route53resolver = ["mypy-boto3-route53resolver (>=1.42.0,<1.43.0)"] +rtbfabric = ["mypy-boto3-rtbfabric (>=1.42.0,<1.43.0)"] +rum = ["mypy-boto3-rum (>=1.42.0,<1.43.0)"] +s3 = ["mypy-boto3-s3 (>=1.42.0,<1.43.0)"] +s3control = ["mypy-boto3-s3control (>=1.42.0,<1.43.0)"] +s3files = ["mypy-boto3-s3files (>=1.42.0,<1.43.0)"] +s3outposts = ["mypy-boto3-s3outposts (>=1.42.0,<1.43.0)"] +s3tables = ["mypy-boto3-s3tables (>=1.42.0,<1.43.0)"] +s3vectors = ["mypy-boto3-s3vectors (>=1.42.0,<1.43.0)"] +sagemaker = ["mypy-boto3-sagemaker (>=1.42.0,<1.43.0)"] +sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.42.0,<1.43.0)"] +sagemaker-edge = 
["mypy-boto3-sagemaker-edge (>=1.42.0,<1.43.0)"] +sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.42.0,<1.43.0)"] +sagemaker-geospatial = ["mypy-boto3-sagemaker-geospatial (>=1.42.0,<1.43.0)"] +sagemaker-metrics = ["mypy-boto3-sagemaker-metrics (>=1.42.0,<1.43.0)"] +sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.42.0,<1.43.0)"] +savingsplans = ["mypy-boto3-savingsplans (>=1.42.0,<1.43.0)"] +scheduler = ["mypy-boto3-scheduler (>=1.42.0,<1.43.0)"] +schemas = ["mypy-boto3-schemas (>=1.42.0,<1.43.0)"] +sdb = ["mypy-boto3-sdb (>=1.42.0,<1.43.0)"] +secretsmanager = ["mypy-boto3-secretsmanager (>=1.42.0,<1.43.0)"] +security-ir = ["mypy-boto3-security-ir (>=1.42.0,<1.43.0)"] +securityagent = ["mypy-boto3-securityagent (>=1.42.0,<1.43.0)"] +securityhub = ["mypy-boto3-securityhub (>=1.42.0,<1.43.0)"] +securitylake = ["mypy-boto3-securitylake (>=1.42.0,<1.43.0)"] +serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.42.0,<1.43.0)"] +service-quotas = ["mypy-boto3-service-quotas (>=1.42.0,<1.43.0)"] +servicecatalog = ["mypy-boto3-servicecatalog (>=1.42.0,<1.43.0)"] +servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.42.0,<1.43.0)"] +servicediscovery = ["mypy-boto3-servicediscovery (>=1.42.0,<1.43.0)"] +ses = ["mypy-boto3-ses (>=1.42.0,<1.43.0)"] +sesv2 = ["mypy-boto3-sesv2 (>=1.42.0,<1.43.0)"] +shield = ["mypy-boto3-shield (>=1.42.0,<1.43.0)"] +signer = ["mypy-boto3-signer (>=1.42.0,<1.43.0)"] +signer-data = ["mypy-boto3-signer-data (>=1.42.0,<1.43.0)"] +signin = ["mypy-boto3-signin (>=1.42.0,<1.43.0)"] +simpledbv2 = ["mypy-boto3-simpledbv2 (>=1.42.0,<1.43.0)"] +simspaceweaver = ["mypy-boto3-simspaceweaver (>=1.42.0,<1.43.0)"] +snow-device-management = ["mypy-boto3-snow-device-management (>=1.42.0,<1.43.0)"] +snowball = ["mypy-boto3-snowball (>=1.42.0,<1.43.0)"] +sns = ["mypy-boto3-sns (>=1.42.0,<1.43.0)"] +socialmessaging = ["mypy-boto3-socialmessaging (>=1.42.0,<1.43.0)"] +sqs = ["mypy-boto3-sqs 
(>=1.42.0,<1.43.0)"] +ssm = ["mypy-boto3-ssm (>=1.42.0,<1.43.0)"] +ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.42.0,<1.43.0)"] +ssm-guiconnect = ["mypy-boto3-ssm-guiconnect (>=1.42.0,<1.43.0)"] +ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.42.0,<1.43.0)"] +ssm-quicksetup = ["mypy-boto3-ssm-quicksetup (>=1.42.0,<1.43.0)"] +ssm-sap = ["mypy-boto3-ssm-sap (>=1.42.0,<1.43.0)"] +sso = ["mypy-boto3-sso (>=1.42.0,<1.43.0)"] +sso-admin = ["mypy-boto3-sso-admin (>=1.42.0,<1.43.0)"] +sso-oidc = ["mypy-boto3-sso-oidc (>=1.42.0,<1.43.0)"] +stepfunctions = ["mypy-boto3-stepfunctions (>=1.42.0,<1.43.0)"] +storagegateway = ["mypy-boto3-storagegateway (>=1.42.0,<1.43.0)"] +sts = ["mypy-boto3-sts (>=1.42.0,<1.43.0)"] +supplychain = ["mypy-boto3-supplychain (>=1.42.0,<1.43.0)"] +support = ["mypy-boto3-support (>=1.42.0,<1.43.0)"] +support-app = ["mypy-boto3-support-app (>=1.42.0,<1.43.0)"] +sustainability = ["mypy-boto3-sustainability (>=1.42.0,<1.43.0)"] +swf = ["mypy-boto3-swf (>=1.42.0,<1.43.0)"] +synthetics = ["mypy-boto3-synthetics (>=1.42.0,<1.43.0)"] +taxsettings = ["mypy-boto3-taxsettings (>=1.42.0,<1.43.0)"] +textract = ["mypy-boto3-textract (>=1.42.0,<1.43.0)"] +timestream-influxdb = ["mypy-boto3-timestream-influxdb (>=1.42.0,<1.43.0)"] +timestream-query = ["mypy-boto3-timestream-query (>=1.42.0,<1.43.0)"] +timestream-write = ["mypy-boto3-timestream-write (>=1.42.0,<1.43.0)"] +tnb = ["mypy-boto3-tnb (>=1.42.0,<1.43.0)"] +transcribe = ["mypy-boto3-transcribe (>=1.42.0,<1.43.0)"] +transfer = ["mypy-boto3-transfer (>=1.42.0,<1.43.0)"] +translate = ["mypy-boto3-translate (>=1.42.0,<1.43.0)"] +trustedadvisor = ["mypy-boto3-trustedadvisor (>=1.42.0,<1.43.0)"] +uxc = ["mypy-boto3-uxc (>=1.42.0,<1.43.0)"] +verifiedpermissions = ["mypy-boto3-verifiedpermissions (>=1.42.0,<1.43.0)"] +voice-id = ["mypy-boto3-voice-id (>=1.42.0,<1.43.0)"] +vpc-lattice = ["mypy-boto3-vpc-lattice (>=1.42.0,<1.43.0)"] +waf = ["mypy-boto3-waf (>=1.42.0,<1.43.0)"] +waf-regional = 
["mypy-boto3-waf-regional (>=1.42.0,<1.43.0)"] +wafv2 = ["mypy-boto3-wafv2 (>=1.42.0,<1.43.0)"] +wellarchitected = ["mypy-boto3-wellarchitected (>=1.42.0,<1.43.0)"] +wickr = ["mypy-boto3-wickr (>=1.42.0,<1.43.0)"] +wisdom = ["mypy-boto3-wisdom (>=1.42.0,<1.43.0)"] +workdocs = ["mypy-boto3-workdocs (>=1.42.0,<1.43.0)"] +workmail = ["mypy-boto3-workmail (>=1.42.0,<1.43.0)"] +workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.42.0,<1.43.0)"] +workspaces = ["mypy-boto3-workspaces (>=1.42.0,<1.43.0)"] +workspaces-instances = ["mypy-boto3-workspaces-instances (>=1.42.0,<1.43.0)"] +workspaces-thin-client = ["mypy-boto3-workspaces-thin-client (>=1.42.0,<1.43.0)"] +workspaces-web = ["mypy-boto3-workspaces-web (>=1.42.0,<1.43.0)"] +xray = ["mypy-boto3-xray (>=1.42.0,<1.43.0)"] [[package]] name = "botocore" @@ -755,14 +830,14 @@ requests = "*" [[package]] name = "certifi" -version = "2026.1.4" +version = "2026.4.22" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c"}, - {file = "certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120"}, + {file = "certifi-2026.4.22-py3-none-any.whl", hash = "sha256:3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a"}, + {file = "certifi-2026.4.22.tar.gz", hash = "sha256:8d455352a37b71bf76a79caa83a3d6c25afee4a385d632127b6afb3963f1c580"}, ] [[package]] @@ -877,138 +952,154 @@ files = [ [[package]] name = "charset-normalizer" -version = "3.4.4" +version = "3.4.7" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
optional = false python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = 
"sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash 
= "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = 
"sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_riscv64.whl", hash = 
"sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-win32.whl", hash = "sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-win32.whl", hash = "sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-win_arm64.whl", hash = "sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50"}, - {file = "charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f"}, - {file = "charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cdd68a1fb318e290a2077696b7eb7a21a49163c455979c639bf5a5dcdc46617d"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:e17b8d5d6a8c47c85e68ca8379def1303fd360c3e22093a807cd34a71cd082b8"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:511ef87c8aec0783e08ac18565a16d435372bc1ac25a91e6ac7f5ef2b0bff790"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:007d05ec7321d12a40227aae9e2bc6dca73f3cb21058999a1df9e193555a9dcc"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf29836da5119f3c8a8a70667b0ef5fdca3bb12f80fd06487cfa575b3909b393"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:12d8baf840cc7889b37c7c770f478adea7adce3dcb3944d02ec87508e2dcf153"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d560742f3c0d62afaccf9f41fe485ed69bd7661a241f86a3ef0f0fb8b1a397af"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b14b2d9dac08e28bb8046a1a0434b1750eb221c8f5b87a68f4fa11a6f97b5e34"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:bc17a677b21b3502a21f66a8cc64f5bfad4df8a0b8434d661666f8ce90ac3af1"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:750e02e074872a3fad7f233b47734166440af3cdea0add3e95163110816d6752"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:4e5163c14bffd570ef2affbfdd77bba66383890797df43dc8b4cc7d6f500bf53"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6ed74185b2db44f41ef35fd1617c5888e59792da9bbc9190d6c7300617182616"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:94e1885b270625a9a828c9793b4d52a64445299baa1fea5a173bf1d3dd9a1a5a"}, + {file = 
"charset_normalizer-3.4.7-cp310-cp310-win32.whl", hash = "sha256:6785f414ae0f3c733c437e0f3929197934f526d19dfaa75e18fdb4f94c6fb374"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:6696b7688f54f5af4462118f0bfa7c1621eeb87154f77fa04b9295ce7a8f2943"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:66671f93accb62ed07da56613636f3641f1a12c13046ce91ffc923721f23c008"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7641bb8895e77f921102f72833904dcd9901df5d6d72a2ab8f31d04b7e51e4e7"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:202389074300232baeb53ae2569a60901f7efadd4245cf3a3bf0617d60b439d7"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:30b8d1d8c52a48c2c5690e152c169b673487a2a58de1ec7393196753063fcd5e"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:532bc9bf33a68613fd7d65e4b1c71a6a38d7d42604ecf239c77392e9b4e8998c"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fe249cb4651fd12605b7288b24751d8bfd46d35f12a20b1ba33dea122e690df"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:65bcd23054beab4d166035cabbc868a09c1a49d1efe458fe8e4361215df40265"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:08e721811161356f97b4059a9ba7bafb23ea5ee2255402c42881c214e173c6b4"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e060d01aec0a910bdccb8be71faf34e7799ce36950f8294c8bf612cba65a2c9e"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_armv7l.whl", hash = 
"sha256:38c0109396c4cfc574d502df99742a45c72c08eff0a36158b6f04000043dbf38"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1c2a768fdd44ee4a9339a9b0b130049139b8ce3c01d2ce09f67f5a68048d477c"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:1a87ca9d5df6fe460483d9a5bbf2b18f620cbed41b432e2bddb686228282d10b"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d635aab80466bc95771bb78d5370e74d36d1fe31467b6b29b8b57b2a3cd7d22c"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ae196f021b5e7c78e918242d217db021ed2a6ace2bc6ae94c0fc596221c7f58d"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-win32.whl", hash = "sha256:adb2597b428735679446b46c8badf467b4ca5f5056aae4d51a19f9570301b1ad"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:8e385e4267ab76874ae30db04c627faaaf0b509e1ccc11a95b3fc3e83f855c00"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:d4a48e5b3c2a489fae013b7589308a40146ee081f6f509e047e0e096084ceca1"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a"}, + {file = 
"charset_normalizer-3.4.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-win32.whl", hash = "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", 
hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18"}, + {file = 
"charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960"}, + {file = 
"charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c"}, + {file = 
"charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e5f4d355f0a2b1a31bc3edec6795b46324349c9cb25eed068049e4f472fb4259"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16d971e29578a5e97d7117866d15889a4a07befe0e87e703ed63cd90cb348c01"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dca4bbc466a95ba9c0234ef56d7dd9509f63da22274589ebd4ed7f1f4d4c54e3"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e80c8378d8f3d83cd3164da1ad2df9e37a666cdde7b1cb2298ed0b558064be30"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:36836d6ff945a00b88ba1e4572d721e60b5b8c98c155d465f56ad19d68f23734"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux_2_31_armv7l.whl", hash = "sha256:bd9b23791fe793e4968dba0c447e12f78e425c59fc0e3b97f6450f4781f3ee60"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:aef65cd602a6d0e0ff6f9930fcb1c8fec60dd2cfcb6facaf4bdb0e5873042db0"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:82b271f5137d07749f7bf32f70b17ab6eaabedd297e75dce75081a24f76eb545"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:1efde3cae86c8c273f1eb3b287be7d8499420cf2fe7585c41d370d3e790054a5"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:c593052c465475e64bbfe5dbd81680f64a67fdc752c56d7a0ae205dc8aeefe0f"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_riscv64.whl", hash = 
"sha256:af21eb4409a119e365397b2adbaca4c9ccab56543a65d5dbd9f920d6ac29f686"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:84c018e49c3bf790f9c2771c45e9313a08c2c2a6342b162cd650258b57817706"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dd915403e231e6b1809fe9b6d9fc55cf8fb5e02765ac625d9cd623342a7905d7"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-win32.whl", hash = "sha256:320ade88cfb846b8cd6b4ddf5ee9e80ee0c1f52401f2456b84ae1ae6a1a5f207"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:1dc8b0ea451d6e69735094606991f32867807881400f808a106ee1d963c46a83"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:177a0ba5f0211d488e295aaf82707237e331c24788d8d76c96c5a41594723217"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e0d51f618228538a3e8f46bd246f87a6cd030565e015803691603f55e12afb5"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:14265bfe1f09498b9d8ec91e9ec9fa52775edf90fcbde092b25f4a33d444fea9"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:87fad7d9ba98c86bcb41b2dc8dbb326619be2562af1f8ff50776a39e55721c5a"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f22dec1690b584cea26fade98b2435c132c1b5f68e39f5a0b7627cd7ae31f1dc"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:d61f00a0869d77422d9b2aba989e2d24afa6ffd552af442e0e58de4f35ea6d00"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6370e8686f662e6a3941ee48ed4742317cafbe5707e36406e9df792cdb535776"}, + {file = 
"charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a6c5863edfbe888d9eff9c8b8087354e27618d9da76425c119293f11712a6319"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:ed065083d0898c9d5b4bbec7b026fd755ff7454e6e8b73a67f8c744b13986e24"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:2cd4a60d0e2fb04537162c62bbbb4182f53541fe0ede35cdf270a1c1e723cc42"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:813c0e0132266c08eb87469a642cb30aaff57c5f426255419572aaeceeaa7bf4"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:07d9e39b01743c3717745f4c530a6349eadbfa043c7577eef86c502c15df2c67"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c0f081d69a6e58272819b70288d3221a6ee64b98df852631c80f293514d3b274"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-win32.whl", hash = "sha256:8751d2787c9131302398b11e6c8068053dcb55d5a8964e114b6e196cf16cb366"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:12a6fff75f6bc66711b73a2f0addfc4c8c15a20e805146a02d147a318962c444"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-win_arm64.whl", hash = "sha256:bb8cc7534f51d9a017b93e3e85b260924f909601c3df002bcdb58ddb4dc41a5c"}, + {file = "charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d"}, + {file = "charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5"}, ] [[package]] name = "click" -version = "8.3.1" +version = "8.3.3" description = "Composable command line interface toolkit" optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6"}, - {file = 
"click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a"}, + {file = "click-8.3.3-py3-none-any.whl", hash = "sha256:a2bf429bb3033c89fa4936ffb35d5cb471e3719e1f3c8a7c3fff0b8314305613"}, + {file = "click-8.3.3.tar.gz", hash = "sha256:398329ad4837b2ff7cbe1dd166a4c0f8900c3ca3a218de04466f38f6497f18a2"}, ] [package.dependencies] @@ -1025,7 +1116,7 @@ files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "extra == \"server\" and (platform_system == \"Windows\" or sys_platform == \"win32\")", dev = "sys_platform == \"win32\""} +markers = {main = "platform_system == \"Windows\" or extra == \"server\" and sys_platform == \"win32\"", dev = "sys_platform == \"win32\""} [[package]] name = "coloredlogs" @@ -1069,118 +1160,118 @@ type = ["pytest-mypy"] [[package]] name = "coverage" -version = "7.13.4" +version = "7.13.5" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "coverage-7.13.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0fc31c787a84f8cd6027eba44010517020e0d18487064cd3d8968941856d1415"}, - {file = "coverage-7.13.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a32ebc02a1805adf637fc8dec324b5cdacd2e493515424f70ee33799573d661b"}, - {file = "coverage-7.13.4-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e24f9156097ff9dc286f2f913df3a7f63c0e333dcafa3c196f2c18b4175ca09a"}, - {file = "coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8041b6c5bfdc03257666e9881d33b1abc88daccaf73f7b6340fb7946655cd10f"}, - {file = "coverage-7.13.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:2a09cfa6a5862bc2fc6ca7c3def5b2926194a56b8ab78ffcf617d28911123012"}, - {file = "coverage-7.13.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:296f8b0af861d3970c2a4d8c91d48eb4dd4771bcef9baedec6a9b515d7de3def"}, - {file = "coverage-7.13.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e101609bcbbfb04605ea1027b10dc3735c094d12d40826a60f897b98b1c30256"}, - {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:aa3feb8db2e87ff5e6d00d7e1480ae241876286691265657b500886c98f38bda"}, - {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4fc7fa81bbaf5a02801b65346c8b3e657f1d93763e58c0abdf7c992addd81a92"}, - {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:33901f604424145c6e9c2398684b92e176c0b12df77d52db81c20abd48c3794c"}, - {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:bb28c0f2cf2782508a40cec377935829d5fcc3ad9a3681375af4e84eb34b6b58"}, - {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d107aff57a83222ddbd8d9ee705ede2af2cc926608b57abed8ef96b50b7e8f9"}, - {file = "coverage-7.13.4-cp310-cp310-win32.whl", hash = "sha256:a6f94a7d00eb18f1b6d403c91a88fd58cfc92d4b16080dfdb774afc8294469bf"}, - {file = "coverage-7.13.4-cp310-cp310-win_amd64.whl", hash = "sha256:2cb0f1e000ebc419632bbe04366a8990b6e32c4e0b51543a6484ffe15eaeda95"}, - {file = "coverage-7.13.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d490ba50c3f35dd7c17953c68f3270e7ccd1c6642e2d2afe2d8e720b98f5a053"}, - {file = "coverage-7.13.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:19bc3c88078789f8ef36acb014d7241961dbf883fd2533d18cb1e7a5b4e28b11"}, - {file = "coverage-7.13.4-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3998e5a32e62fdf410c0dbd3115df86297995d6e3429af80b8798aad894ca7aa"}, - {file = 
"coverage-7.13.4-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e264226ec98e01a8e1054314af91ee6cde0eacac4f465cc93b03dbe0bce2fd7"}, - {file = "coverage-7.13.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a3aa4e7b9e416774b21797365b358a6e827ffadaaca81b69ee02946852449f00"}, - {file = "coverage-7.13.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:71ca20079dd8f27fcf808817e281e90220475cd75115162218d0e27549f95fef"}, - {file = "coverage-7.13.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e2f25215f1a359ab17320b47bcdaca3e6e6356652e8256f2441e4ef972052903"}, - {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d65b2d373032411e86960604dc4edac91fdfb5dca539461cf2cbe78327d1e64f"}, - {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94eb63f9b363180aff17de3e7c8760c3ba94664ea2695c52f10111244d16a299"}, - {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e856bf6616714c3a9fbc270ab54103f4e685ba236fa98c054e8f87f266c93505"}, - {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:65dfcbe305c3dfe658492df2d85259e0d79ead4177f9ae724b6fb245198f55d6"}, - {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b507778ae8a4c915436ed5c2e05b4a6cecfa70f734e19c22a005152a11c7b6a9"}, - {file = "coverage-7.13.4-cp311-cp311-win32.whl", hash = "sha256:784fc3cf8be001197b652d51d3fd259b1e2262888693a4636e18879f613a62a9"}, - {file = "coverage-7.13.4-cp311-cp311-win_amd64.whl", hash = "sha256:2421d591f8ca05b308cf0092807308b2facbefe54af7c02ac22548b88b95c98f"}, - {file = "coverage-7.13.4-cp311-cp311-win_arm64.whl", hash = "sha256:79e73a76b854d9c6088fe5d8b2ebe745f8681c55f7397c3c0a016192d681045f"}, - {file = "coverage-7.13.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:02231499b08dabbe2b96612993e5fc34217cdae907a51b906ac7fca8027a4459"}, - {file = "coverage-7.13.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40aa8808140e55dc022b15d8aa7f651b6b3d68b365ea0398f1441e0b04d859c3"}, - {file = "coverage-7.13.4-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5b856a8ccf749480024ff3bd7310adaef57bf31fd17e1bfc404b7940b6986634"}, - {file = "coverage-7.13.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c048ea43875fbf8b45d476ad79f179809c590ec7b79e2035c662e7afa3192e3"}, - {file = "coverage-7.13.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b7b38448866e83176e28086674fe7368ab8590e4610fb662b44e345b86d63ffa"}, - {file = "coverage-7.13.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:de6defc1c9badbf8b9e67ae90fd00519186d6ab64e5cc5f3d21359c2a9b2c1d3"}, - {file = "coverage-7.13.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7eda778067ad7ffccd23ecffce537dface96212576a07924cbf0d8799d2ded5a"}, - {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e87f6c587c3f34356c3759f0420693e35e7eb0e2e41e4c011cb6ec6ecbbf1db7"}, - {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8248977c2e33aecb2ced42fef99f2d319e9904a36e55a8a68b69207fb7e43edc"}, - {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:25381386e80ae727608e662474db537d4df1ecd42379b5ba33c84633a2b36d47"}, - {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:ee756f00726693e5ba94d6df2bdfd64d4852d23b09bb0bc700e3b30e6f333985"}, - {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fdfc1e28e7c7cdce44985b3043bc13bbd9c747520f94a4d7164af8260b3d91f0"}, - {file = "coverage-7.13.4-cp312-cp312-win32.whl", hash = 
"sha256:01d4cbc3c283a17fc1e42d614a119f7f438eabb593391283adca8dc86eff1246"}, - {file = "coverage-7.13.4-cp312-cp312-win_amd64.whl", hash = "sha256:9401ebc7ef522f01d01d45532c68c5ac40fb27113019b6b7d8b208f6e9baa126"}, - {file = "coverage-7.13.4-cp312-cp312-win_arm64.whl", hash = "sha256:b1ec7b6b6e93255f952e27ab58fbc68dcc468844b16ecbee881aeb29b6ab4d8d"}, - {file = "coverage-7.13.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b66a2da594b6068b48b2692f043f35d4d3693fb639d5ea8b39533c2ad9ac3ab9"}, - {file = "coverage-7.13.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3599eb3992d814d23b35c536c28df1a882caa950f8f507cef23d1cbf334995ac"}, - {file = "coverage-7.13.4-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:93550784d9281e374fb5a12bf1324cc8a963fd63b2d2f223503ef0fd4aa339ea"}, - {file = "coverage-7.13.4-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b720ce6a88a2755f7c697c23268ddc47a571b88052e6b155224347389fdf6a3b"}, - {file = "coverage-7.13.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7b322db1284a2ed3aa28ffd8ebe3db91c929b7a333c0820abec3d838ef5b3525"}, - {file = "coverage-7.13.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4594c67d8a7c89cf922d9df0438c7c7bb022ad506eddb0fdb2863359ff78242"}, - {file = "coverage-7.13.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:53d133df809c743eb8bce33b24bcababb371f4441340578cd406e084d94a6148"}, - {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76451d1978b95ba6507a039090ba076105c87cc76fc3efd5d35d72093964d49a"}, - {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7f57b33491e281e962021de110b451ab8a24182589be17e12a22c79047935e23"}, - {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:1731dc33dc276dafc410a885cbf5992f1ff171393e48a21453b78727d090de80"}, - {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:bd60d4fe2f6fa7dff9223ca1bbc9f05d2b6697bc5961072e5d3b952d46e1b1ea"}, - {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9181a3ccead280b828fae232df12b16652702b49d41e99d657f46cc7b1f6ec7a"}, - {file = "coverage-7.13.4-cp313-cp313-win32.whl", hash = "sha256:f53d492307962561ac7de4cd1de3e363589b000ab69617c6156a16ba7237998d"}, - {file = "coverage-7.13.4-cp313-cp313-win_amd64.whl", hash = "sha256:e6f70dec1cc557e52df5306d051ef56003f74d56e9c4dd7ddb07e07ef32a84dd"}, - {file = "coverage-7.13.4-cp313-cp313-win_arm64.whl", hash = "sha256:fb07dc5da7e849e2ad31a5d74e9bece81f30ecf5a42909d0a695f8bd1874d6af"}, - {file = "coverage-7.13.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:40d74da8e6c4b9ac18b15331c4b5ebc35a17069410cad462ad4f40dcd2d50c0d"}, - {file = "coverage-7.13.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4223b4230a376138939a9173f1bdd6521994f2aff8047fae100d6d94d50c5a12"}, - {file = "coverage-7.13.4-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1d4be36a5114c499f9f1f9195e95ebf979460dbe2d88e6816ea202010ba1c34b"}, - {file = "coverage-7.13.4-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:200dea7d1e8095cc6e98cdabe3fd1d21ab17d3cee6dab00cadbb2fe35d9c15b9"}, - {file = "coverage-7.13.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8eb931ee8e6d8243e253e5ed7336deea6904369d2fd8ae6e43f68abbf167092"}, - {file = "coverage-7.13.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:75eab1ebe4f2f64d9509b984f9314d4aa788540368218b858dad56dc8f3e5eb9"}, - {file = "coverage-7.13.4-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:c35eb28c1d085eb7d8c9b3296567a1bebe03ce72962e932431b9a61f28facf26"}, - {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb88b316ec33760714a4720feb2816a3a59180fd58c1985012054fa7aebee4c2"}, - {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7d41eead3cc673cbd38a4417deb7fd0b4ca26954ff7dc6078e33f6ff97bed940"}, - {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:fb26a934946a6afe0e326aebe0730cdff393a8bc0bbb65a2f41e30feddca399c"}, - {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:dae88bc0fc77edaa65c14be099bd57ee140cf507e6bfdeea7938457ab387efb0"}, - {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:845f352911777a8e722bfce168958214951e07e47e5d5d9744109fa5fe77f79b"}, - {file = "coverage-7.13.4-cp313-cp313t-win32.whl", hash = "sha256:2fa8d5f8de70688a28240de9e139fa16b153cc3cbb01c5f16d88d6505ebdadf9"}, - {file = "coverage-7.13.4-cp313-cp313t-win_amd64.whl", hash = "sha256:9351229c8c8407645840edcc277f4a2d44814d1bc34a2128c11c2a031d45a5dd"}, - {file = "coverage-7.13.4-cp313-cp313t-win_arm64.whl", hash = "sha256:30b8d0512f2dc8c8747557e8fb459d6176a2c9e5731e2b74d311c03b78451997"}, - {file = "coverage-7.13.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:300deaee342f90696ed186e3a00c71b5b3d27bffe9e827677954f4ee56969601"}, - {file = "coverage-7.13.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29e3220258d682b6226a9b0925bc563ed9a1ebcff3cad30f043eceea7eaf2689"}, - {file = "coverage-7.13.4-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:391ee8f19bef69210978363ca930f7328081c6a0152f1166c91f0b5fdd2a773c"}, - {file = "coverage-7.13.4-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0dd7ab8278f0d58a0128ba2fca25824321f05d059c1441800e934ff2efa52129"}, - {file = 
"coverage-7.13.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78cdf0d578b15148b009ccf18c686aa4f719d887e76e6b40c38ffb61d264a552"}, - {file = "coverage-7.13.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:48685fee12c2eb3b27c62f2658e7ea21e9c3239cba5a8a242801a0a3f6a8c62a"}, - {file = "coverage-7.13.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4e83efc079eb39480e6346a15a1bcb3e9b04759c5202d157e1dd4303cd619356"}, - {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ecae9737b72408d6a950f7e525f30aca12d4bd8dd95e37342e5beb3a2a8c4f71"}, - {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ae4578f8528569d3cf303fef2ea569c7f4c4059a38c8667ccef15c6e1f118aa5"}, - {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:6fdef321fdfbb30a197efa02d48fcd9981f0d8ad2ae8903ac318adc653f5df98"}, - {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b0f6ccf3dbe577170bebfce1318707d0e8c3650003cb4b3a9dd744575daa8b5"}, - {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75fcd519f2a5765db3f0e391eb3b7d150cce1a771bf4c9f861aeab86c767a3c0"}, - {file = "coverage-7.13.4-cp314-cp314-win32.whl", hash = "sha256:8e798c266c378da2bd819b0677df41ab46d78065fb2a399558f3f6cae78b2fbb"}, - {file = "coverage-7.13.4-cp314-cp314-win_amd64.whl", hash = "sha256:245e37f664d89861cf2329c9afa2c1fe9e6d4e1a09d872c947e70718aeeac505"}, - {file = "coverage-7.13.4-cp314-cp314-win_arm64.whl", hash = "sha256:ad27098a189e5838900ce4c2a99f2fe42a0bf0c2093c17c69b45a71579e8d4a2"}, - {file = "coverage-7.13.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:85480adfb35ffc32d40918aad81b89c69c9cc5661a9b8a81476d3e645321a056"}, - {file = "coverage-7.13.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = 
"sha256:79be69cf7f3bf9b0deeeb062eab7ac7f36cd4cc4c4dd694bd28921ba4d8596cc"}, - {file = "coverage-7.13.4-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:caa421e2684e382c5d8973ac55e4f36bed6821a9bad5c953494de960c74595c9"}, - {file = "coverage-7.13.4-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14375934243ee05f56c45393fe2ce81fe5cc503c07cee2bdf1725fb8bef3ffaf"}, - {file = "coverage-7.13.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25a41c3104d08edb094d9db0d905ca54d0cd41c928bb6be3c4c799a54753af55"}, - {file = "coverage-7.13.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6f01afcff62bf9a08fb32b2c1d6e924236c0383c02c790732b6537269e466a72"}, - {file = "coverage-7.13.4-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:eb9078108fbf0bcdde37c3f4779303673c2fa1fe8f7956e68d447d0dd426d38a"}, - {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0e086334e8537ddd17e5f16a344777c1ab8194986ec533711cbe6c41cde841b6"}, - {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:725d985c5ab621268b2edb8e50dfe57633dc69bda071abc470fed55a14935fd3"}, - {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:3c06f0f1337c667b971ca2f975523347e63ec5e500b9aa5882d91931cd3ef750"}, - {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:590c0ed4bf8e85f745e6b805b2e1c457b2e33d5255dd9729743165253bc9ad39"}, - {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:eb30bf180de3f632cd043322dad5751390e5385108b2807368997d1a92a509d0"}, - {file = "coverage-7.13.4-cp314-cp314t-win32.whl", hash = "sha256:c4240e7eded42d131a2d2c4dec70374b781b043ddc79a9de4d55ca71f8e98aea"}, - {file = "coverage-7.13.4-cp314-cp314t-win_amd64.whl", hash = 
"sha256:4c7d3cc01e7350f2f0f6f7036caaf5673fb56b6998889ccfe9e1c1fe75a9c932"}, - {file = "coverage-7.13.4-cp314-cp314t-win_arm64.whl", hash = "sha256:23e3f687cf945070d1c90f85db66d11e3025665d8dafa831301a0e0038f3db9b"}, - {file = "coverage-7.13.4-py3-none-any.whl", hash = "sha256:1af1641e57cf7ba1bd67d677c9abdbcd6cc2ab7da3bca7fa1e2b7e50e65f2ad0"}, - {file = "coverage-7.13.4.tar.gz", hash = "sha256:e5c8f6ed1e61a8b2dcdf31eb0b9bbf0130750ca79c1c49eb898e2ad86f5ccc91"}, + {file = "coverage-7.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5"}, + {file = "coverage-7.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf"}, + {file = "coverage-7.13.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8"}, + {file = "coverage-7.13.5-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4"}, + {file = "coverage-7.13.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d"}, + {file = "coverage-7.13.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930"}, + {file = "coverage-7.13.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d"}, + {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40"}, + {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878"}, + {file 
= "coverage-7.13.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400"}, + {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0"}, + {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0"}, + {file = "coverage-7.13.5-cp310-cp310-win32.whl", hash = "sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58"}, + {file = "coverage-7.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e"}, + {file = "coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d"}, + {file = "coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587"}, + {file = "coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642"}, + {file = "coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b"}, + {file = "coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686"}, + {file = "coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743"}, + {file = "coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75"}, + {file = 
"coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209"}, + {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a"}, + {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e"}, + {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd"}, + {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8"}, + {file = "coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf"}, + {file = "coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9"}, + {file = "coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028"}, + {file = "coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01"}, + {file = "coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422"}, + {file = "coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f"}, + {file = "coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5"}, + {file = "coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376"}, + {file = "coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256"}, + {file = "coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c"}, + {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5"}, + {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09"}, + {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9"}, + {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf"}, + {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c"}, + {file = "coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf"}, + {file = "coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810"}, + {file = "coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de"}, + {file = "coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1"}, + {file = "coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3"}, + {file = "coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", 
hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26"}, + {file = "coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3"}, + {file = "coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b"}, + {file = "coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a"}, + {file = "coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969"}, + {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161"}, + {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15"}, + {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1"}, + {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6"}, + {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17"}, + {file = "coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85"}, + {file = "coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b"}, + {file = "coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664"}, + 
{file = "coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d"}, + {file = "coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0"}, + {file = "coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806"}, + {file = "coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3"}, + {file = "coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9"}, + {file = "coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd"}, + {file = "coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606"}, + {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e"}, + {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0"}, + {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87"}, + {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479"}, + {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2"}, + {file = 
"coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a"}, + {file = "coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819"}, + {file = "coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911"}, + {file = "coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f"}, + {file = "coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e"}, + {file = "coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a"}, + {file = "coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510"}, + {file = "coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247"}, + {file = "coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6"}, + {file = "coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0"}, + {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882"}, + {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740"}, + {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", 
hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16"}, + {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0"}, + {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0"}, + {file = "coverage-7.13.5-cp314-cp314-win32.whl", hash = "sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc"}, + {file = "coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633"}, + {file = "coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8"}, + {file = "coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b"}, + {file = "coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c"}, + {file = "coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9"}, + {file = "coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29"}, + {file = "coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607"}, + {file = "coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90"}, + {file = "coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3"}, + {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab"}, + {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562"}, + {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2"}, + {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea"}, + {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a"}, + {file = "coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215"}, + {file = "coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43"}, + {file = "coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45"}, + {file = "coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61"}, + {file = "coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179"}, ] [package.extras] @@ -1188,62 +1279,62 @@ toml = ["tomli ; python_full_version <= \"3.11.0a6\""] [[package]] name = "cryptography" -version = "46.0.5" +version = "46.0.7" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = true python-versions = "!=3.9.0,!=3.9.1,>=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731"}, - {file = "cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82"}, - {file = "cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = 
"sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1"}, - {file = "cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48"}, - {file = "cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4"}, - {file = "cryptography-46.0.5-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663"}, - {file = 
"cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826"}, - {file = "cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d"}, - {file = "cryptography-46.0.5-cp314-cp314t-win32.whl", hash = "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a"}, - {file = "cryptography-46.0.5-cp314-cp314t-win_amd64.whl", hash = "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4"}, - {file = "cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d"}, - 
{file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c"}, - {file = "cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4"}, - {file = "cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9"}, - {file = "cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72"}, - {file = "cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:3b4995dc971c9fb83c25aa44cf45f02ba86f71ee600d81091c2f0cbae116b06c"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:bc84e875994c3b445871ea7181d424588171efec3e185dced958dad9e001950a"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2ae6971afd6246710480e3f15824ed3029a60fc16991db250034efd0b9fb4356"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d861ee9e76ace6cf36a6a89b959ec08e7bc2493ee39d07ffe5acb23ef46d27da"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:2b7a67c9cd56372f3249b39699f2ad479f6991e62ea15800973b956f4b73e257"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7"}, - {file = "cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d"}, + {file = "cryptography-46.0.7-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:ea42cbe97209df307fdc3b155f1b6fa2577c0defa8f1f7d3be7d31d189108ad4"}, + {file = 
"cryptography-46.0.7-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b36a4695e29fe69215d75960b22577197aca3f7a25b9cf9d165dcfe9d80bc325"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ad9ef796328c5e3c4ceed237a183f5d41d21150f972455a9d926593a1dcb308"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:73510b83623e080a2c35c62c15298096e2a5dc8d51c3b4e1740211839d0dea77"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:cbd5fb06b62bd0721e1170273d3f4d5a277044c47ca27ee257025146c34cbdd1"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:420b1e4109cc95f0e5700eed79908cef9268265c773d3a66f7af1eef53d409ef"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:24402210aa54baae71d99441d15bb5a1919c195398a87b563df84468160a65de"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:8a469028a86f12eb7d2fe97162d0634026d92a21f3ae0ac87ed1c4a447886c83"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9694078c5d44c157ef3162e3bf3946510b857df5a3955458381d1c7cfc143ddb"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:42a1e5f98abb6391717978baf9f90dc28a743b7d9be7f0751a6f56a75d14065b"}, + {file = "cryptography-46.0.7-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91bbcb08347344f810cbe49065914fe048949648f6bd5c2519f34619142bbe85"}, + {file = "cryptography-46.0.7-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5d1c02a14ceb9148cc7816249f64f623fbfee39e8c03b3650d842ad3f34d637e"}, + {file = "cryptography-46.0.7-cp311-abi3-win32.whl", hash = "sha256:d23c8ca48e44ee015cd0a54aeccdf9f09004eba9fc96f38c911011d9ff1bd457"}, + {file = "cryptography-46.0.7-cp311-abi3-win_amd64.whl", hash = "sha256:397655da831414d165029da9bc483bed2fe0e75dde6a1523ec2fe63f3c46046b"}, + 
{file = "cryptography-46.0.7-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:d151173275e1728cf7839aaa80c34fe550c04ddb27b34f48c232193df8db5842"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:db0f493b9181c7820c8134437eb8b0b4792085d37dbb24da050476ccb664e59c"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ebd6daf519b9f189f85c479427bbd6e9c9037862cf8fe89ee35503bd209ed902"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:b7b412817be92117ec5ed95f880defe9cf18a832e8cafacf0a22337dc1981b4d"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:fbfd0e5f273877695cb93baf14b185f4878128b250cc9f8e617ea0c025dfb022"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ffca7aa1d00cf7d6469b988c581598f2259e46215e0140af408966a24cf086ce"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:60627cf07e0d9274338521205899337c5d18249db56865f943cbe753aa96f40f"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:80406c3065e2c55d7f49a9550fe0c49b3f12e5bfff5dedb727e319e1afb9bf99"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:c5b1ccd1239f48b7151a65bc6dd54bcfcc15e028c8ac126d3fada09db0e07ef1"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:d5f7520159cd9c2154eb61eb67548ca05c5774d39e9c2c4339fd793fe7d097b2"}, + {file = "cryptography-46.0.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fcd8eac50d9138c1d7fc53a653ba60a2bee81a505f9f8850b6b2888555a45d0e"}, + {file = "cryptography-46.0.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:65814c60f8cc400c63131584e3e1fad01235edba2614b61fbfbfa954082db0ee"}, + {file = "cryptography-46.0.7-cp314-cp314t-win32.whl", hash = 
"sha256:fdd1736fed309b4300346f88f74cd120c27c56852c3838cab416e7a166f67298"}, + {file = "cryptography-46.0.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e06acf3c99be55aa3b516397fe42f5855597f430add9c17fa46bf2e0fb34c9bb"}, + {file = "cryptography-46.0.7-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:462ad5cb1c148a22b2e3bcc5ad52504dff325d17daf5df8d88c17dda1f75f2a4"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:84d4cced91f0f159a7ddacad249cc077e63195c36aac40b4150e7a57e84fffe7"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:128c5edfe5e5938b86b03941e94fac9ee793a94452ad1365c9fc3f4f62216832"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5e51be372b26ef4ba3de3c167cd3d1022934bc838ae9eaad7e644986d2a3d163"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:cdf1a610ef82abb396451862739e3fc93b071c844399e15b90726ef7470eeaf2"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1d25aee46d0c6f1a501adcddb2d2fee4b979381346a78558ed13e50aa8a59067"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:cdfbe22376065ffcf8be74dc9a909f032df19bc58a699456a21712d6e5eabfd0"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:abad9dac36cbf55de6eb49badd4016806b3165d396f64925bf2999bcb67837ba"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:935ce7e3cfdb53e3536119a542b839bb94ec1ad081013e9ab9b7cfd478b05006"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:35719dc79d4730d30f1c2b6474bd6acda36ae2dfae1e3c16f2051f215df33ce0"}, + {file = "cryptography-46.0.7-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:7bbc6ccf49d05ac8f7d7b5e2e2c33830d4fe2061def88210a126d130d7f71a85"}, + {file = 
"cryptography-46.0.7-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a1529d614f44b863a7b480c6d000fe93b59acee9c82ffa027cfadc77521a9f5e"}, + {file = "cryptography-46.0.7-cp38-abi3-win32.whl", hash = "sha256:f247c8c1a1fb45e12586afbb436ef21ff1e80670b2861a90353d9b025583d246"}, + {file = "cryptography-46.0.7-cp38-abi3-win_amd64.whl", hash = "sha256:506c4ff91eff4f82bdac7633318a526b1d1309fc07ca76a3ad182cb5b686d6d3"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:fc9ab8856ae6cf7c9358430e49b368f3108f050031442eaeb6b9d87e4dcf4e4f"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d3b99c535a9de0adced13d159c5a9cf65c325601aa30f4be08afd680643e9c15"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d02c738dacda7dc2a74d1b2b3177042009d5cab7c7079db74afc19e56ca1b455"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:04959522f938493042d595a736e7dbdff6eb6cc2339c11465b3ff89343b65f65"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:3986ac1dee6def53797289999eabe84798ad7817f3e97779b5061a95b0ee4968"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:258514877e15963bd43b558917bc9f54cf7cf866c38aa576ebf47a77ddbc43a4"}, + {file = "cryptography-46.0.7.tar.gz", hash = "sha256:e4cfd68c5f3e0bfdad0d38e023239b96a2fe84146481852dffbcca442c245aa5"}, ] [package.dependencies] @@ -1256,7 +1347,7 @@ nox = ["nox[uv] (>=2024.4.15)"] pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.14)", "ruff (>=0.11.11)"] sdist = ["build (>=1.0.0)"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi (>=2024)", "cryptography-vectors (==46.0.5)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] +test = ["certifi (>=2024)", "cryptography-vectors (==46.0.7)", "pretend (>=0.7)", "pytest (>=7.4.0)", 
"pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] test-randomorder = ["pytest-randomly"] [[package]] @@ -1307,15 +1398,15 @@ wmi = ["wmi (>=1.5.1) ; platform_system == \"Windows\""] [[package]] name = "ecdsa" -version = "0.19.1" +version = "0.19.2" description = "ECDSA cryptographic signature library (pure python)" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.6" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "ecdsa-0.19.1-py2.py3-none-any.whl", hash = "sha256:30638e27cf77b7e15c4c4cc1973720149e1033827cfd00661ca5c8cc0cdb24c3"}, - {file = "ecdsa-0.19.1.tar.gz", hash = "sha256:478cba7b62555866fcb3bb3fe985e06decbdb68ef55713c4e5ab98c57d508e61"}, + {file = "ecdsa-0.19.2-py2.py3-none-any.whl", hash = "sha256:840f5dc5e375c68f36c1a7a5b9caad28f95daa65185c9253c0c08dd952bb7399"}, + {file = "ecdsa-0.19.2.tar.gz", hash = "sha256:62635b0ac1ca2e027f82122b5b81cb706edc38cd91c63dda28e4f3455a2bf930"}, ] [package.dependencies] @@ -1348,6 +1439,7 @@ description = "\"Python interface to NCBI's eutilities API\"" optional = false python-versions = ">=3.6" groups = ["main"] +markers = "python_version == \"3.11\"" files = [ {file = "eutils-0.6.0-py2.py3-none-any.whl", hash = "sha256:4938c4baff6ca52141204ff3eff3a91ec1e83e52a6c5d92e7163585117b96566"}, {file = "eutils-0.6.0.tar.gz", hash = "sha256:3515178c0aadb836206a3eee2bc9f340f3213c13b53632e058eb58a9219d03cf"}, @@ -1361,6 +1453,23 @@ requests = "*" [package.extras] dev = ["flake8", "ipython", "mock", "pytest", "pytest-cov", "restview", "setuptools", "sphinx", "sphinx-rtd-theme", "tox", "vcrpy", "yapf"] +[[package]] +name = "eutils" +version = "0.6.1" +description = "Python interface to NCBI's eutilities API" +optional = false +python-versions = ">=3.12" +groups = ["main"] +markers = "python_version >= \"3.12\"" +files = [ + {file = "eutils-0.6.1-py3-none-any.whl", hash = "sha256:6916efd10f397f20ba0e6bd5b84d4e868e077161509e240d7c4ab1d98fb2d3b1"}, + 
{file = "eutils-0.6.1.tar.gz", hash = "sha256:68d4e007996d4b08171a936413f6ec2cd4c045ac83acf7df9e9b7110df06c030"}, +] + +[package.dependencies] +lxml = "*" +requests = "*" + [[package]] name = "executing" version = "2.2.1" @@ -1426,14 +1535,14 @@ standard-no-fastapi-cloud-cli = ["email-validator (>=2.0.0)", "fastapi-cli[stand [[package]] name = "filelock" -version = "3.21.2" +version = "3.29.0" description = "A platform independent file lock." optional = false python-versions = ">=3.10" -groups = ["dev"] +groups = ["main", "dev"] files = [ - {file = "filelock-3.21.2-py3-none-any.whl", hash = "sha256:d6cd4dbef3e1bb63bc16500fc5aa100f16e405bbff3fb4231711851be50c1560"}, - {file = "filelock-3.21.2.tar.gz", hash = "sha256:cfd218cfccf8b947fce7837da312ec3359d10ef2a47c8602edd59e0bacffb708"}, + {file = "filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258"}, + {file = "filelock-3.29.0.tar.gz", hash = "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90"}, ] [[package]] @@ -1469,15 +1578,15 @@ dev = ["black", "flake8", "flake8-pyproject", "mypy", "pre-commit", "pytest"] [[package]] name = "fsspec" -version = "2026.2.0" +version = "2026.4.0" description = "File-system specification" optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437"}, - {file = "fsspec-2026.2.0.tar.gz", hash = "sha256:6544e34b16869f5aacd5b90bdf1a71acb37792ea3ddf6125ee69a22a53fb8bff"}, + {file = "fsspec-2026.4.0-py3-none-any.whl", hash = "sha256:11ef7bb35dab8a394fde6e608221d5cf3e8499401c249bebaeaad760a1a8dec2"}, + {file = "fsspec-2026.4.0.tar.gz", hash = "sha256:301d8ac70ae90ef3ad05dcf94d6c3754a097f9b5fe4667d2787aa359ec7df7e4"}, ] [package.extras] @@ -1577,66 +1686,72 @@ notebooks = ["jupyter", "pyyaml"] [[package]] name = "greenlet" -version = "3.3.1" 
+version = "3.5.0" description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.10" groups = ["main", "dev"] markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\"" files = [ - {file = "greenlet-3.3.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:04bee4775f40ecefcdaa9d115ab44736cd4b9c5fba733575bfe9379419582e13"}, - {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:50e1457f4fed12a50e427988a07f0f9df53cf0ee8da23fab16e6732c2ec909d4"}, - {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:070472cd156f0656f86f92e954591644e158fd65aa415ffbe2d44ca77656a8f5"}, - {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1108b61b06b5224656121c3c8ee8876161c491cbe74e5c519e0634c837cf93d5"}, - {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3a300354f27dd86bae5fbf7002e6dd2b3255cd372e9242c933faf5e859b703fe"}, - {file = "greenlet-3.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e84b51cbebf9ae573b5fbd15df88887815e3253fc000a7d0ff95170e8f7e9729"}, - {file = "greenlet-3.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e0093bd1a06d899892427217f0ff2a3c8f306182b8c754336d32e2d587c131b4"}, - {file = "greenlet-3.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:7932f5f57609b6a3b82cc11877709aa7a98e3308983ed93552a1c377069b20c8"}, - {file = "greenlet-3.3.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5fd23b9bc6d37b563211c6abbb1b3cab27db385a4449af5c32e932f93017080c"}, - {file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f51496a0bfbaa9d74d36a52d2580d1ef5ed4fdfcff0a73730abfbbbe1403dd"}, - 
{file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb0feb07fe6e6a74615ee62a880007d976cf739b6669cce95daa7373d4fc69c5"}, - {file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:67ea3fc73c8cd92f42467a72b75e8f05ed51a0e9b1d15398c913416f2dafd49f"}, - {file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:39eda9ba259cc9801da05351eaa8576e9aa83eb9411e8f0c299e05d712a210f2"}, - {file = "greenlet-3.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e2e7e882f83149f0a71ac822ebf156d902e7a5d22c9045e3e0d1daf59cee2cc9"}, - {file = "greenlet-3.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80aa4d79eb5564f2e0a6144fcc744b5a37c56c4a92d60920720e99210d88db0f"}, - {file = "greenlet-3.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:32e4ca9777c5addcbf42ff3915d99030d8e00173a56f80001fb3875998fe410b"}, - {file = "greenlet-3.3.1-cp311-cp311-win_arm64.whl", hash = "sha256:da19609432f353fed186cc1b85e9440db93d489f198b4bdf42ae19cc9d9ac9b4"}, - {file = "greenlet-3.3.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7e806ca53acf6d15a888405880766ec84721aa4181261cd11a457dfe9a7a4975"}, - {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d842c94b9155f1c9b3058036c24ffb8ff78b428414a19792b2380be9cecf4f36"}, - {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:20fedaadd422fa02695f82093f9a98bad3dab5fcda793c658b945fcde2ab27ba"}, - {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c620051669fd04ac6b60ebc70478210119c56e2d5d5df848baec4312e260e4ca"}, - {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14194f5f4305800ff329cbf02c5fcc88f01886cadd29941b807668a45f0d2336"}, - {file = "greenlet-3.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:7b2fe4150a0cf59f847a67db8c155ac36aed89080a6a639e9f16df5d6c6096f1"}, - {file = "greenlet-3.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:49f4ad195d45f4a66a0eb9c1ba4832bb380570d361912fa3554746830d332149"}, - {file = "greenlet-3.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:cc98b9c4e4870fa983436afa999d4eb16b12872fab7071423d5262fa7120d57a"}, - {file = "greenlet-3.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:bfb2d1763d777de5ee495c85309460f6fd8146e50ec9d0ae0183dbf6f0a829d1"}, - {file = "greenlet-3.3.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:7ab327905cabb0622adca5971e488064e35115430cec2c35a50fd36e72a315b3"}, - {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:65be2f026ca6a176f88fb935ee23c18333ccea97048076aef4db1ef5bc0713ac"}, - {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7a3ae05b3d225b4155bda56b072ceb09d05e974bc74be6c3fc15463cf69f33fd"}, - {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:12184c61e5d64268a160226fb4818af4df02cfead8379d7f8b99a56c3a54ff3e"}, - {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6423481193bbbe871313de5fd06a082f2649e7ce6e08015d2a76c1e9186ca5b3"}, - {file = "greenlet-3.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:33a956fe78bbbda82bfc95e128d61129b32d66bcf0a20a1f0c08aa4839ffa951"}, - {file = "greenlet-3.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b065d3284be43728dd280f6f9a13990b56470b81be20375a207cdc814a983f2"}, - {file = "greenlet-3.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:27289986f4e5b0edec7b5a91063c109f0276abb09a7e9bdab08437525977c946"}, - {file = "greenlet-3.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:2f080e028001c5273e0b42690eaf359aeef9cb1389da0f171ea51a5dc3c7608d"}, - {file = "greenlet-3.3.1-cp314-cp314-macosx_11_0_universal2.whl", hash = 
"sha256:bd59acd8529b372775cd0fcbc5f420ae20681c5b045ce25bd453ed8455ab99b5"}, - {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b31c05dd84ef6871dd47120386aed35323c944d86c3d91a17c4b8d23df62f15b"}, - {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:02925a0bfffc41e542c70aa14c7eda3593e4d7e274bfcccca1827e6c0875902e"}, - {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3e0f3878ca3a3ff63ab4ea478585942b53df66ddde327b59ecb191b19dbbd62d"}, - {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34a729e2e4e4ffe9ae2408d5ecaf12f944853f40ad724929b7585bca808a9d6f"}, - {file = "greenlet-3.3.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aec9ab04e82918e623415947921dea15851b152b822661cce3f8e4393c3df683"}, - {file = "greenlet-3.3.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:71c767cf281a80d02b6c1bdc41c9468e1f5a494fb11bc8688c360524e273d7b1"}, - {file = "greenlet-3.3.1-cp314-cp314-win_amd64.whl", hash = "sha256:96aff77af063b607f2489473484e39a0bbae730f2ea90c9e5606c9b73c44174a"}, - {file = "greenlet-3.3.1-cp314-cp314-win_arm64.whl", hash = "sha256:b066e8b50e28b503f604fa538adc764a638b38cf8e81e025011d26e8a627fa79"}, - {file = "greenlet-3.3.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:3e63252943c921b90abb035ebe9de832c436401d9c45f262d80e2d06cc659242"}, - {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76e39058e68eb125de10c92524573924e827927df5d3891fbc97bd55764a8774"}, - {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c9f9d5e7a9310b7a2f416dd13d2e3fd8b42d803968ea580b7c0f322ccb389b97"}, - {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:4b9721549a95db96689458a1e0ae32412ca18776ed004463df3a9299c1b257ab"}, - {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92497c78adf3ac703b57f1e3813c2d874f27f71a178f9ea5887855da413cd6d2"}, - {file = "greenlet-3.3.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ed6b402bc74d6557a705e197d47f9063733091ed6357b3de33619d8a8d93ac53"}, - {file = "greenlet-3.3.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:59913f1e5ada20fde795ba906916aea25d442abcc0593fba7e26c92b7ad76249"}, - {file = "greenlet-3.3.1-cp314-cp314t-win_amd64.whl", hash = "sha256:301860987846c24cb8964bdec0e31a96ad4a2a801b41b4ef40963c1b44f33451"}, - {file = "greenlet-3.3.1.tar.gz", hash = "sha256:41848f3230b58c08bb43dee542e74a2a2e34d3c59dc3076cec9151aeeedcae98"}, + {file = "greenlet-3.5.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:29ea813b2e1f45fa9649a17853b2b5465c4072fbcb072e5af6cd3a288216574a"}, + {file = "greenlet-3.5.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:804a70b328e706b785c6ef16187051c394a63dd1a906d89be24b6ad77759f13f"}, + {file = "greenlet-3.5.0-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:884f649de075b84739713d41dd4dfd41e2b910bfb769c4a3ea02ec1da52cd9bb"}, + {file = "greenlet-3.5.0-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4d0eadc7e4d9ffb2af4247b606cae307be8e448911e5a0d0b16d72fc3d224cfd"}, + {file = "greenlet-3.5.0-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4b28037cb07768933c54d81bfe47a85f9f402f57d7d69743b991a713b63954eb"}, + {file = "greenlet-3.5.0-cp310-cp310-manylinux_2_39_riscv64.whl", hash = "sha256:f8c30c2225f40dd76c50790f0eb3b5c7c18431efb299e2782083e1981feed243"}, + {file = "greenlet-3.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cda05425526240807408156b6960a17a79a0c760b813573b67027823be760977"}, + {file = 
"greenlet-3.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9c615f869163e14bb1ced20322d8038fb680b08236521ac3f30cd4c1288785a0"}, + {file = "greenlet-3.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:ba8f0bdc2fae6ce915dfd0c16d2d00bca7e4247c1eae4416e06430e522137858"}, + {file = "greenlet-3.5.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:8f1cc966c126639cd152fdaa52624d2655f492faa79e013fea161de3e6dda082"}, + {file = "greenlet-3.5.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:362624e6a8e5bca3b8233e45eef33903a100e9539a2b995c364d595dbc4018b3"}, + {file = "greenlet-3.5.0-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5ecd83806b0f4c2f53b1018e0005cd82269ea01d42befc0368730028d850ed1c"}, + {file = "greenlet-3.5.0-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fa94cb2288681e3a11645958f1871d48ee9211bd2f66628fdace505927d6e564"}, + {file = "greenlet-3.5.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ff251e9a0279522e62f6176412869395a64ddf2b5c5f782ff609a8216a4e662"}, + {file = "greenlet-3.5.0-cp311-cp311-manylinux_2_39_riscv64.whl", hash = "sha256:64d6ac45f7271f48e45f67c95b54ef73534c52ec041fcda8edf520c6d811f4bc"}, + {file = "greenlet-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6d874e79afd41a96e11ff4c5d0bc90a80973e476fda1c2c64985667397df432b"}, + {file = "greenlet-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0ed006e4b86c59de7467eb2601cd1b77b5a7d657d1ee55e30fe30d76451edba4"}, + {file = "greenlet-3.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:703cb211b820dbffbbc55a16bfc6e4583a6e6e990f33a119d2cc8b83211119c8"}, + {file = "greenlet-3.5.0-cp311-cp311-win_arm64.whl", hash = "sha256:6c18dfb59c70f5a94acd271c72e90128c3c776e41e5f07767908c8c1b74ad339"}, + {file = "greenlet-3.5.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:db2910d3c809444e0a20147361f343fe2798e106af8d9d8506f5305302655a9f"}, + {file = 
"greenlet-3.5.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ec9ea74e7268ace7f9aab1b1a4e730193fc661b39a993cd91c606c32d4a3628"}, + {file = "greenlet-3.5.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54d243512da35485fc7a6bf3c178fdda6327a9d6506fcdd62b1abd1e41b2927b"}, + {file = "greenlet-3.5.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:41353ec2ecedf7aa8f682753a41919f8718031a6edac46b8d3dc7ed9e1ceb136"}, + {file = "greenlet-3.5.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d280a7f5c331622c69f97eb167f33577ff2d1df282c41cd15907fc0a3ca198c"}, + {file = "greenlet-3.5.0-cp312-cp312-manylinux_2_39_riscv64.whl", hash = "sha256:58c1c374fe2b3d852f9b6b11a7dff4c85404e51b9a596fd9e89cf904eb09866d"}, + {file = "greenlet-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1eb67d5adefb5bd2e182d42678a328979a209e4e82eb93575708185d31d1f588"}, + {file = "greenlet-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2628d6c86f6cb0cb45e0c3c54058bbec559f57eaae699447748cb3928150577e"}, + {file = "greenlet-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:d4d9f0624c775f2dfc56ba54d515a8c771044346852a918b405914f6b19d7fd8"}, + {file = "greenlet-3.5.0-cp312-cp312-win_arm64.whl", hash = "sha256:83ed9f27f1680b50e89f40f6df348a290ea234b249a4003d366663a12eab94f2"}, + {file = "greenlet-3.5.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:5a5ed18de6a0f6cc7087f1563f6bd93fc7df1c19165ca01e9bde5a5dc281d106"}, + {file = "greenlet-3.5.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a717fbc46d8a354fa675f7c1e813485b6ba3885f9bef0cd56e5ba27d758ff5b"}, + {file = "greenlet-3.5.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ddc090c5c1792b10246a78e8c2163ebbe04cf877f9d785c230a7b27b39ad038e"}, + {file = "greenlet-3.5.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:4964101b8585c144cbda5532b1aa644255126c08a265dae90c16e7a0e63aaa9d"}, + {file = "greenlet-3.5.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2094acd54b272cb6eae8c03dd87b3fa1820a4cef18d6889c378d503500a1dc13"}, + {file = "greenlet-3.5.0-cp313-cp313-manylinux_2_39_riscv64.whl", hash = "sha256:7022615368890680e67b9965d33f5773aade330d5343bbe25560135aaa849eae"}, + {file = "greenlet-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5e05ba267789ea87b5a155cf0e810b1ab88bf18e9e8740813945ceb8ee4350ba"}, + {file = "greenlet-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0ecec963079cd58cbd14723582384f11f166fd58883c15dcbfb342e0bc9b5846"}, + {file = "greenlet-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:728d9667d8f2f586644b748dbd9bb67e50d6a9381767d1357714ea6825bb3bf5"}, + {file = "greenlet-3.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:47422135b1d308c14b2c6e758beedb1acd33bb91679f5670edf77bf46244722b"}, + {file = "greenlet-3.5.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:f35807464c4c58c55f0d31dfa83c541a5615d825c2fe3d2b95360cf7c4e3c0a8"}, + {file = "greenlet-3.5.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55fa7ea52771be44af0de27d8b80c02cd18c2c3cddde6c847ecebdf72418b6a1"}, + {file = "greenlet-3.5.0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a97e4821aa710603f94de0da25f25096454d78ffdace5dc77f3a006bc01abba3"}, + {file = "greenlet-3.5.0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bf2d8a80bec89ab46221ae45c5373d5ba0bd36c19aa8508e85c6cd7e5106cd37"}, + {file = "greenlet-3.5.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8f52a464e4ed91780bdfbbdd2b97197f3accaa629b98c200f4dffada759f3ae7"}, + {file = "greenlet-3.5.0-cp314-cp314-manylinux_2_39_riscv64.whl", hash = "sha256:1bae92a1dd94c5f9d9493c3a212dd874c202442047cf96446412c862feca83a2"}, + {file = 
"greenlet-3.5.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:762612baf1161ccb8437c0161c668a688223cba28e1bf038f4eb47b13e39ccdf"}, + {file = "greenlet-3.5.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:57a43c6079a89713522bc4bcb9f75070ecf5d3dbad7792bfe42239362cbf2a16"}, + {file = "greenlet-3.5.0-cp314-cp314-win_amd64.whl", hash = "sha256:3bc59be3945ae9750b9e7d45067d01ae3fe90ea5f9ade99239dabdd6e28a5033"}, + {file = "greenlet-3.5.0-cp314-cp314-win_arm64.whl", hash = "sha256:a96fcee45e03fe30a62669fd16ab5c9d3c172660d3085605cb1e2d1280d3c988"}, + {file = "greenlet-3.5.0-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:a10a732421ab4fec934783ce3e54763470d0181db6e3468f9103a275c3ed1853"}, + {file = "greenlet-3.5.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7fc391b1566f2907d17aaebe78f8855dc45675159a775fcf9e61f8ee0078e87f"}, + {file = "greenlet-3.5.0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:680bd0e7ad5e8daa8a4aa89f68fd6adc834b8a8036dc256533f7e08f4a4b01f7"}, + {file = "greenlet-3.5.0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1aa4ce8debcd4ea7fb2e150f3036588c41493d1d52c43538924ae1819003f4ce"}, + {file = "greenlet-3.5.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddb36c7d6c9c0a65f18c7258634e0c416c6ab59caac8c987b96f80c2ebda0112"}, + {file = "greenlet-3.5.0-cp314-cp314t-manylinux_2_39_riscv64.whl", hash = "sha256:728a73687e39ae9ca34e4694cbf2f049d3fbc7174639468d0f67200a97d8f9e2"}, + {file = "greenlet-3.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e5ddf316ced87539144621453c3aef229575825fe60c604e62bedc4003f372b2"}, + {file = "greenlet-3.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4a448128607be0de65342dc9b31be7f948ef4cc0bc8832069350abefd310a8f2"}, + {file = "greenlet-3.5.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d60097128cb0a1cab9ea541186ea13cd7b847b8449a7787c2e2350da0cb82d86"}, + {file = 
"greenlet-3.5.0.tar.gz", hash = "sha256:d419647372241bc68e957bf38d5c1f98852155e4146bd1e4121adea81f4f01e4"}, ] [package.extras] @@ -1657,15 +1772,15 @@ files = [ [[package]] name = "hgvs" -version = "1.5.6" +version = "1.5.7" description = "HGVS Parser, Formatter, Mapper, Validator" optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "hgvs-1.5.6-py3-none-any.whl", hash = "sha256:7ca4f9fc7be3afca29f5caf1bc5256083fc581a59c6801b7e9654a15d8d2d376"}, - {file = "hgvs-1.5.6.tar.gz", hash = "sha256:663755fd5db38a897c447dd1ec0a2bfc8157a28ad30378a08489746e3aa61ff2"}, + {file = "hgvs-1.5.7-py3-none-any.whl", hash = "sha256:4ff13a4df730fc6ecc68579629a200a7c9f403e4e8f0636cb1c819fab8543144"}, + {file = "hgvs-1.5.7.tar.gz", hash = "sha256:5d76d9cde7bd029d770f3e06ddac5b6bba22da2d8698e39a5a403053d9451251"}, ] [package.dependencies] @@ -1683,118 +1798,118 @@ dev = ["black", "flake8", "ipython", "isort", "jupyter", "pre-commit (>=3.4,<4.0 [[package]] name = "hiredis" -version = "3.3.0" +version = "3.3.1" description = "Python wrapper for hiredis" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "hiredis-3.3.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:9937d9b69321b393fbace69f55423480f098120bc55a3316e1ca3508c4dbbd6f"}, - {file = "hiredis-3.3.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:50351b77f89ba6a22aff430b993653847f36b71d444509036baa0f2d79d1ebf4"}, - {file = "hiredis-3.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1d00bce25c813eec45a2f524249f58daf51d38c9d3347f6f643ae53826fc735a"}, - {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ef840d9f142556ed384180ed8cdf14ff875fcae55c980cbe5cec7adca2ef4d8"}, - {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:88bc79d7e9b94d17ed1bd8b7f2815ed0eada376ed5f48751044e5e4d179aa2f2"}, - {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7165c7363e59b258e1875c51f35c0b2b9901e6c691037b487d8a0ace2c137ed2"}, - {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8c3be446f0c38fbe6863a7cf4522c9a463df6e64bee87c4402e9f6d7d2e7f869"}, - {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:96f9a27643279853b91a1fb94a88b559e55fdecec86f1fcd5f2561492be52e47"}, - {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:0a5eebb170de1b415c78ae5ca3aee17cff8b885df93c2055d54320e789d838f4"}, - {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:200678547ac3966bac3e38df188211fdc13d5f21509c23267e7def411710e112"}, - {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:dd9d78c5363a858f9dc5e698e5e1e402b83c00226cba294f977a92c53092b549"}, - {file = "hiredis-3.3.0-cp310-cp310-win32.whl", hash = "sha256:a0d31ff178b913137a7a08c7377e93805914755a15c3585e203d0d74496456c0"}, - {file = "hiredis-3.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:7b41833c8f0d4c7fbfaa867c8ed9a4e4aaa71d7c54e4806ed62da2d5cd27b40d"}, - {file = "hiredis-3.3.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:63ee6c1ae6a2462a2439eb93c38ab0315cd5f4b6d769c6a34903058ba538b5d6"}, - {file = "hiredis-3.3.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:31eda3526e2065268a8f97fbe3d0e9a64ad26f1d89309e953c80885c511ea2ae"}, - {file = "hiredis-3.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a26bae1b61b7bcafe3d0d0c7d012fb66ab3c95f2121dbea336df67e344e39089"}, - {file = "hiredis-3.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b9546079f7fd5c50fbff9c791710049b32eebe7f9b94debec1e8b9f4c048cba2"}, - {file = 
"hiredis-3.3.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ae327fc13b1157b694d53f92d50920c0051e30b0c245f980a7036e299d039ab4"}, - {file = "hiredis-3.3.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4016e50a8be5740a59c5af5252e5ad16c395021a999ad24c6604f0d9faf4d346"}, - {file = "hiredis-3.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c17b473f273465a3d2168a57a5b43846165105ac217d5652a005e14068589ddc"}, - {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9ecd9b09b11bd0b8af87d29c3f5da628d2bdc2a6c23d2dd264d2da082bd4bf32"}, - {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:00fb04eac208cd575d14f246e74a468561081ce235937ab17d77cde73aefc66c"}, - {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:60814a7d0b718adf3bfe2c32c6878b0e00d6ae290ad8e47f60d7bba3941234a6"}, - {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fcbd1a15e935aa323b5b2534b38419511b7909b4b8ee548e42b59090a1b37bb1"}, - {file = "hiredis-3.3.0-cp311-cp311-win32.whl", hash = "sha256:73679607c5a19f4bcfc9cf6eb54480bcd26617b68708ac8b1079da9721be5449"}, - {file = "hiredis-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:30a4df3d48f32538de50648d44146231dde5ad7f84f8f08818820f426840ae97"}, - {file = "hiredis-3.3.0-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:5b8e1d6a2277ec5b82af5dce11534d3ed5dffeb131fd9b210bc1940643b39b5f"}, - {file = "hiredis-3.3.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:c4981de4d335f996822419e8a8b3b87367fcef67dc5fb74d3bff4df9f6f17783"}, - {file = "hiredis-3.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1706480a683e328ae9ba5d704629dee2298e75016aa0207e7067b9c40cecc271"}, - {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:0a95cef9989736ac313639f8f545b76b60b797e44e65834aabbb54e4fad8d6c8"}, - {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca2802934557ccc28a954414c245ba7ad904718e9712cb67c05152cf6b9dd0a3"}, - {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fe730716775f61e76d75810a38ee4c349d3af3896450f1525f5a4034cf8f2ed7"}, - {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:749faa69b1ce1f741f5eaf743435ac261a9262e2d2d66089192477e7708a9abc"}, - {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:95c9427f2ac3f1dd016a3da4e1161fa9d82f221346c8f3fdd6f3f77d4e28946c"}, - {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c863ee44fe7bff25e41f3a5105c936a63938b76299b802d758f40994ab340071"}, - {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2213c7eb8ad5267434891f3241c7776e3bafd92b5933fc57d53d4456247dc542"}, - {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a172bae3e2837d74530cd60b06b141005075db1b814d966755977c69bd882ce8"}, - {file = "hiredis-3.3.0-cp312-cp312-win32.whl", hash = "sha256:cb91363b9fd6d41c80df9795e12fffbaf5c399819e6ae8120f414dedce6de068"}, - {file = "hiredis-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:04ec150e95eea3de9ff8bac754978aa17b8bf30a86d4ab2689862020945396b0"}, - {file = "hiredis-3.3.0-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:b7048b4ec0d5dddc8ddd03da603de0c4b43ef2540bf6e4c54f47d23e3480a4fa"}, - {file = "hiredis-3.3.0-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:e5f86ce5a779319c15567b79e0be806e8e92c18bb2ea9153e136312fafa4b7d6"}, - {file = "hiredis-3.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fbdb97a942e66016fff034df48a7a184e2b7dc69f14c4acd20772e156f20d04b"}, - {file = 
"hiredis-3.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0fb4bea72fe45ff13e93ddd1352b43ff0749f9866263b5cca759a4c960c776f"}, - {file = "hiredis-3.3.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:85b9baf98050e8f43c2826ab46aaf775090d608217baf7af7882596aef74e7f9"}, - {file = "hiredis-3.3.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:69079fb0f0ebb61ba63340b9c4bce9388ad016092ca157e5772eb2818209d930"}, - {file = "hiredis-3.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c17f77b79031ea4b0967d30255d2ae6e7df0603ee2426ad3274067f406938236"}, - {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45d14f745fc177bc05fc24bdf20e2b515e9a068d3d4cce90a0fb78d04c9c9d9a"}, - {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:ba063fdf1eff6377a0c409609cbe890389aefddfec109c2d20fcc19cfdafe9da"}, - {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1799cc66353ad066bfdd410135c951959da9f16bcb757c845aab2f21fc4ef099"}, - {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2cbf71a121996ffac82436b6153290815b746afb010cac19b3290a1644381b07"}, - {file = "hiredis-3.3.0-cp313-cp313-win32.whl", hash = "sha256:a7cbbc6026bf03659f0b25e94bbf6e64f6c8c22f7b4bc52fe569d041de274194"}, - {file = "hiredis-3.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:a8def89dd19d4e2e4482b7412d453dec4a5898954d9a210d7d05f60576cedef6"}, - {file = "hiredis-3.3.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c135bda87211f7af9e2fd4e046ab433c576cd17b69e639a0f5bb2eed5e0e71a9"}, - {file = "hiredis-3.3.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2f855c678230aed6fc29b962ce1cc67e5858a785ef3a3fd6b15dece0487a2e60"}, - {file = "hiredis-3.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:4059c78a930cbb33c391452ccce75b137d6f89e2eebf6273d75dafc5c2143c03"}, - {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:334a3f1d14c253bb092e187736c3384203bd486b244e726319bbb3f7dffa4a20"}, - {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fd137b147235447b3d067ec952c5b9b95ca54b71837e1b38dbb2ec03b89f24fc"}, - {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8f88f4f2aceb73329ece86a1cb0794fdbc8e6d614cb5ca2d1023c9b7eb432db8"}, - {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:550f4d1538822fc75ebf8cf63adc396b23d4958bdbbad424521f2c0e3dfcb169"}, - {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:54b14211fbd5930fc696f6fcd1f1f364c660970d61af065a80e48a1fa5464dd6"}, - {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9e96f63dbc489fc86f69951e9f83dadb9582271f64f6822c47dcffa6fac7e4a"}, - {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:106e99885d46684d62ab3ec1d6b01573cc0e0083ac295b11aaa56870b536c7ec"}, - {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:087e2ef3206361281b1a658b5b4263572b6ba99465253e827796964208680459"}, - {file = "hiredis-3.3.0-cp314-cp314-win32.whl", hash = "sha256:80638ebeab1cefda9420e9fedc7920e1ec7b4f0513a6b23d58c9d13c882f8065"}, - {file = "hiredis-3.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a68aaf9ba024f4e28cf23df9196ff4e897bd7085872f3a30644dca07fa787816"}, - {file = "hiredis-3.3.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:f7f80442a32ce51ee5d89aeb5a84ee56189a0e0e875f1a57bbf8d462555ae48f"}, - {file = "hiredis-3.3.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:a1a67530da714954ed50579f4fe1ab0ddbac9c43643b1721c2cb226a50dde263"}, - 
{file = "hiredis-3.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:616868352e47ab355559adca30f4f3859f9db895b4e7bc71e2323409a2add751"}, - {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e799b79f3150083e9702fc37e6243c0bd47a443d6eae3f3077b0b3f510d6a145"}, - {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9ef1dfb0d2c92c3701655e2927e6bbe10c499aba632c7ea57b6392516df3864b"}, - {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c290da6bc2a57e854c7da9956cd65013483ede935677e84560da3b848f253596"}, - {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd8c438d9e1728f0085bf9b3c9484d19ec31f41002311464e75b69550c32ffa8"}, - {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1bbc6b8a88bbe331e3ebf6685452cebca6dfe6d38a6d4efc5651d7e363ba28bd"}, - {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:55d8c18fe9a05496c5c04e6eccc695169d89bf358dff964bcad95696958ec05f"}, - {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:4ddc79afa76b805d364e202a754666cb3c4d9c85153cbfed522871ff55827838"}, - {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8e8a4b8540581dcd1b2b25827a54cfd538e0afeaa1a0e3ca87ad7126965981cc"}, - {file = "hiredis-3.3.0-cp314-cp314t-win32.whl", hash = "sha256:298593bb08487753b3afe6dc38bac2532e9bac8dcee8d992ef9977d539cc6776"}, - {file = "hiredis-3.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b442b6ab038a6f3b5109874d2514c4edf389d8d8b553f10f12654548808683bc"}, - {file = "hiredis-3.3.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:114c0b9f1b5fad99edae38e747018aead358a4f4e9720cc1876495d78cdb8276"}, - {file = "hiredis-3.3.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = 
"sha256:c6d91a5e6904ed7eca21d74b041e03f2ad598dd08a6065b06a776974fe5d003c"}, - {file = "hiredis-3.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:76374faa075e996c895cbe106ba923852a9f8146f2aa59eba22111c5e5ec6316"}, - {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:50a54397bd104c2e2f5b7696bbdab8ba2973d3075e4deb932adb025b8863de91"}, - {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:15edee02cc9cc06e07e2bcfae07e283e640cc1aeedd08b4c6934bf1a0113c607"}, - {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ff3179a57745d0f8d71fa8bf3ea3944d3f557dcfa4431304497987fecad381dd"}, - {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdb7cd9e1e73db78f145a09bb837732790d0912eb963dee5768631faf2ece162"}, - {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:4d3b4e0d4445faf9041c52a98cb5d2b65c4fcaebb2aa02efa7c6517c4917f7e8"}, - {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:ffea6c407cff532c7599d3ec9e8502c2c865753cebab044f3dfce9afbf71a8df"}, - {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:bcd745a28e1b3216e42680d91e142a42569dfad68a6f40535080c47b0356c796"}, - {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4c18a97ea55d1a58f5c3adfe236b3e7cccedc6735cbd36ab1c786c52fd823667"}, - {file = "hiredis-3.3.0-cp38-cp38-win32.whl", hash = "sha256:77eacd969e3c6ff50c2b078c27d2a773c652248a5d81af5765a8663478d0bc02"}, - {file = "hiredis-3.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:161a4a595a53475587aef8dc549d0527962879b0c5d62f7947b44ba7e5084b76"}, - {file = "hiredis-3.3.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:1203697a7ebadc7cf873acc189df9e44fcb377b636e6660471707ac8d5bcba68"}, - {file = 
"hiredis-3.3.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:9a7ea2344d277317160da4911f885bcf7dfd8381b830d76b442f7775b41544b3"}, - {file = "hiredis-3.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9bd7c9a089cf4e4f4b5a61f412c76293449bac6b0bf92bb49a3892850bd5c899"}, - {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:294de11e3995128c784534e327d1f9382b88dc5407356465df7934c710e8392d"}, - {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4a3aab895358368f81f9546a7cd192b6fb427f785cb1a8853cf9db38df01e9ca"}, - {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:eaf8418e33e23d6d7ef0128eff4c06ab3040d40b9bbc8a24d6265d751a472596"}, - {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41aea51949142bad4e40badb0396392d7f4394791e4097a0951ab75bcc58ff84"}, - {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1f9a5f84a8bd29ac5b9953b27e8ba5508396afeabf1d165611a1e31fbd90a0e1"}, - {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:a5f9fde56550ebbe962f437a4c982b0856d03aea7fab09e30fa6c0f9be992b40"}, - {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c567aab02612d91f3e747fc492100ae894515194f85d6fb6bb68958c0e718721"}, - {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ca97c5e6f9e9b9f0aed61b70fed2d594ce2f7472905077d2d10b307c50a41008"}, - {file = "hiredis-3.3.0-cp39-cp39-win32.whl", hash = "sha256:776dc5769d5eb05e969216de095377ff61c802414a74bd3c24a4ca8526c897ab"}, - {file = "hiredis-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:538a9f5fbb3a8a4ef0c3abd309cccb90cd2ba9976fcc2b44193af9507d005b48"}, - {file = "hiredis-3.3.0.tar.gz", hash = "sha256:105596aad9249634361815c574351f1bd50455dc23b537c2940066c4a9dea685"}, + 
{file = "hiredis-3.3.1-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:f525734382a47f9828c9d6a1501522c78d5935466d8e2be1a41ba40ca5bb922b"}, + {file = "hiredis-3.3.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:6e2e1024f0a021777740cb7c633a0efb2c4a4bc570f508223a8dcbcf79f99ef9"}, + {file = "hiredis-3.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c1d68c6980d4690a4550bd3db6c03146f7be68ef5d08d38bb1fb68b3e9c32fe3"}, + {file = "hiredis-3.3.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0caf3fc8af0767794b335753781c3fa35f2a3e975c098edbc8f733d35d6a95e4"}, + {file = "hiredis-3.3.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81a1669b6631976b1dc9d3d58ed1ab3333e9f52feb91a2a1fb8241101ac3b665"}, + {file = "hiredis-3.3.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c8139e9011117822391c5bcfd674c5948fb1e4b8cb9adf6f13d9890859ee3a1a"}, + {file = "hiredis-3.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:042e57de8a2cae91e3e7c0af32960ea2c5107b2f27f68a740295861e68780a8a"}, + {file = "hiredis-3.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:65f6ac06a9f0c32c254660ec6a9329d81d589e8f5d0a9837a941d5424a6be1ef"}, + {file = "hiredis-3.3.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:002fc0201b9af1cc8960e27cdc501ad1f8cdd6dbadb2091c6ddbd4e5ace6cb77"}, + {file = "hiredis-3.3.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:9ebae74ce2b977c2fcb22d6a10aa0acb730022406977b2bcb6ddd6788f5c414a"}, + {file = "hiredis-3.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8a52b24cd710690c4a7e191c7e300136ad2ecb3c68ffe7e95b598e76de166e5e"}, + {file = "hiredis-3.3.1-cp310-cp310-win32.whl", hash = "sha256:1ebc307a87b099d0877dbd2bdc0bae427258e7ec67f60a951e89027f8dc2568f"}, + {file = "hiredis-3.3.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:62cc62284541bb2a86c898c7d5e8388661cade91c184cb862095ed547e80588f"}, + {file = "hiredis-3.3.1-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:26f899cde0279e4b7d370716ff80320601c2bd93cdf3e774a42bdd44f65b41f8"}, + {file = "hiredis-3.3.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:a2f049c3f3c83e886cd1f53958e2a1ebb369be626bef9e50d8b24d79864f1df6"}, + {file = "hiredis-3.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5f316cf2d0558f5027aab19dde7d7e4901c26c21fa95367bc37784e8f547bbf2"}, + {file = "hiredis-3.3.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03baa381964b8df356d19ec4e3a6ae656044249a87b0def257fe1e08dbaf6094"}, + {file = "hiredis-3.3.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:304481241e081bc26f0778b2c2b99f9c43917e4e724a016dcc9439b7ab12c726"}, + {file = "hiredis-3.3.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8597c35c9e82f65fd5897c4a2188c65d7daf10607b102960137b23d261cd957b"}, + {file = "hiredis-3.3.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad940dc2db545dc978cb41cb9a683e2ff328f3ef581230b9ca40ff6c3d01d542"}, + {file = "hiredis-3.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:156be6a0c736ee145cfe0fb155d0e96cec8d4872cf8b4f76ad6a2ee6ab391d0a"}, + {file = "hiredis-3.3.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:583de2f16528e66081cbdfe510d8488c2de73039dc00aada7d22bd49d73a4a94"}, + {file = "hiredis-3.3.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c24c1460486b6b36083252c2db21a814becf8495ccd0e76b7286623e37239b63"}, + {file = "hiredis-3.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a58a58cef0d911b1717154179a9ff47852249c536ea5966bde4370b6b20638ff"}, + {file = "hiredis-3.3.1-cp311-cp311-win32.whl", hash = 
"sha256:e0db44cf81e4d7b94f3776b9f89111f74ed6bbdbfd42a22bc4a5ce0644d3e060"}, + {file = "hiredis-3.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:1f7bceb03a1b934872ffe3942eaeed7c7e09096e67b53f095b81f39c7a819113"}, + {file = "hiredis-3.3.1-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:60543f3b068b16a86e99ed96b7fdae71cdc1d8abdfe9b3f82032a555e52ece7e"}, + {file = "hiredis-3.3.1-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:2611bfaaadc5e8d43fb7967f9bbf1110c8beaa83aee2f2d812c76f11cfb56c6a"}, + {file = "hiredis-3.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8e3754ce60e1b11b0afad9a053481ff184d2ee24bea47099107156d1b84a84aa"}, + {file = "hiredis-3.3.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e89dabf436ee79b358fd970dcbed6333a36d91db73f27069ca24a02fb138a404"}, + {file = "hiredis-3.3.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4f7e242eab698ad0be5a4b2ec616fa856569c57455cc67c625fd567726290e5f"}, + {file = "hiredis-3.3.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:53148a4e21057541b6d8e493b2ea1b500037ddf34433c391970036f3cbce00e3"}, + {file = "hiredis-3.3.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c25132902d3eff38781e0d54f27a0942ec849e3c07dbdce83c4d92b7e43c8dce"}, + {file = "hiredis-3.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3fb6573efa15a29c12c0c0f7170b14e7c1347fe4bb39b6a15b779f46015cc929"}, + {file = "hiredis-3.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:487658e1db83c1ee9fbbac6a43039ea76957767a5987ffb16b590613f9e68297"}, + {file = "hiredis-3.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a1d190790ee39b8b7adeeb10fc4090dc4859eb4e75ed27bd8108710eef18f358"}, + {file = "hiredis-3.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:a42c7becd4c9ec4ab5769c754eb61112777bdc6e1c1525e2077389e193b5f5aa"}, + {file = "hiredis-3.3.1-cp312-cp312-win32.whl", hash = "sha256:17ec8b524055a88b80d76c177dbbbe475a25c17c5bf4b67bdbdbd0629bcae838"}, + {file = "hiredis-3.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:0fac4af8515e6cca74fc701169ae4dc9a71a90e9319c9d21006ec9454b43aa2f"}, + {file = "hiredis-3.3.1-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:afe3c3863f16704fb5d7c2c6ff56aaf9e054f6d269f7b4c9074c5476178d1aba"}, + {file = "hiredis-3.3.1-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:f19ee7dc1ef8a6497570d91fa4057ba910ad98297a50b8c44ff37589f7c89d17"}, + {file = "hiredis-3.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:09f5e510f637f2c72d2a79fb3ad05f7b6211e057e367ca5c4f97bb3d8c9d71f4"}, + {file = "hiredis-3.3.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b46e96b50dad03495447860510daebd2c96fd44ed25ba8ccb03e9f89eaa9d34"}, + {file = "hiredis-3.3.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b4fe7f38aa8956fcc1cea270e62601e0e11066aff78e384be70fd283d30293b6"}, + {file = "hiredis-3.3.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2b96da7e365d6488d2a75266a662cbe3cc14b28c23dd9b0c9aa04b5bc5c20192"}, + {file = "hiredis-3.3.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52d5641027d6731bc7b5e7d126a5158a99784a9f8c6de3d97ca89aca4969e9f8"}, + {file = "hiredis-3.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eddeb9a153795cf6e615f9f3cef66a1d573ff3b6ee16df2b10d1d1c2f2baeaa8"}, + {file = "hiredis-3.3.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:011a9071c3df4885cac7f58a2623feac6c8e2ad30e6ba93c55195af05ce61ff5"}, + {file = "hiredis-3.3.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:264ee7e9cb6c30dc78da4ecf71d74cf14ca122817c665d838eda8b4384bce1b0"}, + 
{file = "hiredis-3.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d1434d0bcc1b3ef048bae53f26456405c08aeed9827e65b24094f5f3a6793f1"}, + {file = "hiredis-3.3.1-cp313-cp313-win32.whl", hash = "sha256:f915a34fb742e23d0d61573349aa45d6f74037fde9d58a9f340435eff8d62736"}, + {file = "hiredis-3.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:d8e56e0d1fe607bfff422633f313aec9191c3859ab99d11ff097e3e6e068000c"}, + {file = "hiredis-3.3.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:439f9a5cc8f9519ce208a24cdebfa0440fef26aa682a40ba2c92acb10a53f5e0"}, + {file = "hiredis-3.3.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3724f0e58c6ff76fd683429945491de71324ab1bc0ad943a8d68cb0932d24075"}, + {file = "hiredis-3.3.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29fe35e3c6fe03204e75c86514f452591957a1e06b05d86e10d795455b71c355"}, + {file = "hiredis-3.3.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d42f3a13290f89191568fc113d95a3d2c8759cdd8c3672f021d8b7436f909e75"}, + {file = "hiredis-3.3.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2afc675b831f7552da41116fffffca4340f387dc03f56d6ec0c7895ab0b59a10"}, + {file = "hiredis-3.3.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4106201cd052d9eabe3cb7b5a24b0fe37307792bda4fcb3cf6ddd72f697828e8"}, + {file = "hiredis-3.3.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8887bf0f31e4b550bd988c8863b527b6587d200653e9375cd91eea2b944b7424"}, + {file = "hiredis-3.3.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1ac7697365dbe45109273b34227fee6826b276ead9a4a007e0877e1d3f0fcf21"}, + {file = "hiredis-3.3.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2b6da6e07359107c653a809b3cff2d9ccaeedbafe33c6f16434aef6f53ce4a2b"}, + {file = "hiredis-3.3.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = 
"sha256:ce334915f5d31048f76a42c607bf26687cf045eb1bc852b7340f09729c6a64fc"}, + {file = "hiredis-3.3.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ee11fd431f83d8a5b29d370b9d79a814d3218d30113bdcd44657e9bdf715fc92"}, + {file = "hiredis-3.3.1-cp314-cp314-win32.whl", hash = "sha256:e0356561b4a97c83b9ee3de657a41b8d1a1781226853adaf47b550bb988fda6f"}, + {file = "hiredis-3.3.1-cp314-cp314-win_amd64.whl", hash = "sha256:80aba5f85d6227faee628ae28d1c3b69c661806a0636548ac56c68782606454f"}, + {file = "hiredis-3.3.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:907f7b5501a534030738f0f27459a612d2266fd0507b007bb8f3e6de08167920"}, + {file = "hiredis-3.3.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:de94b409f49eb6a588ebdd5872e826caec417cd77c17af0fb94f2128427f1a2a"}, + {file = "hiredis-3.3.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:79cd03e7ff550c17758a7520bf437c156d3d4c8bb74214deeafa69cda49c85a4"}, + {file = "hiredis-3.3.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ffa7ba2e2da1f806f3181b9730b3e87ba9dbfec884806725d4584055ba3faa6"}, + {file = "hiredis-3.3.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ee37fe8cf081b72dea72f96a0ee604f492ec02252eb77dc26ff6eec3f997b580"}, + {file = "hiredis-3.3.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:9bfdeff778d3f7ff449ca5922ab773899e7d31e26a576028b06a5e9cf0ed8c34"}, + {file = "hiredis-3.3.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:027ce4fabfeff5af5b9869d5524770877f9061d118bc36b85703ae3faf5aad8e"}, + {file = "hiredis-3.3.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:dcea8c3f53674ae68e44b12e853b844a1d315250ca6677b11ec0c06aff85e86c"}, + {file = "hiredis-3.3.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = 
"sha256:0b5ff2f643f4b452b0597b7fe6aa35d398cb31d8806801acfafb1558610ea2aa"}, + {file = "hiredis-3.3.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:3586c8a5f56d34b9dddaaa9e76905f31933cac267251006adf86ec0eef7d0400"}, + {file = "hiredis-3.3.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a110d19881ca78a88583d3b07231e7c6864864f5f1f3491b638863ea45fa8708"}, + {file = "hiredis-3.3.1-cp314-cp314t-win32.whl", hash = "sha256:98fd5b39410e9d69e10e90d0330e35650becaa5dd2548f509b9598f1f3c6124d"}, + {file = "hiredis-3.3.1-cp314-cp314t-win_amd64.whl", hash = "sha256:ab1f646ff531d70bfd25f01e60708dfa3d105eb458b7dedd9fe9a443039fd809"}, + {file = "hiredis-3.3.1-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:a3af4e9f277d6b8acd369dc44a723a055752fca9d045094383af39f90a3e3729"}, + {file = "hiredis-3.3.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:526db52e5234a9463520e960a509d6c1bd5128d1ab1b569cbf459fe39189e8ab"}, + {file = "hiredis-3.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:90d6b9f2652303aefd2c5a26a5e14cb74a3a63d10faa642c08d790e99442a088"}, + {file = "hiredis-3.3.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4479e36d263251dba8ab8ea81adf07e7f1163603c7102c5de1e130b83b4fad3b"}, + {file = "hiredis-3.3.1-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2390ad81c03d93ef1d5afd18ffcf5935de827f1a2b96b2c829437968bdabccb"}, + {file = "hiredis-3.3.1-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:65c05b79cb8366c123357b354a16f9fc3f7187159422f143638d1c26b7240ed4"}, + {file = "hiredis-3.3.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:09d41a3a965f7c261223d516ebda607aee4d8440dd7637f01af9a4c05872f0c4"}, + {file = "hiredis-3.3.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:113e098e4a6b3cc5500e05e7cb1548ba9e83de5fe755941b11f6020a76e6c03a"}, + {file = 
"hiredis-3.3.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e31e92b61d56244047ad600812e16f7587a6172f74810fd919ff993af12b9149"}, + {file = "hiredis-3.3.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:77c5d2bebbc9d06691abb512a31d0f54e1562af0b872891463a67a949b5278ef"}, + {file = "hiredis-3.3.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:137c14905ea6f2933967200bc7b2a0c8ec9387888b273fd0004f25b994fd0343"}, + {file = "hiredis-3.3.1-cp38-cp38-win32.whl", hash = "sha256:f2f94355affd51088f57f8674b0e294704c3c7c3d7d3b1545310f5b135d4843b"}, + {file = "hiredis-3.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:b1e3b9f4bf9a4120510ba77a77b2fb674893cd6795653545152bb11a79eecfcb"}, + {file = "hiredis-3.3.1-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:743b85bd6902856cac457ddd8cd7dd48c89c47d641b6016ff5e4d015bfbd4799"}, + {file = "hiredis-3.3.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b37df4b10cb15dedfc203f69312d8eedd617b941c21df58c13af59496c53ad0f"}, + {file = "hiredis-3.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8650158217b469d8b6087f490929211b0493a9121154c4efaafd1dec9e19319e"}, + {file = "hiredis-3.3.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c74bd9926954e7e575f9cd9890f63defd90cd8f812dfbf8e1efb72acc9355456"}, + {file = "hiredis-3.3.1-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2f1c1b2e8f00b71e6214234d313f655a3a27cd4384b054126ce04073c1d47045"}, + {file = "hiredis-3.3.1-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:01cf82a514bc4fd145b99333c28523e61b7a9ad051a245804323ebf4e7b1c6a6"}, + {file = "hiredis-3.3.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:db46baf157feefd88724e6a7f145fe996a5990a8604ed9292b45d563360e513b"}, + {file = "hiredis-3.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:5e55d90b431b0c6b64ae5a624208d4aea318566d31872e595ee723c0f5b9a79f"}, + {file = "hiredis-3.3.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:40ae8a7041fcb328a6bc7202d8c4e6e0d38d434b2e3880b1ee8ed754f17cd836"}, + {file = "hiredis-3.3.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:d14229beaa76e66c3a25f9477d973336441ca820df853679a98796256813316f"}, + {file = "hiredis-3.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b3df9447f9209f9aa0434ca74050e9509670c1ad99398fe5807abb90e5f3a014"}, + {file = "hiredis-3.3.1-cp39-cp39-win32.whl", hash = "sha256:48ff424f8aa36aacd9fdaa68efeb27d2e8771f293af4305bdb15d92194ca6631"}, + {file = "hiredis-3.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:318f772dd321404075d406825266e574ee0f4751be1831424c2ebd5722609398"}, + {file = "hiredis-3.3.1.tar.gz", hash = "sha256:da6f0302360e99d32bc2869772692797ebadd536e1b826d0103c72ba49d38698"}, ] [[package]] @@ -1939,14 +2054,14 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve [[package]] name = "identify" -version = "2.6.16" +version = "2.6.19" description = "File identification library for Python" optional = false python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "identify-2.6.16-py2.py3-none-any.whl", hash = "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0"}, - {file = "identify-2.6.16.tar.gz", hash = "sha256:846857203b5511bbe94d5a352a48ef2359532bc8f6727b5544077a0dcfb24980"}, + {file = "identify-2.6.19-py2.py3-none-any.whl", hash = "sha256:20e6a87f786f768c092a721ad107fc9df0eb89347be9396cadf3f4abbd1fb78a"}, + {file = "identify-2.6.19.tar.gz", hash = "sha256:6be5020c38fcb07da56c53733538a3081ea5aa70d36a156f83044bfbf9173842"}, ] [package.extras] @@ -1954,18 +2069,18 @@ license = ["ukkonen"] [[package]] name = "idna" -version = "3.11" +version = "3.13" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.8" groups = ["main", "dev"] files = [ - {file = 
"idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"}, - {file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"}, + {file = "idna-3.13-py3-none-any.whl", hash = "sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3"}, + {file = "idna-3.13.tar.gz", hash = "sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242"}, ] [package.extras] -all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] +all = ["mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] [[package]] name = "idutils" @@ -1988,49 +2103,49 @@ tests = ["pytest-black (>=0.3.0,<0.3.10)", "pytest-cache (>=1.0)", "pytest-inven [[package]] name = "importlib-metadata" -version = "8.7.1" +version = "9.0.0" description = "Read metadata from Python packages" optional = true -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151"}, - {file = "importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb"}, + {file = "importlib_metadata-9.0.0-py3-none-any.whl", hash = "sha256:2d21d1cc5a017bd0559e36150c21c830ab1dc304dedd1b7ea85d20f45ef3edd7"}, + {file = "importlib_metadata-9.0.0.tar.gz", hash = "sha256:a4f57ab599e6a2e3016d7595cfd72eb4661a5106e787a95bcc90c7105b831efc"}, ] [package.dependencies] zipp = ">=3.20" [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +check = ["pytest-checkdocs (>=2.14)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=3.4)"] perf = 
["ipython"] -test = ["flufl.flake8", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] -type = ["mypy (<1.19) ; platform_python_implementation == \"PyPy\"", "pytest-mypy (>=1.0.1)"] +test = ["packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] +type = ["pytest-mypy (>=1.0.1) ; platform_python_implementation != \"PyPy\""] [[package]] name = "importlib-resources" -version = "6.5.2" +version = "7.1.0" description = "Read resources from Python packages" optional = true -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec"}, - {file = "importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c"}, + {file = "importlib_resources-7.1.0-py3-none-any.whl", hash = "sha256:1bd7b48b4088eddb2cd16382150bb515af0bd2c70128194392725f82ad2c96a1"}, + {file = "importlib_resources-7.1.0.tar.gz", hash = "sha256:0722d4c6212489c530f2a145a34c0a7a3b4721bc96a15fada5930e2a0b760708"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +check = ["pytest-checkdocs (>=2.14)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=2.2)"] +enabler = ["pytest-enabler (>=3.4)"] test = ["jaraco.test (>=5.4)", "pytest (>=6,!=8.1.*)", "zipp (>=3.17)"] -type = ["pytest-mypy"] +type = ["pytest-mypy (>=1.0.1) ; platform_python_implementation != \"PyPy\""] [[package]] name = "iniconfig" @@ -2062,15 +2177,15 @@ sortedcontainers = "*" [[package]] name = "ipython" -version = "8.38.0" +version = "8.39.0" description = "IPython: Productive Interactive Computing" 
optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "ipython-8.38.0-py3-none-any.whl", hash = "sha256:750162629d800ac65bb3b543a14e7a74b0e88063eac9b92124d4b2aa3f6d8e86"}, - {file = "ipython-8.38.0.tar.gz", hash = "sha256:9cfea8c903ce0867cc2f23199ed8545eb741f3a69420bfcf3743ad1cec856d39"}, + {file = "ipython-8.39.0-py3-none-any.whl", hash = "sha256:bb3c51c4fa8148ab1dea07a79584d1c854e234ea44aa1283bcb37bc75054651f"}, + {file = "ipython-8.39.0.tar.gz", hash = "sha256:4110ae96012c379b8b6db898a07e186c40a2a1ef5d57a7fa83166047d9da7624"}, ] [package.dependencies] @@ -2201,152 +2316,146 @@ mypy = ["mypy"] [[package]] name = "lxml" -version = "6.0.2" +version = "6.1.0" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e77dd455b9a16bbd2a5036a63ddbd479c19572af81b624e79ef422f929eef388"}, - {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d444858b9f07cefff6455b983aea9a67f7462ba1f6cbe4a21e8bf6791bf2153"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f952dacaa552f3bb8834908dddd500ba7d508e6ea6eb8c52eb2d28f48ca06a31"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71695772df6acea9f3c0e59e44ba8ac50c4f125217e84aab21074a1a55e7e5c9"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:17f68764f35fd78d7c4cc4ef209a184c38b65440378013d24b8aecd327c3e0c8"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:058027e261afed589eddcfe530fcc6f3402d7fd7e89bfd0532df82ebc1563dba"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:f2e3b1a6bb38de0bc713edd4d612969dd250ca8b724be8d460001a387507021c"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d6690ec5ec1cce0385cb20896b16be35247ac8c2046e493d03232f1c2414d321"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2a50c3c1d11cad0ebebbac357a97b26aa79d2bcaf46f256551152aa85d3a4d1"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3efe1b21c7801ffa29a1112fab3b0f643628c30472d507f39544fd48e9549e34"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:59c45e125140b2c4b33920d21d83681940ca29f0b83f8629ea1a2196dc8cfe6a"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:452b899faa64f1805943ec1c0c9ebeaece01a1af83e130b69cdefeda180bb42c"}, - {file = "lxml-6.0.2-cp310-cp310-win32.whl", hash = "sha256:1e786a464c191ca43b133906c6903a7e4d56bef376b75d97ccbb8ec5cf1f0a4b"}, - {file = "lxml-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:dacf3c64ef3f7440e3167aa4b49aa9e0fb99e0aa4f9ff03795640bf94531bcb0"}, - {file = "lxml-6.0.2-cp310-cp310-win_arm64.whl", hash = "sha256:45f93e6f75123f88d7f0cfd90f2d05f441b808562bf0bc01070a00f53f5028b5"}, - {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:13e35cbc684aadf05d8711a5d1b5857c92e5e580efa9a0d2be197199c8def607"}, - {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b1675e096e17c6fe9c0e8c81434f5736c0739ff9ac6123c87c2d452f48fc938"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac6e5811ae2870953390452e3476694196f98d447573234592d30488147404d"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5aa0fc67ae19d7a64c3fe725dc9a1bb11f80e01f78289d05c6f62545affec438"}, - {file = 
"lxml-6.0.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de496365750cc472b4e7902a485d3f152ecf57bd3ba03ddd5578ed8ceb4c5964"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:200069a593c5e40b8f6fc0d84d86d970ba43138c3e68619ffa234bc9bb806a4d"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d2de809c2ee3b888b59f995625385f74629707c9355e0ff856445cdcae682b7"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:b2c3da8d93cf5db60e8858c17684c47d01fee6405e554fb55018dd85fc23b178"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:442de7530296ef5e188373a1ea5789a46ce90c4847e597856570439621d9c553"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2593c77efde7bfea7f6389f1ab249b15ed4aa5bc5cb5131faa3b843c429fbedb"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:3e3cb08855967a20f553ff32d147e14329b3ae70ced6edc2f282b94afbc74b2a"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ed6c667fcbb8c19c6791bbf40b7268ef8ddf5a96940ba9404b9f9a304832f6c"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b8f18914faec94132e5b91e69d76a5c1d7b0c73e2489ea8929c4aaa10b76bbf7"}, - {file = "lxml-6.0.2-cp311-cp311-win32.whl", hash = "sha256:6605c604e6daa9e0d7f0a2137bdc47a2e93b59c60a65466353e37f8272f47c46"}, - {file = "lxml-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e5867f2651016a3afd8dd2c8238baa66f1e2802f44bc17e236f547ace6647078"}, - {file = "lxml-6.0.2-cp311-cp311-win_arm64.whl", hash = "sha256:4197fb2534ee05fd3e7afaab5d8bfd6c2e186f65ea7f9cd6a82809c887bd1285"}, - {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456"}, - {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6"}, - {file = 
"lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322"}, - {file = "lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849"}, - {file = "lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f"}, - {file = "lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6"}, - {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77"}, - {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6"}, - {file = 
"lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314"}, - {file = "lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2"}, - {file = "lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7"}, - {file = "lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf"}, - {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe"}, - {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c"}, - {file = "lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b"}, - {file = "lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed"}, - {file = "lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = 
"sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8"}, - {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d"}, - {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d"}, - {file = 
"lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f"}, - {file = "lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312"}, - {file = "lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca"}, - {file = "lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c"}, - {file = "lxml-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a656ca105115f6b766bba324f23a67914d9c728dafec57638e2b92a9dcd76c62"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c54d83a2188a10ebdba573f16bd97135d06c9ef60c3dc495315c7a28c80a263f"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:1ea99340b3c729beea786f78c38f60f4795622f36e305d9c9be402201efdc3b7"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:af85529ae8d2a453feee4c780d9406a5e3b17cee0dd75c18bd31adcd584debc3"}, - {file = "lxml-6.0.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fe659f6b5d10fb5a17f00a50eb903eb277a71ee35df4615db573c069bcf967ac"}, - {file = "lxml-6.0.2-cp38-cp38-win32.whl", hash = "sha256:5921d924aa5468c939d95c9814fa9f9b5935a6ff4e679e26aaf2951f74043512"}, - {file = "lxml-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:0aa7070978f893954008ab73bb9e3c24a7c56c054e00566a21b553dc18105fca"}, - {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2c8458c2cdd29589a8367c09c8f030f1d202be673f0ca224ec18590b3b9fb694"}, 
- {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3fee0851639d06276e6b387f1c190eb9d7f06f7f53514e966b26bae46481ec90"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b2142a376b40b6736dfc214fd2902409e9e3857eff554fed2d3c60f097e62a62"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6b5b39cc7e2998f968f05309e666103b53e2edd01df8dc51b90d734c0825444"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4aec24d6b72ee457ec665344a29acb2d35937d5192faebe429ea02633151aad"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:b42f4d86b451c2f9d06ffb4f8bbc776e04df3ba070b9fe2657804b1b40277c48"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cdaefac66e8b8f30e37a9b4768a391e1f8a16a7526d5bc77a7928408ef68e93"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:b738f7e648735714bbb82bdfd030203360cfeab7f6e8a34772b3c8c8b820568c"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daf42de090d59db025af61ce6bdb2521f0f102ea0e6ea310f13c17610a97da4c"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:66328dabea70b5ba7e53d94aa774b733cf66686535f3bc9250a7aab53a91caaf"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:e237b807d68a61fc3b1e845407e27e5eb8ef69bc93fe8505337c1acb4ee300b6"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:ac02dc29fd397608f8eb15ac1610ae2f2f0154b03f631e6d724d9e2ad4ee2c84"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:817ef43a0c0b4a77bd166dc9a09a555394105ff3374777ad41f453526e37f9cb"}, - {file = "lxml-6.0.2-cp39-cp39-win32.whl", hash = "sha256:bc532422ff26b304cfb62b328826bd995c96154ffd2bac4544f37dbb95ecaa8f"}, - {file = 
"lxml-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:995e783eb0374c120f528f807443ad5a83a656a8624c467ea73781fc5f8a8304"}, - {file = "lxml-6.0.2-cp39-cp39-win_arm64.whl", hash = "sha256:08b9d5e803c2e4725ae9e8559ee880e5328ed61aa0935244e0515d7d9dbec0aa"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e748d4cf8fef2526bb2a589a417eba0c8674e29ffcb570ce2ceca44f1e567bf6"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4ddb1049fa0579d0cbd00503ad8c58b9ab34d1254c77bc6a5576d96ec7853dba"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cb233f9c95f83707dae461b12b720c1af9c28c2d19208e1be03387222151daf5"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc456d04db0515ce3320d714a1eac7a97774ff0849e7718b492d957da4631dd4"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2613e67de13d619fd283d58bda40bff0ee07739f624ffee8b13b631abf33083d"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:24a8e756c982c001ca8d59e87c80c4d9dcd4d9b44a4cbeb8d9be4482c514d41d"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1c06035eafa8404b5cf475bb37a9f6088b0aca288d4ccc9d69389750d5543700"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c7d13103045de1bdd6fe5d61802565f1a3537d70cd3abf596aa0af62761921ee"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a3c150a95fbe5ac91de323aa756219ef9cf7fde5a3f00e2281e30f33fa5fa4f"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60fa43be34f78bebb27812ed90f1925ec99560b0fa1decdb7d12b84d857d31e9"}, - {file = 
"lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21c73b476d3cfe836be731225ec3421fa2f048d84f6df6a8e70433dff1376d5a"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e"}, - {file = "lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62"}, + {file = "lxml-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:41dcc4c7b10484257cbd6c37b83ddb26df2b0e5aff5ac00d095689015af868ec"}, + {file = "lxml-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a31286dbb5e74c8e9a5344465b77ab4c5bd511a253b355b5ca2fae7e579fafec"}, + {file = "lxml-6.1.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1bc4cc83fb7f66ffb16f74d6dd0162e144333fc36ebcce32246f80c8735b2551"}, + {file = "lxml-6.1.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:20cf4d0651987c906a2f5cba4e3a8d6ba4bfdf973cfe2a96c0d6053888ea2ecd"}, + {file = "lxml-6.1.0-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ffb34ea45a82dd637c2c97ae1bbb920850c1e59bcae79ce1c15af531d83e7215"}, + {file = "lxml-6.1.0-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a1d9b99e5b2597e4f5aed2484fef835256fa1b68a19e4265c97628ef4bf8bcf4"}, + {file = "lxml-6.1.0-cp310-cp310-manylinux_2_28_i686.whl", hash = "sha256:d43aa26dcda363f21e79afa0668f5029ed7394b3bb8c92a6927a3d34e8b610ea"}, + {file = "lxml-6.1.0-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:6262b87f9e5c1e5fe501d6c153247289af42eb44ad7660b9b3de17baaf92d6f6"}, + {file = "lxml-6.1.0-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d1392c569c032f78a11a25d1de1c43fff13294c793b39e19d84fade3045cbbc3"}, + {file = "lxml-6.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:045e387d1f4f42a418380930fa3f45c73c9b392faf67e495e58902e68e8f44a7"}, + {file = 
"lxml-6.1.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:9f93d5b8b07f73e8c77e3c6556a3db269918390c804b5e5fcdd4858232cc8f16"}, + {file = "lxml-6.1.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:de550d129f18d8ab819651ffe4f38b1b713c7e116707de3c0c6400d0ef34fbc1"}, + {file = "lxml-6.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c08da09dc003c9e8c70e06b53a11db6fb3b250c21c4236b03c7d7b443c318e7a"}, + {file = "lxml-6.1.0-cp310-cp310-win32.whl", hash = "sha256:37448bf9c7d7adfc5254763901e2bbd6bb876228dfc1fc7f66e58c06368a7544"}, + {file = "lxml-6.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:2593a0a6621545b9095b71ad74ed4226eba438a7d9fc3712a99bdb15508cf93a"}, + {file = "lxml-6.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:e80807d72f96b96ad5588cb85c75616e4f2795a7737d4630784c51497beb7776"}, + {file = "lxml-6.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cec05be8c876f92a5aa07b01d60bbb4d11cfbdd654cad0561c0d7b5c043a61b9"}, + {file = "lxml-6.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9c03e048b6ce8e77b09c734e931584894ecd58d08296804ca2d0b184c933ce50"}, + {file = "lxml-6.1.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:942454ff253da14218f972b23dc72fa4edf6c943f37edd19cd697618b626fac5"}, + {file = "lxml-6.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d036ee7b99d5148072ac7c9b847193decdfeac633db350363f7bce4fff108f0e"}, + {file = "lxml-6.1.0-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ae5d8d5427f3cc317e7950f2da7ad276df0cfa37b8de2f5658959e618ea8512"}, + {file = "lxml-6.1.0-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:363e47283bde87051b821826e71dde47f107e08614e1aa312ba0c5711e77738c"}, + {file = "lxml-6.1.0-cp311-cp311-manylinux_2_28_i686.whl", hash = "sha256:f504d861d9f2a8f94020130adac88d66de93841707a23a86244263d1e54682f5"}, + {file = "lxml-6.1.0-cp311-cp311-manylinux_2_31_armv7l.whl", hash = 
"sha256:23a5dc68e08ed13331d61815c08f260f46b4a60fdd1640bbeb82cf89a9d90289"}, + {file = "lxml-6.1.0-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f15401d8d3dbf239e23c818afc10c7207f7b95f9a307e092122b6f86dd43209a"}, + {file = "lxml-6.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fcf3da95e93349e0647d48d4b36a12783105bcc74cb0c416952f9988410846a3"}, + {file = "lxml-6.1.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:0d082495c5fcf426e425a6e28daaba1fcb6d8f854a4ff01effb1f1f381203eb9"}, + {file = "lxml-6.1.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:e3c4f84b24a1fcba435157d111c4b755099c6ff00a3daee1ad281817de75ed11"}, + {file = "lxml-6.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:976a6b39b1b13e8c354ad8d3f261f3a4ac6609518af91bdb5094760a08f132c4"}, + {file = "lxml-6.1.0-cp311-cp311-win32.whl", hash = "sha256:857efde87d365706590847b916baff69c0bc9252dc5af030e378c9800c0b10e3"}, + {file = "lxml-6.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:183bfb45a493081943be7ea2b5adfc2b611e1cf377cefa8b8a8be404f45ef9a7"}, + {file = "lxml-6.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:19f4164243fc206d12ed3d866e80e74f5bc3627966520da1a5f97e42c32a3f39"}, + {file = "lxml-6.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d2f17a16cd8751e8eb233a7e41aecdf8e511712e00088bf9be455f604cd0d28d"}, + {file = "lxml-6.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f0cea5b1d3e6e77d71bd2b9972eb2446221a69dc52bb0b9c3c6f6e5700592d93"}, + {file = "lxml-6.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc46da94826188ed45cb53bd8e3fc076ae22675aea2087843d4735627f867c6d"}, + {file = "lxml-6.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9147d8e386ec3b82c3b15d88927f734f565b0aaadef7def562b853adca45784a"}, + {file = "lxml-6.1.0-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:5715e0e28736a070f3f34a7ccc09e2fdcba0e3060abbcf61a1a5718ff6d6b105"}, + {file = "lxml-6.1.0-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4937460dc5df0cdd2f06a86c285c28afda06aefa3af949f9477d3e8df430c485"}, + {file = "lxml-6.1.0-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bc783ee3147e60a25aa0445ea82b3e8aabb83b240f2b95d32cb75587ff781814"}, + {file = "lxml-6.1.0-cp312-cp312-manylinux_2_28_i686.whl", hash = "sha256:40d9189f80075f2e1f88db21ef815a2b17b28adf8e50aaf5c789bfe737027f32"}, + {file = "lxml-6.1.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:05b9b8787e35bec69e68daf4952b2e6dfcfb0db7ecf1a06f8cdfbbac4eb71aad"}, + {file = "lxml-6.1.0-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0f0f08beb0182e3e9a86fae124b3c47a7b41b7b69b225e1377db983802404e54"}, + {file = "lxml-6.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73becf6d8c81d4c76b1014dbd3584cb26d904492dcf73ca85dc8bff08dcd6d2d"}, + {file = "lxml-6.1.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:1ae225f66e5938f4fa29d37e009a3bb3b13032ac57eb4eb42afa44f6e4054e69"}, + {file = "lxml-6.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:690022c7fae793b0489aa68a658822cea83e0d5933781811cabbf5ea3bcfe73d"}, + {file = "lxml-6.1.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:63aeafc26aac0be8aff14af7871249e87ea1319be92090bfd632ec68e03b16a5"}, + {file = "lxml-6.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:264c605ab9c0e4aa1a679636f4582c4d3313700009fac3ec9c3412ed0d8f3e1d"}, + {file = "lxml-6.1.0-cp312-cp312-win32.whl", hash = "sha256:56971379bc5ee8037c5a0f09fa88f66cdb7d37c3e38af3e45cf539f41131ac1f"}, + {file = "lxml-6.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:bba078de0031c219e5dd06cf3e6bf8fb8e6e64a77819b358f53bb132e3e03366"}, + {file = "lxml-6.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:c3592631e652afa34999a088f98ba7dfc7d6aff0d535c410bea77a71743f3819"}, + {file = 
"lxml-6.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a0092f2b107b69601adf562a57c956fbb596e05e3e6651cabd3054113b007e45"}, + {file = "lxml-6.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fc7140d7a7386e6b545d41b7358f4d02b656d4053f5fa6859f92f4b9c2572c4d"}, + {file = "lxml-6.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:419c58fc92cc3a2c3fa5f78c63dbf5da70c1fa9c1b25f25727ecee89a96c7de2"}, + {file = "lxml-6.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:37fabd1452852636cf38ecdcc9dd5ca4bba7a35d6c53fa09725deeb894a87491"}, + {file = "lxml-6.1.0-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2853c8b2170cc6cd54a6b4d50d2c1a8a7aeca201f23804b4898525c7a152cfc"}, + {file = "lxml-6.1.0-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8e369cbd690e788c8d15e56222d91a09c6a417f49cbc543040cba0fe2e25a79e"}, + {file = "lxml-6.1.0-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e69aa6805905807186eb00e66c6d97a935c928275182eb02ee40ba00da9623b2"}, + {file = "lxml-6.1.0-cp313-cp313-manylinux_2_28_i686.whl", hash = "sha256:4bd1bdb8a9e0e2dd229de19b5f8aebac80e916921b4b2c6ef8a52bc131d0c1f9"}, + {file = "lxml-6.1.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:cbd7b79cdcb4986ad78a2662625882747f09db5e4cd7b2ae178a88c9c51b3dfe"}, + {file = "lxml-6.1.0-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:43e4d297f11080ec9d64a4b1ad7ac02b4484c9f0e2179d9c4ef78e886e747b88"}, + {file = "lxml-6.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cc16682cc987a3da00aa56a3aa3075b08edb10d9b1e476938cfdbee8f3b67181"}, + {file = "lxml-6.1.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:d6d8efe71429635f0559579092bb5e60560d7b9115ee38c4adbea35632e7fa24"}, + {file = "lxml-6.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:7e39ab3a28af7784e206d8606ec0e4bcad0190f63a492bca95e94e5a4aef7f6e"}, + {file = "lxml-6.1.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:9eb667bf50856c4a58145f8ca2d5e5be160191e79eb9e30855a476191b3c3495"}, + {file = "lxml-6.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7f4a77d6f7edf9230cee3e1f7f6764722a41604ee5681844f18db9a81ea0ec33"}, + {file = "lxml-6.1.0-cp313-cp313-win32.whl", hash = "sha256:28902146ffbe5222df411c5d19e5352490122e14447e98cd118907ee3fd6ee62"}, + {file = "lxml-6.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:4a1503c56e4e2b38dc76f2f2da7bae69670c0f1933e27cfa34b2fa5876410b16"}, + {file = "lxml-6.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:e0af85773850417d994d019741239b901b22c6680206f46a34766926e466141d"}, + {file = "lxml-6.1.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:ab863fd37458fed6456525f297d21239d987800c46e67da5ef04fc6b3dd93ac8"}, + {file = "lxml-6.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:6fd8b1df8254ff4fd93fd31da1fc15770bde23ac045be9bb1f87425702f61cc9"}, + {file = "lxml-6.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:47024feaae386a92a146af0d2aeed65229bf6fff738e6a11dda6b0015fb8fd03"}, + {file = "lxml-6.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3f00972f84450204cd5d93a5395965e348956aaceaadec693a22ec743f8ae3eb"}, + {file = "lxml-6.1.0-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97faa0860e13b05b15a51fb4986421ef7a30f0b3334061c416e0981e9450ca4c"}, + {file = "lxml-6.1.0-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:972a6451204798675407beaad97b868d0c733d9a74dafefc63120b81b8c2de28"}, + {file = "lxml-6.1.0-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fe022f20bc4569ec66b63b3fb275a3d628d9d32da6326b2982584104db6d3086"}, + {file = "lxml-6.1.0-cp314-cp314-manylinux_2_28_i686.whl", hash = 
"sha256:75c4c7c619a744f972f4451bf5adf6d0fb00992a1ffc9fd78e13b0bc817cc99f"}, + {file = "lxml-6.1.0-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:3648f20d25102a22b6061c688beb3a805099ea4beb0a01ce62975d926944d292"}, + {file = "lxml-6.1.0-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:77b9f99b17cbf14026d1e618035077060fc7195dd940d025149f3e2e830fbfcb"}, + {file = "lxml-6.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:32662519149fd7a9db354175aa5e417d83485a8039b8aaa62f873ceee7ea4cad"}, + {file = "lxml-6.1.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:73d658216fc173cf2c939e90e07b941c5e12736b0bf6a99e7af95459cfe8eabb"}, + {file = "lxml-6.1.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ac4db068889f8772a4a698c5980ec302771bb545e10c4b095d4c8be26749616f"}, + {file = "lxml-6.1.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:45e9dfbd1b661eb64ba0d4dbe762bd210c42d86dd1e5bd2bdf89d634231beb43"}, + {file = "lxml-6.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:89e8d73d09ac696a5ba42ec69787913d53284f12092f651506779314f10ba585"}, + {file = "lxml-6.1.0-cp314-cp314-win32.whl", hash = "sha256:ebe33f4ec1b2de38ceb225a1749a2965855bffeef435ba93cd2d5d540783bf2f"}, + {file = "lxml-6.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:398443df51c538bd578529aa7e5f7afc6c292644174b47961f3bf87fe5741120"}, + {file = "lxml-6.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:8c8984e1d8c4b3949e419158fda14d921ff703a9ed8a47236c6eb7a2b6cb4946"}, + {file = "lxml-6.1.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:1081dd10bc6fa437db2500e13993abf7cc30716d0a2f40e65abb935f02ec559c"}, + {file = "lxml-6.1.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:dabecc48db5f42ba348d1f5d5afdc54c6c4cc758e676926c7cd327045749517d"}, + {file = "lxml-6.1.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e3dd5fe19c9e0ac818a9c7f132a5e43c1339ec1cbbfecb1a938bd3a47875b7c9"}, + {file = 
"lxml-6.1.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9e7b0a4ca6dcc007a4cef00a761bba2dea959de4bd2df98f926b33c92ca5dfb9"}, + {file = "lxml-6.1.0-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d27bbe326c6b539c64b42638b18bc6003a8d88f76213a97ac9ed4f885efeab7"}, + {file = "lxml-6.1.0-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4e425db0c5445ef0ad56b0eec54f89b88b2d884656e536a90b2f52aecb4ca86"}, + {file = "lxml-6.1.0-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4b89b098105b8599dc57adac95d1813409ac476d3c948a498775d3d0c6124bfb"}, + {file = "lxml-6.1.0-cp314-cp314t-manylinux_2_28_i686.whl", hash = "sha256:c4a699432846df86cc3de502ee85f445ebad748a1c6021d445f3e514d2cd4b1c"}, + {file = "lxml-6.1.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:30e7b2ed63b6c8e97cca8af048589a788ab5c9c905f36d9cf1c2bb549f450d2f"}, + {file = "lxml-6.1.0-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:022981127642fe19866d2907d76241bb07ed21749601f727d5d5dd1ce5d1b773"}, + {file = "lxml-6.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:23cad0cc86046d4222f7f418910e46b89971c5a45d3c8abfad0f64b7b05e4a9b"}, + {file = "lxml-6.1.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:21c3302068f50d1e8728c67c87ba92aa87043abee517aa2576cca1855326b405"}, + {file = "lxml-6.1.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:be10838781cb3be19251e276910cd508fe127e27c3242e50521521a0f3781690"}, + {file = "lxml-6.1.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:2173a7bffe97667bbf0767f8a99e587740a8c56fdf3befac4b09cb29a80276fd"}, + {file = "lxml-6.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c6854e9cf99c84beb004eecd7d3a3868ef1109bf2b1df92d7bc11e96a36c2180"}, + {file = "lxml-6.1.0-cp314-cp314t-win32.whl", hash = "sha256:00750d63ef0031a05331b9223463b1c7c02b9004cef2346a5b2877f0f9494dd2"}, + {file = 
"lxml-6.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:80410c3a7e3c617af04de17caa9f9f20adaa817093293d69eae7d7d0522836f5"}, + {file = "lxml-6.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:26dd9f57ee3bd41e7d35b4c98a2ffd89ed11591649f421f0ec19f67d50ec67ac"}, + {file = "lxml-6.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b6c2f225662bc5ad416bdd06f72ca301b31b39ce4261f0e0097017fc2891b940"}, + {file = "lxml-6.1.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a86f06f059e22a0d574990ee2df24ede03f7f3c68c1336293eee9536c4c776cd"}, + {file = "lxml-6.1.0-cp38-cp38-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:468479e52ecf3ec23799c863336d02c05fc2f7ffd1a1424eeeb9a28d4eb69d13"}, + {file = "lxml-6.1.0-cp38-cp38-manylinux_2_28_i686.whl", hash = "sha256:a02ca8fe48815bddcfca3248efe54451abb9dbf2f7d1c5744c8aa4142d476919"}, + {file = "lxml-6.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:bb40648d96157f9081886defe13eac99253e663be969ff938a9289eff6e47b72"}, + {file = "lxml-6.1.0-cp38-cp38-win32.whl", hash = "sha256:1dd6a1c3ad4cb674f44525d9957f3e9c209bb6dd9213245195167a281fcc2bdc"}, + {file = "lxml-6.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:4e2c54d6b47361d0f1d3bc8d4e082ad87201e56ccdcca4d3b9ee3644ff595ec8"}, + {file = "lxml-6.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:920354904d1cb86577d4b3cfe2830c2dbe81d6f4449e57ada428f1609b5985f7"}, + {file = "lxml-6.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c871299c595ee004d186f61840f0bfc4941aa3f17c8ba4a565ead7e4f4f820ee"}, + {file = "lxml-6.1.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d0d799ff958655781296ec870d5e2448e75150da2b3d07f13ff5b0c2c35beefd"}, + {file = "lxml-6.1.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ba11752e346bd804ea312ec2eea2532dfa8b8d3261d81a32ef9e6ab16256280"}, + {file = "lxml-6.1.0-cp39-cp39-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:26c5272c6a4bf4cf32d3f5a7890c942b0e04438691157d341616d02cca74d4bd"}, + {file = "lxml-6.1.0-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c53fa3a5a52122d590e847a57ccf955557b9634a7f99ff5a35131321b0a85317"}, + {file = "lxml-6.1.0-cp39-cp39-manylinux_2_28_i686.whl", hash = "sha256:76b958b4ea3104483c20f74866d55aa056546e15ebe83dd7aecd63698f43b755"}, + {file = "lxml-6.1.0-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:8c11b984b5ce6add4dccc7144c7be5d364d298f15b0c6a57da1991baedc750ce"}, + {file = "lxml-6.1.0-cp39-cp39-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d3829a6e6fd550a219564912d4002c537f65da4c6ae4e093cc34462f4fa027ad"}, + {file = "lxml-6.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:52b0ac6903cf74ebf997eb8c682d2fbac7d1ab7e4c552413eec55868a9b73f39"}, + {file = "lxml-6.1.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:29f5c00cb7d752bce2c70ebd2d31b0a42f9499ffdd3ecb2f31a5b73ee43031ad"}, + {file = "lxml-6.1.0-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:c748ebcb6877de89f48ab90ca96642ac458fff5dec291a2b9337cd4d0934e383"}, + {file = "lxml-6.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:08950a23f296b3f83521577274e3d3b0f3d739bf2e68d01a752e4288bc50d286"}, + {file = "lxml-6.1.0-cp39-cp39-win32.whl", hash = "sha256:11a873c77a181b4fef9c2e357d08ed399542c2af1390101da66720a19c7c9618"}, + {file = "lxml-6.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:81ff55c70b67d19d52b6fd118a114c0a4c97d799cd3089ff9bd9e2ff4b414ee2"}, + {file = "lxml-6.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:481d6e2104285d9add34f41b42b247b76b61c5b5c26c303c2e9707bbf8bd9a64"}, + {file = "lxml-6.1.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:546b66c0dd1bb8d9fa89d7123e5fa19a8aff3a1f2141eb22df96112afb17b842"}, + {file = "lxml-6.1.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5cfa1a34df366d9dc0d5eaf420f4cf2bb1e1bebe1066d1c2fc28c179f8a4004c"}, + {file = 
"lxml-6.1.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:db88156fcf544cdbf0d95588051515cfdfd4c876fc66444eb98bceb5d6db76de"}, + {file = "lxml-6.1.0-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:07f98f5496f96bf724b1e3c933c107f0cbf2745db18c03d2e13a291c3afd2635"}, + {file = "lxml-6.1.0-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4642e04449a1e164b5ff71ffd901ddb772dfabf5c9adf1b7be5dffe1212bc037"}, + {file = "lxml-6.1.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:7da13bb6fbadfafb474e0226a30570a3445cfd47c86296f2446dafbd77079ace"}, + {file = "lxml-6.1.0.tar.gz", hash = "sha256:bfd57d8008c4965709a919c3e9a98f76c2c7cb319086b3d26858250620023b13"}, ] [package.extras] @@ -2357,15 +2466,15 @@ htmlsoup = ["BeautifulSoup4"] [[package]] name = "mako" -version = "1.3.10" +version = "1.3.12" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." 
optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59"}, - {file = "mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28"}, + {file = "mako-1.3.12-py3-none-any.whl", hash = "sha256:8f61569480282dbf557145ce441e4ba888be453c30989f879f0d652e39f53ea9"}, + {file = "mako-1.3.12.tar.gz", hash = "sha256:9f778e93289bd410bb35daadeb4fc66d95a746f0b75777b942088b7fd7af550a"}, ] [package.dependencies] @@ -2575,6 +2684,21 @@ install-types = ["pip"] mypyc = ["setuptools (>=50)"] reports = ["lxml"] +[[package]] +name = "mypy-boto3-s3" +version = "1.42.94" +description = "Type annotations for boto3 S3 1.42.94 service generated with mypy-boto3-builder 8.12.0" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mypy_boto3_s3-1.42.94-py3-none-any.whl", hash = "sha256:d7c2111396d7ae344b241958b7df8ed33d478d746721202715f5c197f9cd0c83"}, + {file = "mypy_boto3_s3-1.42.94.tar.gz", hash = "sha256:1d92d722cf00573b8111e98493ab386e0c1b59a1530b7fee4af77f2d9a1c477d"}, +] + +[package.dependencies] +typing-extensions = {version = "*", markers = "python_version < \"3.12\""} + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -2666,14 +2790,14 @@ simplejson = "*" [[package]] name = "packaging" -version = "26.0" +version = "26.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529"}, - {file = "packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4"}, + {file = "packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e"}, + {file = "packaging-26.2.tar.gz", 
hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661"}, ] [[package]] @@ -2793,15 +2917,15 @@ types-pytz = ">=2022.1.1" [[package]] name = "parse" -version = "1.21.0" +version = "1.21.1" description = "parse() is the opposite of format()" optional = true python-versions = "*" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "parse-1.21.0-py2.py3-none-any.whl", hash = "sha256:6d81f7bae0ab25fd72818375c4a9c71c8705256bfc42e8725be609cf8b904aed"}, - {file = "parse-1.21.0.tar.gz", hash = "sha256:937725d51330ffec9c7a26fdb5623baa135d8ba8ed78817ea9523538844e3ce4"}, + {file = "parse-1.21.1-py2.py3-none-any.whl", hash = "sha256:55339ca698019815df3b8e8b550e5933933527e623b0cdf1ca2f404da35ffb47"}, + {file = "parse-1.21.1.tar.gz", hash = "sha256:825e1a88e9d9fb481b8d2ca709c6195558b6eaa97c559ad3a9a20aa2d12815a3"}, ] [[package]] @@ -2852,14 +2976,14 @@ ptyprocess = ">=0.5" [[package]] name = "platformdirs" -version = "4.7.1" +version = "4.9.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "platformdirs-4.7.1-py3-none-any.whl", hash = "sha256:06ac79ae0c5025949f62711e3f7cd178736515a29bcc669f42a216016cd1dc7a"}, - {file = "platformdirs-4.7.1.tar.gz", hash = "sha256:6f4ff8472e482af4b7e67a183fbe63da846a9b34f57d5019c4d112a181003d82"}, + {file = "platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917"}, + {file = "platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a"}, ] [[package]] @@ -2892,14 +3016,14 @@ files = [ [[package]] name = "pre-commit" -version = "4.5.1" +version = "4.6.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
optional = false python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77"}, - {file = "pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61"}, + {file = "pre_commit-4.6.0-py2.py3-none-any.whl", hash = "sha256:e2cf246f7299edcabcf15f9b0571fdce06058527f0a06535068a86d38089f29b"}, + {file = "pre_commit-4.6.0.tar.gz", hash = "sha256:718d2208cef53fdc38206e40524a6d4d9576d103eb16f0fec11c875e7716e9d9"}, ] [package.dependencies] @@ -2963,14 +3087,14 @@ test = ["psleak", "pytest", "pytest-instafail", "pytest-xdist", "pywin32 ; os_na [[package]] name = "psycopg" -version = "3.3.2" +version = "3.3.3" description = "PostgreSQL database adapter for Python" optional = false python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "psycopg-3.3.2-py3-none-any.whl", hash = "sha256:3e94bc5f4690247d734599af56e51bae8e0db8e4311ea413f801fef82b14a99b"}, - {file = "psycopg-3.3.2.tar.gz", hash = "sha256:707a67975ee214d200511177a6a80e56e654754c9afca06a7194ea6bbfde9ca7"}, + {file = "psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698"}, + {file = "psycopg-3.3.3.tar.gz", hash = "sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9"}, ] [package.dependencies] @@ -2978,29 +3102,29 @@ typing-extensions = {version = ">=4.6", markers = "python_version < \"3.13\""} tzdata = {version = "*", markers = "sys_platform == \"win32\""} [package.extras] -binary = ["psycopg-binary (==3.3.2) ; implementation_name != \"pypy\""] -c = ["psycopg-c (==3.3.2) ; implementation_name != \"pypy\""] -dev = ["ast-comments (>=1.1.2)", "black (>=24.1.0)", "codespell (>=2.2)", "cython-lint (>=0.16)", "dnspython (>=2.1)", "flake8 (>=4.0)", "isort-psycopg", "isort[colors] (>=6.0)", "mypy (>=1.19.0)", "pre-commit (>=4.0.1)", "types-setuptools (>=57.4)", 
"types-shapely (>=2.0)", "wheel (>=0.37)"] +binary = ["psycopg-binary (==3.3.3) ; implementation_name != \"pypy\""] +c = ["psycopg-c (==3.3.3) ; implementation_name != \"pypy\""] +dev = ["ast-comments (>=1.1.2)", "black (>=26.1.0)", "codespell (>=2.2)", "cython-lint (>=0.16)", "dnspython (>=2.1)", "flake8 (>=4.0)", "isort-psycopg", "isort[colors] (>=6.0)", "mypy (>=1.19.0)", "pre-commit (>=4.0.1)", "types-setuptools (>=57.4)", "types-shapely (>=2.0)", "wheel (>=0.37)"] docs = ["Sphinx (>=5.0)", "furo (==2022.6.21)", "sphinx-autobuild (>=2021.3.14)", "sphinx-autodoc-typehints (>=1.12)"] pool = ["psycopg-pool"] test = ["anyio (>=4.0)", "mypy (>=1.19.0) ; implementation_name != \"pypy\"", "pproxy (>=2.7)", "pytest (>=6.2.5)", "pytest-cov (>=3.0)", "pytest-randomly (>=3.5)"] [[package]] name = "psycopg2" -version = "2.9.11" +version = "2.9.12" description = "psycopg2 - Python-PostgreSQL Database Adapter" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "psycopg2-2.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:103e857f46bb76908768ead4e2d0ba1d1a130e7b8ed77d3ae91e8b33481813e8"}, - {file = "psycopg2-2.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:210daed32e18f35e3140a1ebe059ac29209dd96468f2f7559aa59f75ee82a5cb"}, - {file = "psycopg2-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:e03e4a6dbe87ff81540b434f2e5dc2bddad10296db5eea7bdc995bf5f4162938"}, - {file = "psycopg2-2.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:8dc379166b5b7d5ea66dcebf433011dfc51a7bb8a5fc12367fa05668e5fc53c8"}, - {file = "psycopg2-2.9.11-cp314-cp314-win_amd64.whl", hash = "sha256:f10a48acba5fe6e312b891f290b4d2ca595fc9a06850fe53320beac353575578"}, - {file = "psycopg2-2.9.11-cp39-cp39-win_amd64.whl", hash = "sha256:6ecddcf573777536bddfefaea8079ce959287798c8f5804bee6933635d538924"}, - {file = "psycopg2-2.9.11.tar.gz", hash = "sha256:964d31caf728e217c697ff77ea69c2ba0865fa41ec20bb00f0977e62fdcc52e3"}, + {file = 
"psycopg2-2.9.12-cp310-cp310-win_amd64.whl", hash = "sha256:d5fbe092315fb007c03544704e6d1e678a6c0378139d01cea433dc59edf041b4"}, + {file = "psycopg2-2.9.12-cp311-cp311-win_amd64.whl", hash = "sha256:2532c0cdc6ad18c9c35cd935cc3159712e14f05276a6d29a6435c52d24b840c1"}, + {file = "psycopg2-2.9.12-cp312-cp312-win_amd64.whl", hash = "sha256:83d48e66e18c301d832e93c984a7bcbc0f4ac3bb79e2137e3bc335978c756dc0"}, + {file = "psycopg2-2.9.12-cp313-cp313-win_amd64.whl", hash = "sha256:3d23e684927d37b95cee9a943f6927b04ae2fdcd056fd0e2a30929ee89fee5a9"}, + {file = "psycopg2-2.9.12-cp314-cp314-win_amd64.whl", hash = "sha256:a73d5513bfe929c56555006c7a9cc7ae6e4276aa99dd2b1e2544eb8bb54f8b23"}, + {file = "psycopg2-2.9.12-cp39-cp39-win_amd64.whl", hash = "sha256:09826a6b89714626a662275d03f21639f1c68d183e2dcc9ba134d463a3da753e"}, + {file = "psycopg2-2.9.12.tar.gz", hash = "sha256:1dedb1c7a1d8552c4a6044c6b1c41a52e6a8e2d144af83eccac758076b1b7c15"}, ] [[package]] @@ -3034,15 +3158,15 @@ tests = ["pytest"] [[package]] name = "pyasn1" -version = "0.6.2" +version = "0.6.3" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "pyasn1-0.6.2-py3-none-any.whl", hash = "sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf"}, - {file = "pyasn1-0.6.2.tar.gz", hash = "sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b"}, + {file = "pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde"}, + {file = "pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf"}, ] [[package]] @@ -3220,15 +3344,15 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" [[package]] name = "pygments" -version = "2.19.2" +version = "2.20.0" description = "Pygments is a syntax highlighting package written in Python." 
optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, - {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, + {file = "pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176"}, + {file = "pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f"}, ] [package.extras] @@ -3264,50 +3388,56 @@ dev = ["build", "flake8", "mypy", "pytest", "twine"] [[package]] name = "pysam" -version = "0.23.3" +version = "0.24.0" description = "Package for reading, manipulating, and writing genomic data" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "pysam-0.23.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a0b99d875f293fad0bd9c9c923e8910c03af62d291ebb7d20e69ceaf39e383d4"}, - {file = "pysam-0.23.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:725a32970cf4ce322f4ab2a52b755163297027a0349f0d151537fe16bdf525e5"}, - {file = "pysam-0.23.3-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:5fd54146c0a5a41e37b67212e3b9b0c123b73d1dd2ba58082d21dc2236c1b290"}, - {file = "pysam-0.23.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a7d6b3dcbf4756bd178e217fa391187edc5793f8f50c3034e585d1e4d282d29b"}, - {file = "pysam-0.23.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bc391a099ca74a1134a1cf71812c8ddf9934ab9d6675f3a97fe299466f227a1f"}, - {file = "pysam-0.23.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d98ce73c07926d400c684773ce2521f03f78247a3dd6968c8206ba31b077b503"}, - {file = "pysam-0.23.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cb4c9c4eb245d643b60c7ec750d5554ebf17c6c9646f4f54439f94a3b3de15de"}, - {file = 
"pysam-0.23.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3449070e0bbe716f9eccd3911d2482476478fbad63f739378d0203f470a446d6"}, - {file = "pysam-0.23.3-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c6cb7069dcecca3d40bbe4a6d5adea5cafe483c11854892dbabd6e10e5776049"}, - {file = "pysam-0.23.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a7e9c835126f94ff57199e2f58e61436e12e84d47077e70aac8aa03531c4cc71"}, - {file = "pysam-0.23.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9b249367a4ad100e61afac9156bde6183c6119f2612bbd5d97ebe3153c643aed"}, - {file = "pysam-0.23.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a720cc0818aa84aca5ee4ef884fda82367598e77ec0c95d2050f670fb1fd0db5"}, - {file = "pysam-0.23.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:69f90c0867fe43f04004bcea963f6b2e68b39180afab54bf551f61f43856638b"}, - {file = "pysam-0.23.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2310d72bfae7a0980d414156267e25b57aa221a768c11c087f3f7d00ceb9fed4"}, - {file = "pysam-0.23.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b2e45983efea190d374fcda0b6e0c835d6e9e474e02694729f3b3a14d680fa62"}, - {file = "pysam-0.23.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4099393fc5097b5081c7efaf46b0109e4f0a8ed18f86d497219a8bf739c73992"}, - {file = "pysam-0.23.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4f04b9aa9b23d767fe36652eacb8370791e3b56816a7e50553d52c65ccdce77f"}, - {file = "pysam-0.23.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:701843e5dc67c8eb217c3265039c699a5f83cce64fbc4225268141796e972353"}, - {file = "pysam-0.23.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2d3177c5b3e102bde297f86e079d23fa385ac88f16c4252502079ef368056d55"}, - {file = "pysam-0.23.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2b6f6891684213e89ee679c5ac786b4e845e7d39d24f6ea0e4d8ed8be9c34f48"}, - {file = "pysam-0.23.3-cp313-cp313-manylinux_2_28_aarch64.whl", hash = 
"sha256:735b938b809f0dc19a389cf3cee04fe7a451e21e2b20d3e45fa6bc23016ae21d"}, - {file = "pysam-0.23.3-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b721ae4c9118e0c27e1500be278c3b62022c886eeb913ecabc0463fdf98da38f"}, - {file = "pysam-0.23.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:915bd2883eed08b16a41964a33923818e67166ca69a51086598d27287df6bb4f"}, - {file = "pysam-0.23.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b80f1092ba290b738d6ed230cc58cc75ca815fda441afe76cb4c25639aec7ee7"}, - {file = "pysam-0.23.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9bf6281fc4709125f5089b5c8f83ffcb1b911c4aa9c601a0a4f62beb1de82413"}, - {file = "pysam-0.23.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:456fb5f1a22001cb237fcc5b2ec03960979e5e18a3171c8e0a0116e02d86f31a"}, - {file = "pysam-0.23.3-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:7565c85fc636d75029ef4e133461c513a848c2d0ecd0489571f4fde1efa22d3b"}, - {file = "pysam-0.23.3-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:ad3cf30c6a48f3e2751a0b78d36c47cd4b272249cb6428be655b46473676d8f9"}, - {file = "pysam-0.23.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:15945db1483fef9760f32cfa112af3c3b7d50d586edfaf245edce52b99bb5c25"}, - {file = "pysam-0.23.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:013738cca990e235c56a7200ccfa9f105d7144ef34c2683c1ae8086ee030238b"}, - {file = "pysam-0.23.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:83f6f22995fa9b89b619f0d932a6714108d0dd1536fff684d3e02257c3f59b3a"}, - {file = "pysam-0.23.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ecf7cbc3d15c84cbc14a6c00af0f866b8f5e6b8ea3d2a496f18ad87adf55bcc5"}, - {file = "pysam-0.23.3-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:be2283f2ff15346d6ac10ba3b4370359ac3c1afc34b99bb0f2f39e715749cb8b"}, - {file = "pysam-0.23.3-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:513fa67af426e9e01f82653654e384d7774d81876d7dc3020ad7f72aa1d9c309"}, - {file = 
"pysam-0.23.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fd35287d2f8d243d6e54746e8cd5df3eb6239b016e51e20bbca1a2b6ef5899df"}, - {file = "pysam-0.23.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7ddbf573f0d3c650a03f2dcb4cdce50d536d380dbbc692f434b1cfa0cd7da4d2"}, - {file = "pysam-0.23.3.tar.gz", hash = "sha256:9ebcb1f004b296fd139b103ec6fd7e415e80f89f194eb7d0d972ac6d11bbaf24"}, + {file = "pysam-0.24.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d06a94e2886cb8affba77bc4fe337513f29387113d708d14609765aa8bb143d3"}, + {file = "pysam-0.24.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4d3260795e066f659ea94e6b7cac07727ef2877b874f69029b6a2e48551d72bd"}, + {file = "pysam-0.24.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9b299f6128601356e25ca94040eef9dadb1e196aa2f36736c93948aaa821c4bc"}, + {file = "pysam-0.24.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a40440b16bbf67b696914a10acc241a0cd0fa02dcb14aee8e614eb2c5d8b37"}, + {file = "pysam-0.24.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8e4f5f1ebf3bdcad4c3b8f3a656ec2afdc44f0efc96ce8523858556e95d8ca9a"}, + {file = "pysam-0.24.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fcce1edc48a9d8610958296c0aaf3e28723d143113bcd9b7c13b5cd821300d2a"}, + {file = "pysam-0.24.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:48524b2132158ffeca7ca21c52490e92dca6687446faad95522db2354e376641"}, + {file = "pysam-0.24.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:58e347406cec254bd68ac0f50da1e772273c3c891f65eab4b54641f2a89e8644"}, + {file = "pysam-0.24.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed1f202fe27f2bd677b3d210616890a8cbf043043ea89404ada3c121a5e9a20e"}, + {file = "pysam-0.24.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8de0b19109da0c4aa8e44c6fef2fae97f0e616f3cdf5be0f1b74f7946b8de262"}, + {file = 
"pysam-0.24.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d528fc09a44c85b7d4018ba8be543e1cc2e7a4d06eeb779a28ac34947e182f5a"}, + {file = "pysam-0.24.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1e9f20388c80dd0e4c09dfd7fe38adc9fdf771ec665e071e1b7350dc8da2d692"}, + {file = "pysam-0.24.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:38d5cc5dff4bdaceabbb58c0700c41b132aacf783432b1d16060b46ac7d866e2"}, + {file = "pysam-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4da222cf6887b272c09351381efa20bfc96e0a47a944a822785fc5b6bc8b8c9d"}, + {file = "pysam-0.24.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:983cae779c49575de583b29fe3d0d66c18034f9c97ee4481dab7618faadcd44c"}, + {file = "pysam-0.24.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4a642f18649e59817de272173e9c27c031dceaca199809e4f8b338ebfc5d6698"}, + {file = "pysam-0.24.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:52d2e2328ac5698cbfa34232eb17933c7835b8f61aa69f75522c7ad1ec00f4d1"}, + {file = "pysam-0.24.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dc3302c2c0c0f920040d8d38cf66ccac435aebafcccf7edf2018faeaceed39ac"}, + {file = "pysam-0.24.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c30f746060a8944adaf5004652b4a25b2460a07360fb01be2b6b38f883fae8e7"}, + {file = "pysam-0.24.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b9445a4c3be5ed4b60202690af3890a444452276372e3abb58564308cc6d5a45"}, + {file = "pysam-0.24.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:37f82c603837f708cc46a29bd5819d35dbb061c90609c1088701ed3bfbce2f70"}, + {file = "pysam-0.24.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1448b3100f79bd00830c586a256ea21b1f701d937f5ad4318adffdb5b2933478"}, + {file = "pysam-0.24.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:aeba818c56d625de5977119788f6bb1d85280b98d5f1fbab965bd5ba6aeddf7a"}, + {file = 
"pysam-0.24.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0cd378640c237a99a10e4658f2bb0ba368eea4463c31955064d35b842e4224a3"}, + {file = "pysam-0.24.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9591893006458ce834e861dab1603db7047bff0ddf201b354fcd3dbd50ded899"}, + {file = "pysam-0.24.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f6f95d44efb4ee1dcfd18fcd11b26ba00bc1afaba9128a1b56c4c8887208fb07"}, + {file = "pysam-0.24.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ee1af44c630f5e97d4c1bca708cd42e2f5b036b126bea374c3442c832c6ac43"}, + {file = "pysam-0.24.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6617456dbafd38216bdbfeccb2ac4cdd9dcca024c6e362076e6bbf32a54ec58d"}, + {file = "pysam-0.24.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7572b2c40c449286e22b8ca0c9fa1edaf67c663ac77178af728c4bc9927b7f1f"}, + {file = "pysam-0.24.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6bb3ba818e1075c849302301b84c674c3994bc53a40a71213a01058ef9ffa284"}, + {file = "pysam-0.24.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:449820696b87096cde28b4c8419c09757d1c4cdf449f002c9c2dc5765276374d"}, + {file = "pysam-0.24.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2eb95488430b5dff240b30947cbd0860e2381efdaeecdfbf712bac4c3b7a9f1b"}, + {file = "pysam-0.24.0-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ff176a7c6b97cdb2fa450e65c2ab42417e1a6cfeb5f9929b77519c8e32f67a2"}, + {file = "pysam-0.24.0-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:caf06bfb83391acec7ba10e9339f444d143c36082a79cbdf51b414a6d4c6a4fd"}, + {file = "pysam-0.24.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:cb5e2c12ce3ad222b5ee30aa1b8604047810f0ff2d4472076522b9e475e82f25"}, + {file = "pysam-0.24.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cf9be8762730cb0c59aab4ac58f701589050462f98fd8c5ef9b87ef2984c3406"}, + {file = 
"pysam-0.24.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b3a54ee482fd5402e98053ed97cf2c3c92ef6ee224c4ec2febdccac1c5229de"}, + {file = "pysam-0.24.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:42f951a055e0337efe7a07594df48e615ab8629c9f5baefc4c55d37b127319d6"}, + {file = "pysam-0.24.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1aa91839fa4453fa978d233edf296bd70b8a003f9f829c35e37a7ac261fcf7c8"}, + {file = "pysam-0.24.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:294c6ec904cdf97ef826c43156d4f555f1d6ba55de2a6d2854564600e74638b0"}, + {file = "pysam-0.24.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:741cb2eb7145208014519754264a55d0cdca5387f0d2d7d9a96074d561d8e75f"}, + {file = "pysam-0.24.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:692a759d7798318a9e64c49bde6541c1212b30133c88f14c22c15127dc163f1a"}, + {file = "pysam-0.24.0.tar.gz", hash = "sha256:db0f86c15532ef5dad263748324f45d9a639668e3497d8cabce54ef47a1a78d9"}, ] [[package]] @@ -3372,22 +3502,22 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] [[package]] name = "pytest-postgresql" -version = "5.0.0" +version = "7.0.2" description = "Postgresql fixtures and fixture factories for Pytest." 
optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "pytest-postgresql-5.0.0.tar.gz", hash = "sha256:22edcbafab8995ee85b8d948ddfaad4f70c2c7462303d7477ecd2f77fc9d15bd"}, - {file = "pytest_postgresql-5.0.0-py3-none-any.whl", hash = "sha256:6e8f0773b57c9b8975b6392c241b7b81b7018f32079a533f368f2fbda732ecd3"}, + {file = "pytest_postgresql-7.0.2-py3-none-any.whl", hash = "sha256:0b0d31c51620a9c1d6be93286af354256bc58a47c379f56f4147b22da6e81fb5"}, + {file = "pytest_postgresql-7.0.2.tar.gz", hash = "sha256:57c8d3f7d4e91d0ea8b2eac786d04f60080fa6ed6e66f1f94d747c71c9e5a4f4"}, ] [package.dependencies] -mirakuru = "*" -port-for = ">=0.6.0" +mirakuru = ">=2.6.0" +packaging = "*" +port-for = ">=0.7.3" psycopg = ">=3.0.0" -pytest = ">=6.2" -setuptools = "*" +pytest = ">=7.2" [[package]] name = "pytest-socket" @@ -3419,6 +3549,26 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-discovery" +version = "1.2.2" +description = "Python interpreter discovery" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "python_discovery-1.2.2-py3-none-any.whl", hash = "sha256:e1ae95d9af875e78f15e19aed0c6137ab1bb49c200f21f5061786490c9585c7a"}, + {file = "python_discovery-1.2.2.tar.gz", hash = "sha256:876e9c57139eb757cb5878cbdd9ae5379e5d96266c99ef731119e04fffe533bb"}, +] + +[package.dependencies] +filelock = ">=3.15.4" +platformdirs = ">=4.3.6,<5" + +[package.extras] +docs = ["furo (>=2025.12.19)", "sphinx (>=9.1)", "sphinx-autodoc-typehints (>=3.6.3)", "sphinxcontrib-mermaid (>=2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.5.4)", "pytest (>=8.3.5)", "pytest-mock (>=3.14)", "setuptools (>=75.1)"] + [[package]] name = "python-dotenv" version = "0.20.0" @@ -3473,27 +3623,27 @@ files = [ [[package]] name = "python-multipart" -version = "0.0.22" +version = "0.0.27" description = "A streaming multipart parser for Python" optional = true python-versions = ">=3.10" groups = ["main"] 
markers = "extra == \"server\"" files = [ - {file = "python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155"}, - {file = "python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58"}, + {file = "python_multipart-0.0.27-py3-none-any.whl", hash = "sha256:6fccfad17a27334bd0193681b369f476eda3409f17381a2d65aa7df3f7275645"}, + {file = "python_multipart-0.0.27.tar.gz", hash = "sha256:9870a6a8c5a20a5bf4f07c017bd1489006ff8836cff097b6933355ee2b49b602"}, ] [[package]] name = "pytz" -version = "2025.2" +version = "2026.1.post1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" groups = ["main"] files = [ - {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, - {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, + {file = "pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a"}, + {file = "pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1"}, ] [[package]] @@ -3852,7 +4002,7 @@ version = "81.0.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main"] files = [ {file = "setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6"}, {file = "setuptools-81.0.0.tar.gz", hash = "sha256:487b53915f52501f0a79ccfd0c02c165ffe06631443a886740b91af4b7a5845a"}, @@ -3869,123 +4019,109 @@ type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.deve [[package]] name = "simplejson" -version = "3.20.2" +version = "4.1.1" description = "Simple, fast, extensible JSON encoder/decoder 
for Python" optional = true -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.5" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=2.7" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "simplejson-3.20.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:11847093fd36e3f5a4f595ff0506286c54885f8ad2d921dfb64a85bce67f72c4"}, - {file = "simplejson-3.20.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:4d291911d23b1ab8eb3241204dd54e3ec60ddcd74dfcb576939d3df327205865"}, - {file = "simplejson-3.20.2-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:da6d16d7108d366bbbf1c1f3274662294859c03266e80dd899fc432598115ea4"}, - {file = "simplejson-3.20.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9ddf9a07694c5bbb4856271cbc4247cc6cf48f224a7d128a280482a2f78bae3d"}, - {file = "simplejson-3.20.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:3a0d2337e490e6ab42d65a082e69473717f5cc75c3c3fb530504d3681c4cb40c"}, - {file = "simplejson-3.20.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:8ba88696351ed26a8648f8378a1431223f02438f8036f006d23b4f5b572778fa"}, - {file = "simplejson-3.20.2-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:00bcd408a4430af99d1f8b2b103bb2f5133bb688596a511fcfa7db865fbb845e"}, - {file = "simplejson-3.20.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:4fc62feb76f590ccaff6f903f52a01c58ba6423171aa117b96508afda9c210f0"}, - {file = "simplejson-3.20.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6d7286dc11af60a2f76eafb0c2acde2d997e87890e37e24590bb513bec9f1bc5"}, - {file = "simplejson-3.20.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c01379b4861c3b0aa40cba8d44f2b448f5743999aa68aaa5d3ef7049d4a28a2d"}, - {file = "simplejson-3.20.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a16b029ca25645b3bc44e84a4f941efa51bf93c180b31bd704ce6349d1fc77c1"}, - {file = "simplejson-3.20.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:3e22a5fb7b1437ffb057e02e1936a3bfb19084ae9d221ec5e9f4cf85f69946b6"}, - {file = "simplejson-3.20.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b6ff02fc7b8555c906c24735908854819b0d0dc85883d453e23ca4c0445d01"}, - {file = "simplejson-3.20.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2bfc1c396ad972ba4431130b42307b2321dba14d988580c1ac421ec6a6b7cee3"}, - {file = "simplejson-3.20.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a97249ee1aee005d891b5a211faf58092a309f3d9d440bc269043b08f662eda"}, - {file = "simplejson-3.20.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f1036be00b5edaddbddbb89c0f80ed229714a941cfd21e51386dc69c237201c2"}, - {file = "simplejson-3.20.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5d6f5bacb8cdee64946b45f2680afa3f54cd38e62471ceda89f777693aeca4e4"}, - {file = "simplejson-3.20.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8db6841fb796ec5af632f677abf21c6425a1ebea0d9ac3ef1a340b8dc69f52b8"}, - {file = "simplejson-3.20.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c0a341f7cc2aae82ee2b31f8a827fd2e51d09626f8b3accc441a6907c88aedb7"}, - {file = "simplejson-3.20.2-cp310-cp310-win32.whl", hash = "sha256:27f9c01a6bc581d32ab026f515226864576da05ef322d7fc141cd8a15a95ce53"}, - {file = "simplejson-3.20.2-cp310-cp310-win_amd64.whl", hash = "sha256:c0a63ec98a4547ff366871bf832a7367ee43d047bcec0b07b66c794e2137b476"}, - {file = "simplejson-3.20.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:06190b33cd7849efc413a5738d3da00b90e4a5382fd3d584c841ac20fb828c6f"}, - {file = "simplejson-3.20.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4ad4eac7d858947a30d2c404e61f16b84d16be79eb6fb316341885bdde864fa8"}, - {file = "simplejson-3.20.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b392e11c6165d4a0fde41754a0e13e1d88a5ad782b245a973dd4b2bdb4e5076a"}, - 
{file = "simplejson-3.20.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51eccc4e353eed3c50e0ea2326173acdc05e58f0c110405920b989d481287e51"}, - {file = "simplejson-3.20.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:306e83d7c331ad833d2d43c76a67f476c4b80c4a13334f6e34bb110e6105b3bd"}, - {file = "simplejson-3.20.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f820a6ac2ef0bc338ae4963f4f82ccebdb0824fe9caf6d660670c578abe01013"}, - {file = "simplejson-3.20.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21e7a066528a5451433eb3418184f05682ea0493d14e9aae690499b7e1eb6b81"}, - {file = "simplejson-3.20.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:438680ddde57ea87161a4824e8de04387b328ad51cfdf1eaf723623a3014b7aa"}, - {file = "simplejson-3.20.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:cac78470ae68b8d8c41b6fca97f5bf8e024ca80d5878c7724e024540f5cdaadb"}, - {file = "simplejson-3.20.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7524e19c2da5ef281860a3d74668050c6986be15c9dd99966034ba47c68828c2"}, - {file = "simplejson-3.20.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e9b6d845a603b2eef3394eb5e21edb8626cd9ae9a8361d14e267eb969dbe413"}, - {file = "simplejson-3.20.2-cp311-cp311-win32.whl", hash = "sha256:47d8927e5ac927fdd34c99cc617938abb3624b06ff86e8e219740a86507eb961"}, - {file = "simplejson-3.20.2-cp311-cp311-win_amd64.whl", hash = "sha256:ba4edf3be8e97e4713d06c3d302cba1ff5c49d16e9d24c209884ac1b8455520c"}, - {file = "simplejson-3.20.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:4376d5acae0d1e91e78baeba4ee3cf22fbf6509d81539d01b94e0951d28ec2b6"}, - {file = "simplejson-3.20.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f8fe6de652fcddae6dec8f281cc1e77e4e8f3575249e1800090aab48f73b4259"}, - {file = 
"simplejson-3.20.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25ca2663d99328d51e5a138f22018e54c9162438d831e26cfc3458688616eca8"}, - {file = "simplejson-3.20.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12a6b2816b6cab6c3fd273d43b1948bc9acf708272074c8858f579c394f4cbc9"}, - {file = "simplejson-3.20.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac20dc3fcdfc7b8415bfc3d7d51beccd8695c3f4acb7f74e3a3b538e76672868"}, - {file = "simplejson-3.20.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db0804d04564e70862ef807f3e1ace2cc212ef0e22deb1b3d6f80c45e5882c6b"}, - {file = "simplejson-3.20.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:979ce23ea663895ae39106946ef3d78527822d918a136dbc77b9e2b7f006237e"}, - {file = "simplejson-3.20.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a2ba921b047bb029805726800819675249ef25d2f65fd0edb90639c5b1c3033c"}, - {file = "simplejson-3.20.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:12d3d4dc33770069b780cc8f5abef909fe4a3f071f18f55f6d896a370fd0f970"}, - {file = "simplejson-3.20.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:aff032a59a201b3683a34be1169e71ddda683d9c3b43b261599c12055349251e"}, - {file = "simplejson-3.20.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:30e590e133b06773f0dc9c3f82e567463df40598b660b5adf53eb1c488202544"}, - {file = "simplejson-3.20.2-cp312-cp312-win32.whl", hash = "sha256:8d7be7c99939cc58e7c5bcf6bb52a842a58e6c65e1e9cdd2a94b697b24cddb54"}, - {file = "simplejson-3.20.2-cp312-cp312-win_amd64.whl", hash = "sha256:2c0b4a67e75b945489052af6590e7dca0ed473ead5d0f3aad61fa584afe814ab"}, - {file = "simplejson-3.20.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:90d311ba8fcd733a3677e0be21804827226a57144130ba01c3c6a325e887dd86"}, - {file = 
"simplejson-3.20.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:feed6806f614bdf7f5cb6d0123cb0c1c5f40407ef103aa935cffaa694e2e0c74"}, - {file = "simplejson-3.20.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6b1d8d7c3e1a205c49e1aee6ba907dcb8ccea83651e6c3e2cb2062f1e52b0726"}, - {file = "simplejson-3.20.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:552f55745044a24c3cb7ec67e54234be56d5d6d0e054f2e4cf4fb3e297429be5"}, - {file = "simplejson-3.20.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2da97ac65165d66b0570c9e545786f0ac7b5de5854d3711a16cacbcaa8c472d"}, - {file = "simplejson-3.20.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f59a12966daa356bf68927fca5a67bebac0033cd18b96de9c2d426cd11756cd0"}, - {file = "simplejson-3.20.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:133ae2098a8e162c71da97cdab1f383afdd91373b7ff5fe65169b04167da976b"}, - {file = "simplejson-3.20.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7977640af7b7d5e6a852d26622057d428706a550f7f5083e7c4dd010a84d941f"}, - {file = "simplejson-3.20.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b530ad6d55e71fa9e93e1109cf8182f427a6355848a4ffa09f69cc44e1512522"}, - {file = "simplejson-3.20.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bd96a7d981bf64f0e42345584768da4435c05b24fd3c364663f5fbc8fabf82e3"}, - {file = "simplejson-3.20.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f28ee755fadb426ba2e464d6fcf25d3f152a05eb6b38e0b4f790352f5540c769"}, - {file = "simplejson-3.20.2-cp313-cp313-win32.whl", hash = "sha256:472785b52e48e3eed9b78b95e26a256f59bb1ee38339be3075dad799e2e1e661"}, - {file = "simplejson-3.20.2-cp313-cp313-win_amd64.whl", hash = "sha256:a1a85013eb33e4820286139540accbe2c98d2da894b2dcefd280209db508e608"}, - {file = "simplejson-3.20.2-cp36-cp36m-macosx_10_9_x86_64.whl", 
hash = "sha256:a135941a50795c934bdc9acc74e172b126e3694fe26de3c0c1bc0b33ea17e6ce"}, - {file = "simplejson-3.20.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25ba488decb18738f5d6bd082018409689ed8e74bc6c4d33a0b81af6edf1c9f4"}, - {file = "simplejson-3.20.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d81f8e982923d5e9841622ff6568be89756428f98a82c16e4158ac32b92a3787"}, - {file = "simplejson-3.20.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cdad497ccb1edc5020bef209e9c3e062a923e8e6fca5b8a39f0fb34380c8a66c"}, - {file = "simplejson-3.20.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a3f1db97bcd9fb592928159af7a405b18df7e847cbcc5682a209c5b2ad5d6b1"}, - {file = "simplejson-3.20.2-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:215b65b0dc2c432ab79c430aa4f1e595f37b07a83c1e4c4928d7e22e6b49a748"}, - {file = "simplejson-3.20.2-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:ece4863171ba53f086a3bfd87f02ec3d6abc586f413babfc6cf4de4d84894620"}, - {file = "simplejson-3.20.2-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:4a76d7c47d959afe6c41c88005f3041f583a4b9a1783cf341887a3628a77baa0"}, - {file = "simplejson-3.20.2-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:e9b0523582a57d9ea74f83ecefdffe18b2b0a907df1a9cef06955883341930d8"}, - {file = "simplejson-3.20.2-cp36-cp36m-win32.whl", hash = "sha256:16366591c8e08a4ac76b81d76a3fc97bf2bcc234c9c097b48d32ea6bfe2be2fe"}, - {file = "simplejson-3.20.2-cp36-cp36m-win_amd64.whl", hash = "sha256:732cf4c4ac1a258b4e9334e1e40a38303689f432497d3caeb491428b7547e782"}, - {file = "simplejson-3.20.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6c3a98e21e5f098e4f982ef302ebb1e681ff16a5d530cfce36296bea58fe2396"}, - {file = "simplejson-3.20.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:10cf9ca1363dc3711c72f4ec7c1caed2bbd9aaa29a8d9122e31106022dc175c6"}, - {file = "simplejson-3.20.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:106762f8aedf3fc3364649bfe8dc9a40bf5104f872a4d2d86bae001b1af30d30"}, - {file = "simplejson-3.20.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b21659898b7496322e99674739193f81052e588afa8b31b6a1c7733d8829b925"}, - {file = "simplejson-3.20.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78fa1db6a02bca88829f2b2057c76a1d2dc2fccb8c5ff1199e352f213e9ec719"}, - {file = "simplejson-3.20.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:156139d94b660448ec8a4ea89f77ec476597f752c2ff66432d3656704c66b40e"}, - {file = "simplejson-3.20.2-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:b2620ac40be04dff08854baf6f4df10272f67079f61ed1b6274c0e840f2e2ae1"}, - {file = "simplejson-3.20.2-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:9ccef5b5d3e3ac5d9da0a0ca1d2de8cf2b0fb56b06aa0ab79325fa4bcc5a1d60"}, - {file = "simplejson-3.20.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:f526304c2cc9fd8b8d18afacb75bc171650f83a7097b2c92ad6a431b5d7c1b72"}, - {file = "simplejson-3.20.2-cp37-cp37m-win32.whl", hash = "sha256:e0f661105398121dd48d9987a2a8f7825b8297b3b2a7fe5b0d247370396119d5"}, - {file = "simplejson-3.20.2-cp37-cp37m-win_amd64.whl", hash = "sha256:dab98625b3d6821e77ea59c4d0e71059f8063825a0885b50ed410e5c8bd5cb66"}, - {file = "simplejson-3.20.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b8205f113082e7d8f667d6cd37d019a7ee5ef30b48463f9de48e1853726c6127"}, - {file = "simplejson-3.20.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fc8da64929ef0ff16448b602394a76fd9968a39afff0692e5ab53669df1f047f"}, - {file = "simplejson-3.20.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bfe704864b5fead4f21c8d448a89ee101c9b0fc92a5f40b674111da9272b3a90"}, - {file = 
"simplejson-3.20.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40ca7cbe7d2f423b97ed4e70989ef357f027a7e487606628c11b79667639dc84"}, - {file = "simplejson-3.20.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0cec1868b237fe9fb2d466d6ce0c7b772e005aadeeda582d867f6f1ec9710cad"}, - {file = "simplejson-3.20.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:792debfba68d8dd61085ffb332d72b9f5b38269cda0c99f92c7a054382f55246"}, - {file = "simplejson-3.20.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e022b2c4c54cb4855e555f64aa3377e3e5ca912c372fa9e3edcc90ebbad93dce"}, - {file = "simplejson-3.20.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:5de26f11d5aca575d3825dddc65f69fdcba18f6ca2b4db5cef16f41f969cef15"}, - {file = "simplejson-3.20.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:e2162b2a43614727ec3df75baeda8881ab129824aa1b49410d4b6c64f55a45b4"}, - {file = "simplejson-3.20.2-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e11a1d6b2f7e72ca546bdb4e6374b237ebae9220e764051b867111df83acbd13"}, - {file = "simplejson-3.20.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:daf7cd18fe99eb427fa6ddb6b437cfde65125a96dc27b93a8969b6fe90a1dbea"}, - {file = "simplejson-3.20.2-cp38-cp38-win32.whl", hash = "sha256:da795ea5f440052f4f497b496010e2c4e05940d449ea7b5c417794ec1be55d01"}, - {file = "simplejson-3.20.2-cp38-cp38-win_amd64.whl", hash = "sha256:6a4b5e7864f952fcce4244a70166797d7b8fd6069b4286d3e8403c14b88656b6"}, - {file = "simplejson-3.20.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b3bf76512ccb07d47944ebdca44c65b781612d38b9098566b4bb40f713fc4047"}, - {file = "simplejson-3.20.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:214e26acf2dfb9ff3314e65c4e168a6b125bced0e2d99a65ea7b0f169db1e562"}, - {file = "simplejson-3.20.2-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:2fb1259ca9c385b0395bad59cdbf79535a5a84fb1988f339a49bfbc57455a35a"}, - {file = "simplejson-3.20.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c34e028a2ba8553a208ded1da5fa8501833875078c4c00a50dffc33622057881"}, - {file = "simplejson-3.20.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b538f9d9e503b0dd43af60496780cb50755e4d8e5b34e5647b887675c1ae9fee"}, - {file = "simplejson-3.20.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab998e416ded6c58f549a22b6a8847e75a9e1ef98eb9fbb2863e1f9e61a4105b"}, - {file = "simplejson-3.20.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a8f1c307edf5fbf0c6db3396c5d3471409c4a40c7a2a466fbc762f20d46601a"}, - {file = "simplejson-3.20.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5a7bbac80bdb82a44303f5630baee140aee208e5a4618e8b9fde3fc400a42671"}, - {file = "simplejson-3.20.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:5ef70ec8fe1569872e5a3e4720c1e1dcb823879a3c78bc02589eb88fab920b1f"}, - {file = "simplejson-3.20.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:cb11c09c99253a74c36925d461c86ea25f0140f3b98ff678322734ddc0f038d7"}, - {file = "simplejson-3.20.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:66f7c78c6ef776f8bd9afaad455e88b8197a51e95617bcc44b50dd974a7825ba"}, - {file = "simplejson-3.20.2-cp39-cp39-win32.whl", hash = "sha256:619ada86bfe3a5aa02b8222ca6bfc5aa3e1075c1fb5b3263d24ba579382df472"}, - {file = "simplejson-3.20.2-cp39-cp39-win_amd64.whl", hash = "sha256:44a6235e09ca5cc41aa5870a952489c06aa4aee3361ae46daa947d8398e57502"}, - {file = "simplejson-3.20.2-py3-none-any.whl", hash = "sha256:3b6bb7fb96efd673eac2e4235200bfffdc2353ad12c54117e1e4e2fc485ac017"}, - {file = "simplejson-3.20.2.tar.gz", hash = "sha256:5fe7a6ce14d1c300d80d08695b7f7e633de6cd72c80644021874d985b3393649"}, + {file = 
"simplejson-4.1.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:1ad45da7462afc3dc4a0fe374a40b62f816821b046297b4acc670e641e45cc8d"}, + {file = "simplejson-4.1.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:797ad726434bf23c47a1e51140b6d1afb35471984c8fd31db76ba375fc0a296d"}, + {file = "simplejson-4.1.1-cp27-cp27m-win32.whl", hash = "sha256:30944f06a9c0787a8c69d8295b3abc960161f08b5ce26d41be88d2bc5539ea3c"}, + {file = "simplejson-4.1.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e2778625bd6c839588373f07d7ade6fc0fb6b8365a146b84f27edfadb13ab597"}, + {file = "simplejson-4.1.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:21d1b82f5d58f776a184a42bdf5ce4a6d7c36191a917131c577b41019f6b7daf"}, + {file = "simplejson-4.1.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:3427a069695405658b2270989d9e6f03f39e34a24d1b8548b562abc282ee400b"}, + {file = "simplejson-4.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7f61eefab86235c800e7f4e37d977080ec424bb2bf0b74e95a2d17ecb48eac0a"}, + {file = "simplejson-4.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4484960512db9c8124bfa91e0d8a9f9c302338f1c5454e74c21d7d022df10f46"}, + {file = "simplejson-4.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b75c7ef874dbb350f41827cdf3cee23f5257bdcb0df46d4c01b34badb62dcfe8"}, + {file = "simplejson-4.1.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c7494c75b95171194f965ea609e97081837a26494d91dcc046ad27dd9c3503e2"}, + {file = "simplejson-4.1.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1778e09a6e4bb4ef304627915dc4a838569d9e6b737c787925b4e98244bbbc16"}, + {file = "simplejson-4.1.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:67e43e7c0555e10de6d83e1408035652fad28c983516e38c4e3a9a748c9af129"}, + {file = "simplejson-4.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:93bf6653420258372444de90194dab8de8ff13d74b5d4263a5fefbbe8b8d2060"}, + {file = "simplejson-4.1.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:0662cfe0482c9796bd097213b27f006815bfdc9b671264c3c0b7fc0e72b71d00"}, + {file = "simplejson-4.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a9ab55d2459f6d0fdf9984a7a0fb0280dae12979f4fcc3171f5096a4fcf5fafe"}, + {file = "simplejson-4.1.1-cp310-cp310-win32.whl", hash = "sha256:dfb84ace97acbdf1916c5a675387493fc5a7f67c2e15d4a7687143f8c73024d4"}, + {file = "simplejson-4.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:8eb821ef27f688f59ed4a93b17a666a7ebacf8dd65fecaa2b3c531a3aea62eaf"}, + {file = "simplejson-4.1.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2867c64d92abd1992c15666fae198203093f593e43d6b81adf176bae530d493a"}, + {file = "simplejson-4.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c47c46e16c8ea9e4850061e6ed5aa2b9cd2074cb2274bfd9c138cba15ce7453"}, + {file = "simplejson-4.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e294e33dbf316a9bbdd4030d46503c9b0f19470ae7ad6af5bae6c426bc2e869f"}, + {file = "simplejson-4.1.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7ce252b28fddbdd83db5bd7d93dad2a8a591d7ada098afec9c1b23d6b722a7a4"}, + {file = "simplejson-4.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c44ef6b02a4eb67ed17a72342341792149b3ff46f15426c26e970e49addf327"}, + {file = "simplejson-4.1.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:82bfca2b85a34178c25829c703f0a9e9f113a5af7539285bd3efb583a0bf1ba3"}, + {file = "simplejson-4.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0e4b23f71dd781f8830f1663dc01a4944d3dbf87a1f93d78fba1cf64722d0ccf"}, + {file = "simplejson-4.1.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:82fee635d7b73ad801030b05a75fbd34a098da0c2ecf600667a03636d09e1e42"}, + {file = 
"simplejson-4.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:68e62eda21192c5ea9bb92d571ca46a4477fef48762f50d433de2b4253051551"}, + {file = "simplejson-4.1.1-cp311-cp311-win32.whl", hash = "sha256:ffd3d82294b47f5ec64050021ace95fd62628a0c1cc8bbf4d06d2d1fb697e055"}, + {file = "simplejson-4.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:78a3fe0995be42bed62a26aa78e0e0b4d87c6545785346b9cc898f3389569a35"}, + {file = "simplejson-4.1.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:19040a17154dc03d289bab68d73ce0a6a0be01de30c584bbdd93490bead14b22"}, + {file = "simplejson-4.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a94ebaecdbaa80d9551a3ec6bf0c9302fc8b53ab6c1b2bfd498a1df4cb28158d"}, + {file = "simplejson-4.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:67341c95c0a168ab4a6d1e807e50463f1c8da932c3286d81e201266c427061fa"}, + {file = "simplejson-4.1.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:45ec18e337fec538b7e902d489505c450b2454653d1290f3f50385e6fd8aa607"}, + {file = "simplejson-4.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:820c69a4710400e9b248d5670647d60be58824369282d3925e516b3ff1a7cd82"}, + {file = "simplejson-4.1.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e708d373a10e4378ef2d59f8361850c7150fd907ed49efe49bc5492160476d1"}, + {file = "simplejson-4.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:980fc33353f81fd12d8c49d44f8c2760d1dc8192285e627c5180d141035b228a"}, + {file = "simplejson-4.1.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:de2ed102fff88dacf543699f53ee3a533cc11539a39baa176b7e09dd783069d6"}, + {file = "simplejson-4.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2785ff8edc0e28bf773a32543a6bbed46351453c997b3f6709c744e3c2f7eabb"}, + {file = "simplejson-4.1.1-cp312-cp312-win32.whl", hash = 
"sha256:2e0d5ead6d14610467ec356ec1f6b5d8a56aa216abaad8d41c8b873b16cf313f"}, + {file = "simplejson-4.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:63a5451f557d6be48a231bae932458655c620902b868170b2f1c8afed496f6b4"}, + {file = "simplejson-4.1.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dff52fc7af272e84fc21cc5a06c927c823ca6ae00af14f3b0d7707b42775ed98"}, + {file = "simplejson-4.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:971aed0647ad6e840a3943bec812fcda5f2d26a5497a4981d1fb49aa4f9a396c"}, + {file = "simplejson-4.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:249e2e220aa6d9b9d936bde84eb7bf79d5b6c5a8273c6e411f8b1635a9073f2d"}, + {file = "simplejson-4.1.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e5cdd6a5d52299f345c15ab5678cc4249e24f383f361d986afbc3c7072a6b6b"}, + {file = "simplejson-4.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:642cec364e0676e2d5a73fa4d31d0c7c55886997caa2fde24e8292ca44d32728"}, + {file = "simplejson-4.1.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:76fe296ca1df23d290033f10aaacf534fd1b3e3007e7f9ff8aa68b21413aaa78"}, + {file = "simplejson-4.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f0ad25b7dc4e0fb23858355819f2e994f1a5badcdcde8737eac7921c2f1ed2a"}, + {file = "simplejson-4.1.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:a59ebd0533f03fd06ff0c42ba0f02d93cbcdd7944922bf3b93911327a95b901f"}, + {file = "simplejson-4.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bccbf4419676b517939852e5aeff2af6aee4dc046881c67a1581fa6f1cb01abd"}, + {file = "simplejson-4.1.1-cp313-cp313-win32.whl", hash = "sha256:6c845363eb5fd166fb7c72243da38f4fcfde666ede7fdf2cc6fd7762894626f7"}, + {file = "simplejson-4.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:104d8324c34f25b4b90800bc5fa363780cbc3d8496aef061cba7ce1af9162270"}, + {file = 
"simplejson-4.1.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:ed7473602b6625de793b6acba49aa949f144a475f538792067e4cf2fda2071f5"}, + {file = "simplejson-4.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:225c9caa324c5b554d009fb9cac22aee7711e71bd96f487938c659af467e828e"}, + {file = "simplejson-4.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:95407269340c7f22f09776ea7b717a52cf56cfcf119b5e45f66faa4a26445bea"}, + {file = "simplejson-4.1.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3851658d642c1184d2023f0e6c9ce44a21eb1629e74e7c84ef956b128841fe12"}, + {file = "simplejson-4.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:95a3bb0f78e85f4937f99092239f2011ce06f0f2d803df5c299cc05abbeae008"}, + {file = "simplejson-4.1.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bbfdaa7c0603f75b7b14b211b7f2be44696d4e26833ad2d91d5c87bf5fb9a920"}, + {file = "simplejson-4.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:39e3c584071dced8c21b4689f0254303521daeb9b5bc1f4289755d71fa3cb0d3"}, + {file = "simplejson-4.1.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:036a27bd0469b9d79557cbddb392969f876cd7f278cfbd0fba81534927a06575"}, + {file = "simplejson-4.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b70bfd2f67f3351baba08aa3ae9233c83f21fd95ae5e6b3d0ecb8c647929112f"}, + {file = "simplejson-4.1.1-cp314-cp314-win32.whl", hash = "sha256:37233c72ce88d06acb92747347742b3c07871eba6789f060c179c9302dde8efe"}, + {file = "simplejson-4.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:cc0442dea71cd9cbf30a0b8b9929ab5aa6c02c0443a3d977351e6ec5bada4388"}, + {file = "simplejson-4.1.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:c996a4d38290c515af347740659ce095b425449c164a5c9fa3977caa6eff5dbe"}, + {file = "simplejson-4.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = 
"sha256:c65c763fb20d7ca113c1c14dce2fc04a0fc3a57aceff533d6fdac707c7bffb40"}, + {file = "simplejson-4.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0da5c9f57206ee7ef280ff7f1d924937b0a64f9a271a5ef371a2ecdbebba7421"}, + {file = "simplejson-4.1.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ea3426e786425d10e9e82f8a6eda74a7d6eb10d99165ac3d0d3bbcb65c0ea343"}, + {file = "simplejson-4.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d75cea7a1025edd7e439b2966b3d977c45b5b899e2adaf422811b3ac702ed9fb"}, + {file = "simplejson-4.1.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:63c2ada8e58f266491f19eed2eeeb7c25c6141e52f8f9e820f6bb94156cf8dbc"}, + {file = "simplejson-4.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d1fffb56305c5b475ee746cf9e04f97423ba5aaacd292dc1255bd75b1d3b124b"}, + {file = "simplejson-4.1.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:a6525ec733f43d0541206cffa64fd2aad5a7ae3eb76566aff49cd4db6382209a"}, + {file = "simplejson-4.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:861e393260508efa64d8805a8e49c416c3484907e3f146ce966c69552b49b9a3"}, + {file = "simplejson-4.1.1-cp314-cp314t-win32.whl", hash = "sha256:d083b89d30948a751d3d97476c2ed91e4caaa24a1a1459bdbadb8876242c71fe"}, + {file = "simplejson-4.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:4cbb299d0528ec0447fe366d8c9641860e28f997a62730690fef905f1f41046e"}, + {file = "simplejson-4.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:110a14b2702e9fa01d13d510b66bcf823c9ddd700bd0050301bfbd1bcdf95991"}, + {file = "simplejson-4.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:617e36ae79f4d53c91a4aec95eb09c469ef28fdec7926821254505a1de920e3f"}, + {file = "simplejson-4.1.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:112b37ba2ff805da9e657c05b4ffa1a7428eaa191918af106dfacc5ab70663d4"}, + {file = 
"simplejson-4.1.1-cp38-cp38-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2669ff10c7090ae9099afa9f7ae3a8c1a5170e78f18c7011dd8d5ce73beba6c0"}, + {file = "simplejson-4.1.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:453a25bcad03b1ec4584998c58c27c5eb54148e13bcbf49b796f953048349c78"}, + {file = "simplejson-4.1.1-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e338a0029a0e8e4da2657b63a8df39b0269007dc9b506eadacca384519b2bd9c"}, + {file = "simplejson-4.1.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:604b8d73f396078c122f696c2fa5f3527a00667633863560096180baf1356257"}, + {file = "simplejson-4.1.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:14465703aa66668cc1b697ec834c4d1566b72464f92d3ba4cb00ecd2c4438b2d"}, + {file = "simplejson-4.1.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:7c0573d7370232d71616653080e397df61d4daf1cd7d6a717f23fd21eda90731"}, + {file = "simplejson-4.1.1-cp38-cp38-win32.whl", hash = "sha256:5f09293dc60ca29b4d588583f0423863d1e9faa0074fdbf00a9955ecc6365331"}, + {file = "simplejson-4.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:25773dbc326799824767d40921e88a8f07c5bf1a0738884fb16cab574610aa28"}, + {file = "simplejson-4.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:97a02325a00617c26cfc974f4ebb191c8de6e87cb96d33e51612091150637c3d"}, + {file = "simplejson-4.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6bbc61cd7982ff77a68df06d103a3ba459eefd1d3cb6f4f4944cdf9f091d7bf7"}, + {file = "simplejson-4.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c7876ec2ef53ff5e6714a382b3f8f042a744b944728ae0baef99421740cc57a3"}, + {file = "simplejson-4.1.1-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:42ee1aeaa295364bb2c079c42c5796bf1db4b0d5c4bf95f2fcdddba770618cb4"}, + {file = 
"simplejson-4.1.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f42a7911f64ed8f738ba55480c20d5c685851781d411f9473cafa7a643e52fe4"}, + {file = "simplejson-4.1.1-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8b2da172a6ff43f74463522a1aa1d7a481ac2dba2de4b18ed51e989190352ba7"}, + {file = "simplejson-4.1.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:677fbb192b2cbefb3dc21862eaf0bf560b4b370662503036c513f1e3eb32dfac"}, + {file = "simplejson-4.1.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:10048ab9b9e0f7e95f1680829f0925a63b190fa8e8e9bb91369538fe382df827"}, + {file = "simplejson-4.1.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:82e42ff58ee856f4029c732d35673dbe62d589445a8e6c3c98ced8fd78096617"}, + {file = "simplejson-4.1.1-cp39-cp39-win32.whl", hash = "sha256:43fa9a1ccf477e415c025ba507ada54984f5ed927d28d304cf50e089818818b0"}, + {file = "simplejson-4.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:4c1eecc2d6a186eaf5d111cf9b311fa9a9ecf68703db7b63ed5938049f3e74f5"}, + {file = "simplejson-4.1.1-py3-none-any.whl", hash = "sha256:2ce92b3748f02423e26d2bfb636fb9d7a8f67c8f5854dcae69d350d123b2eee2"}, + {file = "simplejson-4.1.1.tar.gz", hash = "sha256:c08eb9f7a90f77ae470e19a07472e9a79ebc0d1c2315d86a72767665bd5ba79f"}, ] [[package]] @@ -4057,71 +4193,75 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.46" +version = "2.0.49" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "sqlalchemy-2.0.46-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:895296687ad06dc9b11a024cf68e8d9d3943aa0b4964278d2553b86f1b267735"}, - {file = "sqlalchemy-2.0.46-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab65cb2885a9f80f979b85aa4e9c9165a31381ca322cbde7c638fe6eefd1ec39"}, - {file = 
"sqlalchemy-2.0.46-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52fe29b3817bd191cc20bad564237c808967972c97fa683c04b28ec8979ae36f"}, - {file = "sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:09168817d6c19954d3b7655da6ba87fcb3a62bb575fb396a81a8b6a9fadfe8b5"}, - {file = "sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:be6c0466b4c25b44c5d82b0426b5501de3c424d7a3220e86cd32f319ba56798e"}, - {file = "sqlalchemy-2.0.46-cp310-cp310-win32.whl", hash = "sha256:1bc3f601f0a818d27bfe139f6766487d9c88502062a2cd3a7ee6c342e81d5047"}, - {file = "sqlalchemy-2.0.46-cp310-cp310-win_amd64.whl", hash = "sha256:e0c05aff5c6b1bb5fb46a87e0f9d2f733f83ef6cbbbcd5c642b6c01678268061"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:261c4b1f101b4a411154f1da2b76497d73abbfc42740029205d4d01fa1052684"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:181903fe8c1b9082995325f1b2e84ac078b1189e2819380c2303a5f90e114a62"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:590be24e20e2424a4c3c1b0835e9405fa3d0af5823a1a9fc02e5dff56471515f"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7568fe771f974abadce52669ef3a03150ff03186d8eb82613bc8adc435a03f01"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf7e1e78af38047e08836d33502c7a278915698b7c2145d045f780201679999"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-win32.whl", hash = "sha256:9d80ea2ac519c364a7286e8d765d6cd08648f5b21ca855a8017d9871f075542d"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-win_amd64.whl", hash = "sha256:585af6afe518732d9ccd3aea33af2edaae4a7aa881af5d8f6f4fe3a368699597"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:3a9a72b0da8387f15d5810f1facca8f879de9b85af8c645138cba61ea147968c"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2347c3f0efc4de367ba00218e0ae5c4ba2306e47216ef80d6e31761ac97cb0b9"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9094c8b3197db12aa6f05c51c05daaad0a92b8c9af5388569847b03b1007fb1b"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37fee2164cf21417478b6a906adc1a91d69ae9aba8f9533e67ce882f4bb1de53"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b1e14b2f6965a685c7128bd315e27387205429c2e339eeec55cb75ca4ab0ea2e"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-win32.whl", hash = "sha256:412f26bb4ba942d52016edc8d12fb15d91d3cd46b0047ba46e424213ad407bcb"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-win_amd64.whl", hash = "sha256:ea3cd46b6713a10216323cda3333514944e510aa691c945334713fca6b5279ff"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:93a12da97cca70cea10d4b4fc602589c4511f96c1f8f6c11817620c021d21d00"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af865c18752d416798dae13f83f38927c52f085c52e2f32b8ab0fef46fdd02c2"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8d679b5f318423eacb61f933a9a0f75535bfca7056daeadbf6bd5bcee6183aee"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64901e08c33462acc9ec3bad27fc7a5c2b6491665f2aa57564e57a4f5d7c52ad"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8ac45e8f4eaac0f9f8043ea0e224158855c6a4329fd4ee37c45c61e3beb518e"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-win32.whl", hash = 
"sha256:8d3b44b3d0ab2f1319d71d9863d76eeb46766f8cf9e921ac293511804d39813f"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-win_amd64.whl", hash = "sha256:77f8071d8fbcbb2dd11b7fd40dedd04e8ebe2eb80497916efedba844298065ef"}, - {file = "sqlalchemy-2.0.46-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1e8cc6cc01da346dc92d9509a63033b9b1bda4fed7a7a7807ed385c7dccdc10"}, - {file = "sqlalchemy-2.0.46-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:96c7cca1a4babaaf3bfff3e4e606e38578856917e52f0384635a95b226c87764"}, - {file = "sqlalchemy-2.0.46-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b2a9f9aee38039cf4755891a1e50e1effcc42ea6ba053743f452c372c3152b1b"}, - {file = "sqlalchemy-2.0.46-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:db23b1bf8cfe1f7fda19018e7207b20cdb5168f83c437ff7e95d19e39289c447"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:56bdd261bfd0895452006d5316cbf35739c53b9bb71a170a331fa0ea560b2ada"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33e462154edb9493f6c3ad2125931e273bbd0be8ae53f3ecd1c161ea9a1dd366"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9bcdce05f056622a632f1d44bb47dbdb677f58cad393612280406ce37530eb6d"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e84b09a9b0f19accedcbeff5c2caf36e0dd537341a33aad8d680336152dc34e"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4f52f7291a92381e9b4de9050b0a65ce5d6a763333406861e33906b8aa4906bf"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-win32.whl", hash = "sha256:70ed2830b169a9960193f4d4322d22be5c0925357d82cbf485b3369893350908"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-win_amd64.whl", hash = 
"sha256:3c32e993bc57be6d177f7d5d31edb93f30726d798ad86ff9066d75d9bf2e0b6b"}, - {file = "sqlalchemy-2.0.46-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4dafb537740eef640c4d6a7c254611dca2df87eaf6d14d6a5fca9d1f4c3fc0fa"}, - {file = "sqlalchemy-2.0.46-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42a1643dc5427b69aca967dae540a90b0fbf57eaf248f13a90ea5930e0966863"}, - {file = "sqlalchemy-2.0.46-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ff33c6e6ad006bbc0f34f5faf941cfc62c45841c64c0a058ac38c799f15b5ede"}, - {file = "sqlalchemy-2.0.46-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:82ec52100ec1e6ec671563bbd02d7c7c8d0b9e71a0723c72f22ecf52d1755330"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6ac245604295b521de49b465bab845e3afe6916bcb2147e5929c8041b4ec0545"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e6199143d51e3e1168bedd98cc698397404a8f7508831b81b6a29b18b051069"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:716be5bcabf327b6d5d265dbdc6213a01199be587224eb991ad0d37e83d728fd"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6f827fd687fa1ba7f51699e1132129eac8db8003695513fcf13fc587e1bd47a5"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c805fa6e5d461329fa02f53f88c914d189ea771b6821083937e79550bf31fc19"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-win32.whl", hash = "sha256:3aac08f7546179889c62b53b18ebf1148b10244b3405569c93984b0388d016a7"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-win_amd64.whl", hash = "sha256:0cc3117db526cad3e61074100bd2867b533e2c7dc1569e95c14089735d6fb4fe"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:90bde6c6b1827565a95fde597da001212ab436f1b2e0c2dcc7246e14db26e2a3"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94b1e5f3a5f1ff4f42d5daab047428cd45a3380e51e191360a35cef71c9a7a2a"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93bb0aae40b52c57fd74ef9c6933c08c040ba98daf23ad33c3f9893494b8d3ce"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c4e2cc868b7b5208aec6c960950b7bb821f82c2fe66446c92ee0a571765e91a5"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:965c62be8256d10c11f8907e7a8d3e18127a4c527a5919d85fa87fd9ecc2cfdc"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-win32.whl", hash = "sha256:9397b381dcee8a2d6b99447ae85ea2530dcac82ca494d1db877087a13e38926d"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-win_amd64.whl", hash = "sha256:4396c948d8217e83e2c202fbdcc0389cf8c93d2c1c5e60fa5c5a955eae0e64be"}, - {file = "sqlalchemy-2.0.46-py3-none-any.whl", hash = "sha256:f9c11766e7e7c0a2767dda5acb006a118640c9fc0a4104214b96269bfb78399e"}, - {file = "sqlalchemy-2.0.46.tar.gz", hash = "sha256:cf36851ee7219c170bb0793dbc3da3e80c582e04a5437bc601bfe8c85c9216d7"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:42e8804962f9e6f4be2cbaedc0c3718f08f60a16910fa3d86da5a1e3b1bfe60f"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc992c6ed024c8c3c592c5fc9846a03dd68a425674900c70122c77ea16c5fb0b"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6eb188b84269f357669b62cb576b5b918de10fb7c728a005fa0ebb0b758adce1"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:62557958002b69699bdb7f5137c6714ca1133f045f97b3903964f47db97ea339"}, + {file = 
"sqlalchemy-2.0.49-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da9b91bca419dc9b9267ffadde24eae9b1a6bffcd09d0a207e5e3af99a03ce0d"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-win32.whl", hash = "sha256:5e61abbec255be7b122aa461021daa7c3f310f3e743411a67079f9b3cc91ece3"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-win_amd64.whl", hash = "sha256:0c98c59075b890df8abfcc6ad632879540f5791c68baebacb4f833713b510e75"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5070135e1b7409c4161133aa525419b0062088ed77c92b1da95366ec5cbebbe"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ac7a3e245fd0310fd31495eb61af772e637bdf7d88ee81e7f10a3f271bff014"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d4e5a0ceba319942fa6b585cf82539288a61e314ef006c1209f734551ab9536"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3ddcb27fb39171de36e207600116ac9dfd4ae46f86c82a9bf3934043e80ebb88"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:32fe6a41ad97302db2931f05bb91abbcc65b5ce4c675cd44b972428dd2947700"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-win32.whl", hash = "sha256:46d51518d53edfbe0563662c96954dc8fcace9832332b914375f45a99b77cc9a"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-win_amd64.whl", hash = "sha256:951d4a210744813be63019f3df343bf233b7432aadf0db54c75802247330d3af"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4bbccb45260e4ff1b7db0be80a9025bb1e6698bdb808b83fff0000f7a90b2c0b"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb37f15714ec2652d574f021d479e78cd4eb9d04396dca36568fdfffb3487982"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash 
= "sha256:3bb9ec6436a820a4c006aad1ac351f12de2f2dbdaad171692ee457a02429b672"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8d6efc136f44a7e8bc8088507eaabbb8c2b55b3dbb63fe102c690da0ddebe55e"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e06e617e3d4fd9e51d385dfe45b077a41e9d1b033a7702551e3278ac597dc750"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-win32.whl", hash = "sha256:83101a6930332b87653886c01d1ee7e294b1fe46a07dd9a2d2b4f91bcc88eec0"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-win_amd64.whl", hash = "sha256:618a308215b6cececb6240b9abde545e3acdabac7ae3e1d4e666896bf5ba44b4"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df2d441bacf97022e81ad047e1597552eb3f83ca8a8f1a1fdd43cd7fe3898120"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8e20e511dc15265fb433571391ba313e10dd8ea7e509d51686a51313b4ac01a2"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47604cb2159f8bbd5a1ab48a714557156320f20871ee64d550d8bf2683d980d3"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:22d8798819f86720bc646ab015baff5ea4c971d68121cb36e2ebc2ee43ead2b7"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9b1c058c171b739e7c330760044803099c7fff11511e3ab3573e5327116a9c33"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-win32.whl", hash = "sha256:a143af2ea6672f2af3f44ed8f9cd020e9cc34c56f0e8db12019d5d9ecf41cb3b"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-win_amd64.whl", hash = "sha256:12b04d1db2663b421fe072d638a138460a51d5a862403295671c4f3987fb9148"}, + {file = "sqlalchemy-2.0.49-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:24bd94bb301ec672d8f0623eba9226cc90d775d25a0c92b5f8e4965d7f3a1518"}, + {file = 
"sqlalchemy-2.0.49-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a51d3db74ba489266ef55c7a4534eb0b8db9a326553df481c11e5d7660c8364d"}, + {file = "sqlalchemy-2.0.49-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:55250fe61d6ebfd6934a272ee16ef1244e0f16b7af6cd18ab5b1fc9f08631db0"}, + {file = "sqlalchemy-2.0.49-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:46796877b47034b559a593d7e4b549aba151dae73f9e78212a3478161c12ab08"}, + {file = "sqlalchemy-2.0.49-cp313-cp313t-win32.whl", hash = "sha256:9c4969a86e41454f2858256c39bdfb966a20961e9b58bf8749b65abf447e9a8d"}, + {file = "sqlalchemy-2.0.49-cp313-cp313t-win_amd64.whl", hash = "sha256:b9870d15ef00e4d0559ae10ee5bc71b654d1f20076dbe8bc7ed19b4c0625ceba"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:233088b4b99ebcbc5258c755a097aa52fbf90727a03a5a80781c4b9c54347a2e"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:57ca426a48eb2c682dae8204cd89ea8ab7031e2675120a47924fabc7caacbc2a"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:685e93e9c8f399b0c96a624799820176312f5ceef958c0f88215af4013d29066"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9e0400fa22f79acc334d9a6b185dc00a44a8e6578aa7e12d0ddcd8434152b187"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a05977bffe9bffd2229f477fa75eabe3192b1b05f408961d1bebff8d1cd4d401"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-win32.whl", hash = "sha256:0f2fa354ba106eafff2c14b0cc51f22801d1e8b2e4149342023bd6f0955de5f5"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-win_amd64.whl", hash = "sha256:77641d299179c37b89cf2343ca9972c88bb6eef0d5fc504a2f86afd15cd5adf5"}, + {file = 
"sqlalchemy-2.0.49-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c1dc3368794d522f43914e03312202523cc89692f5389c32bea0233924f8d977"}, + {file = "sqlalchemy-2.0.49-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7c821c47ecfe05cc32140dcf8dc6fd5d21971c86dbd56eabfe5ba07a64910c01"}, + {file = "sqlalchemy-2.0.49-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9c04bff9a5335eb95c6ecf1c117576a0aa560def274876fd156cfe5510fccc61"}, + {file = "sqlalchemy-2.0.49-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7f605a456948c35260e7b2a39f8952a26f077fd25653c37740ed186b90aaa68a"}, + {file = "sqlalchemy-2.0.49-cp314-cp314t-win32.whl", hash = "sha256:6270d717b11c5476b0cbb21eedc8d4dbb7d1a956fd6c15a23e96f197a6193158"}, + {file = "sqlalchemy-2.0.49-cp314-cp314t-win_amd64.whl", hash = "sha256:275424295f4256fd301744b8f335cff367825d270f155d522b30c7bf49903ee7"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8a97ac839c2c6672c4865e48f3cbad7152cee85f4233fb4ca6291d775b9b954a"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c338ec6ec01c0bc8e735c58b9f5d51e75bacb6ff23296658826d7cfdfdb8678a"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:566df36fd0e901625523a5a1835032f1ebdd7f7886c54584143fa6c668b4df3b"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d99945830a6f3e9638d89a28ed130b1eb24c91255e4f24366fbe699b983f29e4"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:01146546d84185f12721a1d2ce0c6673451a7894d1460b592d378ca4871a0c72"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-win32.whl", hash = "sha256:69469ce8ce7a8df4d37620e3163b71238719e1e2e5048d114a1b6ce0fbf8c662"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-win_amd64.whl", hash = 
"sha256:b95b2f470c1b2683febd2e7eab1d3f0e078c91dbdd0b00e9c645d07a413bb99f"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43d044780732d9e0381ac8d5316f95d7f02ef04d6e4ef6dc82379f09795d993f"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d6be30b2a75362325176c036d7fb8d19e8846c77e87683ffaa8177b35135613"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d898cc2c76c135ef65517f4ddd7a3512fb41f23087b0650efb3418b8389a3cd1"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:059d7151fff513c53a4638da8778be7fce81a0c4854c7348ebd0c4078ddf28fe"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:334edbcff10514ad1d66e3a70b339c0a29886394892490119dbb669627b17717"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-win32.whl", hash = "sha256:74ab4ee7794d7ed1b0c37e7333640e0f0a626fc7b398c07a7aef52f484fddde3"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-win_amd64.whl", hash = "sha256:88690f4e1f0fbf5339bedbb127e240fec1fd3070e9934c0b7bef83432f779d2f"}, + {file = "sqlalchemy-2.0.49-py3-none-any.whl", hash = "sha256:ec44cfa7ef1a728e88ad41674de50f6db8cfdb3e2af84af86e0041aaf02d43d0"}, + {file = "sqlalchemy-2.0.49.tar.gz", hash = "sha256:d15950a57a210e36dd4cec1aac22787e2a4d57ba9318233e2ef8b2daf9ff2d5f"}, ] [package.dependencies] @@ -4230,15 +4370,15 @@ starlette = "*" [[package]] name = "tabulate" -version = "0.9.0" +version = "0.10.0" description = "Pretty-print tabular data" optional = true -python-versions = ">=3.7" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, - {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, + {file = 
"tabulate-0.10.0-py3-none-any.whl", hash = "sha256:f0b0622e567335c8fabaaa659f1b33bcb6ddfe2e496071b743aa113f8774f2d3"}, + {file = "tabulate-0.10.0.tar.gz", hash = "sha256:e2cfde8f79420f6deeffdeda9aaec3b6bc5abce947655d17ac662b126e48a60d"}, ] [package.extras] @@ -4303,26 +4443,26 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "types-awscrt" -version = "0.31.1" +version = "0.31.3" description = "Type annotations and code completion for awscrt" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "types_awscrt-0.31.1-py3-none-any.whl", hash = "sha256:7e4364ac635f72bd57f52b093883640b1448a6eded0ecbac6e900bf4b1e4777b"}, - {file = "types_awscrt-0.31.1.tar.gz", hash = "sha256:08b13494f93f45c1a92eb264755fce50ed0d1dc75059abb5e31670feb9a09724"}, + {file = "types_awscrt-0.31.3-py3-none-any.whl", hash = "sha256:e5ce65a00a2ab4f35eacc1e3d700d792338d56e4823ee7b4dbe017f94cfc4458"}, + {file = "types_awscrt-0.31.3.tar.gz", hash = "sha256:09d3eaf00231e0f47e101bd9867e430873bc57040050e2a3bd8305cb4fc30865"}, ] [[package]] name = "types-pyasn1" -version = "0.6.0.20250914" +version = "0.6.0.20260408" description = "Typing stubs for pyasn1" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "types_pyasn1-0.6.0.20250914-py3-none-any.whl", hash = "sha256:68ffeef3c28e1ed120b8b81a242f238f137543e68d466d84a97edcf3e4203b5b"}, - {file = "types_pyasn1-0.6.0.20250914.tar.gz", hash = "sha256:236102553b76c938953037b7ae93d11d395d9413b7f2f8083d3b19d740f7eda6"}, + {file = "types_pyasn1-0.6.0.20260408-py3-none-any.whl", hash = "sha256:ee7fbd98bce61193c5d4f8f7812fa53cddc5b8cc5ceb9fcda6eea539947c6d6b"}, + {file = "types_pyasn1-0.6.0.20260408.tar.gz", hash = "sha256:32dc90927adbe504fd2eee83ae30cf5ef934e5db0d1d94886071fed47eb50c8c"}, ] [[package]] @@ -4342,26 +4482,26 @@ types-pyasn1 = "*" [[package]] name = "types-pytz" -version = "2025.2.0.20251108" +version = 
"2026.1.1.20260408" description = "Typing stubs for pytz" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "types_pytz-2025.2.0.20251108-py3-none-any.whl", hash = "sha256:0f1c9792cab4eb0e46c52f8845c8f77cf1e313cb3d68bf826aa867fe4717d91c"}, - {file = "types_pytz-2025.2.0.20251108.tar.gz", hash = "sha256:fca87917836ae843f07129567b74c1929f1870610681b4c92cb86a3df5817bdb"}, + {file = "types_pytz-2026.1.1.20260408-py3-none-any.whl", hash = "sha256:c7e4dec76221fb7d0c97b91ad8561d689bebe39b6bcb7b728387e7ffd8cde788"}, + {file = "types_pytz-2026.1.1.20260408.tar.gz", hash = "sha256:89b6a34b9198ea2a4b98a9d15cbca987053f52a105fd44f7ce3789cae4349408"}, ] [[package]] name = "types-pyyaml" -version = "6.0.12.20250915" +version = "6.0.12.20260408" description = "Typing stubs for PyYAML" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6"}, - {file = "types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3"}, + {file = "types_pyyaml-6.0.12.20260408-py3-none-any.whl", hash = "sha256:fbc42037d12159d9c801ebfcc79ebd28335a7c13b08a4cfbc6916df78fee9384"}, + {file = "types_pyyaml-6.0.12.20260408.tar.gz", hash = "sha256:92a73f2b8d7f39ef392a38131f76b970f8c66e4c42b3125ae872b7c93b556307"}, ] [[package]] @@ -4405,14 +4545,14 @@ files = [ [[package]] name = "tzdata" -version = "2025.3" +version = "2026.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" groups = ["main", "dev"] files = [ - {file = "tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1"}, - {file = "tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7"}, + {file = 
"tzdata-2026.2-py2.py3-none-any.whl", hash = "sha256:bbe9af844f658da81a5f95019480da3a89415801f6cc966806612cc7169bffe7"}, + {file = "tzdata-2026.2.tar.gz", hash = "sha256:9173fde7d80d9018e02a662e168e5a2d04f87c41ea174b139fbef642eda62d10"}, ] markers = {dev = "sys_platform == \"win32\""} @@ -4436,15 +4576,15 @@ zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] [[package]] name = "uvicorn" -version = "0.40.0" +version = "0.46.0" description = "The lightning-fast ASGI server." optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee"}, - {file = "uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea"}, + {file = "uvicorn-0.46.0-py3-none-any.whl", hash = "sha256:bbebbcbed972d162afca128605223022bedd345b7bc7855ce66deb31487a9048"}, + {file = "uvicorn-0.46.0.tar.gz", hash = "sha256:fb9da0926999cc6cb22dc7cd71a94a632f078e6ae47ff683c5c420750fb7413d"}, ] [package.dependencies] @@ -4455,11 +4595,11 @@ httptools = {version = ">=0.6.3", optional = true, markers = "extra == \"standar python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} uvloop = {version = ">=0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} -watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} +watchfiles = {version = ">=0.20", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} [package.extras] -standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop 
(>=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.20)", "websockets (>=10.4)"] [[package]] name = "uvloop" @@ -4528,24 +4668,21 @@ test = ["aiohttp (>=3.10.5)", "flake8 (>=6.1,<7.0)", "mypy (>=0.800)", "psutil", [[package]] name = "virtualenv" -version = "20.36.1" +version = "21.3.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f"}, - {file = "virtualenv-20.36.1.tar.gz", hash = "sha256:8befb5c81842c641f8ee658481e42641c68b5eab3521d8e092d18320902466ba"}, + {file = "virtualenv-21.3.0-py3-none-any.whl", hash = "sha256:4d28ee41f6d9ec8f1f00cd472b9ffbcedda1b3d3b9a575b5c94a2d004fd51bd7"}, + {file = "virtualenv-21.3.0.tar.gz", hash = "sha256:733750db978ec95c2d8eb4feadaa57091002bce404cb39ba69899cf7bd28944e"}, ] [package.dependencies] distlib = ">=0.3.7,<1" -filelock = {version = ">=3.20.1,<4", markers = "python_version >= \"3.10\""} +filelock = {version = ">=3.24.2,<4", markers = "python_version >= \"3.10\""} platformdirs = ">=3.9.1,<5" - -[package.extras] -docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == 
\"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] +python-discovery = ">=1.2.2" [[package]] name = "watchfiles" @@ -4811,15 +4948,15 @@ pyodbc = ["pyodbc"] [[package]] name = "zipp" -version = "3.23.0" +version = "3.23.1" description = "Backport of pathlib-compatible object wrapper for zip files" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e"}, - {file = "zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166"}, + {file = "zipp-3.23.1-py3-none-any.whl", hash = "sha256:0b3596c50a5c700c9cb40ba8d86d9f2cc4807e9bedb06bcdf7fac85633e444dc"}, + {file = "zipp-3.23.1.tar.gz", hash = "sha256:32120e378d32cd9714ad503c1d024619063ec28aad2248dc6672ad13edfa5110"}, ] [package.extras] @@ -4831,9 +4968,9 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_it type = ["pytest-mypy"] [extras] -server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "pyathena", "python-jose", "python-multipart", "requests", "slack-sdk", "starlette", "starlette-context", "uvicorn", "watchtower"] +server = ["aiocache", "alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "pyathena", "python-jose", "python-multipart", "requests", "slack-sdk", "starlette", "starlette-context", "uvicorn", "watchtower"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "a0d85338573e1d2220f0af0884e3245f172c5224ed1fe5c9ae75e83fc1c34159" +content-hash 
= "5b7368112d3edf1cbddc0d0ced029b5d505bfcdc8c9e744b57150071993b3923" diff --git a/pyproject.toml b/pyproject.toml index 664b75412..a7ebeeccc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ pyyaml = "~6.0.1" IDUtils = "~1.2.0" mavehgvs = "~0.7.0" eutils = "~0.6.0" +setuptools = ">=69.0,<82.0" # eutils requires pkg_resources at import time; removed in setuptools 82+ email_validator = "~2.1.1" numpy = "~1.26" httpx = "~0.26.0" @@ -41,6 +42,7 @@ SQLAlchemy = "~2.0.29" ga4gh-va-spec = "~0.4.2" # Optional dependencies for running this application as a server +aiocache = { extras = ["redis"], version = "~0.12.2", optional = true } alembic = { version = "~1.14.0", optional = true } alembic-utils = { version = "0.8.1", optional = true } arq = { version = "~0.25.0", optional = true } @@ -62,19 +64,21 @@ starlette-context = { version = "^0.3.6", optional = true } slack-sdk = { version = "~3.21.3", optional = true } uvicorn = { extras = ["standard"], version = "*", optional = true } watchtower = { version = "~3.2.0", optional = true } +asyncclick = "^8.3.0.7" +filelock = "^3.29.0" [tool.poetry.group.dev] optional = true [tool.poetry.group.dev.dependencies] -boto3-stubs = "~1.34.97" +boto3-stubs = { extras = ["s3"], version = "~1.42.33" } mypy = "~1.10.0" pre-commit = "*" jsonschema = "*" fakeredis = "~2.21.1" pytest = "~7.2.0" pytest-cov = "~5.0.0" -pytest-postgresql = "~5.0.0" +pytest-postgresql = "~7.0.0" pytest-asyncio = "~0.23.5" pytest-socket = "~0.6.0" pandas-stubs = "~2.1.4" @@ -87,8 +91,9 @@ ruff = "^0.6.8" SQLAlchemy = { extras = ["mypy"], version = "~2.0.0" } + [tool.poetry.extras] -server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "python-jose", "python-multipart", "pyathena", "requests", "starlette", "starlette-context", "slack-sdk", "uvicorn", "watchtower"] +server = ["aiocache", "alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", 
"cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "python-jose", "python-multipart", "pyathena", "requests", "starlette", "starlette-context", "slack-sdk", "uvicorn", "watchtower"] [tool.mypy] @@ -100,11 +105,17 @@ plugins = [ mypy_path = "mypy_stubs" [tool.pytest.ini_options] -addopts = "-v --import-mode=importlib --disable-socket --allow-unix-socket --allow-hosts localhost,::1,127.0.0.1" +addopts = "-v --import-mode=importlib" asyncio_mode = 'strict' testpaths = "tests/" pythonpath = "." norecursedirs = "tests/helpers/" +markers = """ + integration: mark a test as an integration test. + unit: mark a test as a unit test. + network: mark a test that requires network access. + slow: mark a test as slow-running. +""" # Uncomment the following lines to include application log output in Pytest logs. # log_cli = true # log_cli_level = "DEBUG" diff --git a/settings/.env.template b/settings/.env.template index fbb5b861a..585bd354f 100644 --- a/settings/.env.template +++ b/settings/.env.template @@ -98,3 +98,27 @@ AWS_REGION_NAME=us-west-2 ATHENA_SCHEMA_NAME=default ATHENA_S3_STAGING_DIR=s3://your-bucket/path/to/staging/ GNOMAD_DATA_VERSION=v4.1 + +#################################################################################################### +# Environment variables for S3 connection +#################################################################################################### + +AWS_ACCESS_KEY_ID=test +AWS_SECRET_ACCESS_KEY=test +S3_ENDPOINT_URL=http://localstack:4566 +UPLOAD_S3_BUCKET_NAME=score-set-csv-uploads-dev + +#################################################################################################### +# Environment variables for ClinGen cache settings +#################################################################################################### + +CLINGEN_CACHE_BACKEND=redis +CLINGEN_REDIS_HOST=localhost +CLINGEN_REDIS_PORT=6379 +CLINGEN_REDIS_SSL=false + 
+#################################################################################################### +# Environment variables for ClinVar cache settings +#################################################################################################### + +CLINVAR_CACHE_DIR=/data/clinvar_cache \ No newline at end of file diff --git a/src/mavedb/data_providers/services.py b/src/mavedb/data_providers/services.py index eed9b01dc..a94c16d6e 100644 --- a/src/mavedb/data_providers/services.py +++ b/src/mavedb/data_providers/services.py @@ -1,10 +1,14 @@ import os -from typing import Optional +from typing import TYPE_CHECKING, Optional -from cdot.hgvs.dataproviders import SeqFetcher, ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider +import boto3 +from cdot.hgvs.dataproviders import ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider, SeqFetcher from mavedb.lib.mapping import VRSMap +if TYPE_CHECKING: + from mypy_boto3_s3.client import S3Client + GENOMIC_FASTA_FILES = [ "/data/GCF_000001405.39_GRCh38.p13_genomic.fna.gz", "/data/GCF_000001405.25_GRCh37.p13_genomic.fna.gz", @@ -12,6 +16,7 @@ DCD_MAP_URL = os.environ.get("DCD_MAPPING_URL", "http://dcd-mapping:8000") CDOT_URL = os.environ.get("CDOT_URL", "http://cdot-rest:8000") +CSV_UPLOAD_S3_BUCKET_NAME = os.getenv("UPLOAD_S3_BUCKET_NAME", "score-set-csv-uploads-dev") def seqfetcher() -> ChainedSeqFetcher: @@ -24,3 +29,13 @@ def cdot_rest() -> RESTDataProvider: def vrs_mapper(url: Optional[str] = None) -> VRSMap: return VRSMap(DCD_MAP_URL) if not url else VRSMap(url) + + +def s3_client() -> "S3Client": + return boto3.client( + "s3", + endpoint_url=os.getenv("S3_ENDPOINT_URL"), + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + region_name=os.getenv("AWS_REGION_NAME", "us-west-2"), + ) diff --git a/src/mavedb/db/session.py b/src/mavedb/db/session.py index 0ddb1c320..8c0127ac4 100644 --- a/src/mavedb/db/session.py +++ b/src/mavedb/db/session.py @@ -1,4 +1,5 
@@ import os +from contextlib import contextmanager from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker @@ -21,3 +22,17 @@ pool_pre_ping=True, ) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +@contextmanager +def db_session(): + """Provide a transactional scope around a series of operations.""" + session = SessionLocal() + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() diff --git a/src/mavedb/lib/annotation_status_manager.py b/src/mavedb/lib/annotation_status_manager.py new file mode 100644 index 000000000..6598bfabd --- /dev/null +++ b/src/mavedb/lib/annotation_status_manager.py @@ -0,0 +1,207 @@ +"""Manage annotation statuses for variants. + +This module provides functionality to insert and retrieve annotation statuses +for genetic variants, ensuring that only one current status exists per +(variant, annotation type, version) combination. +""" + +import logging +from typing import Optional + +from sqlalchemy import select, update +from sqlalchemy.orm import Session +from sqlalchemy.sql import desc + +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + +logger = logging.getLogger(__name__) + +# Default number of pending annotations to accumulate before auto-flushing. +DEFAULT_BATCH_SIZE = 500 + + +class AnnotationStatusManager: + """ + Manager for handling variant annotation statuses with batched writes. + + Annotations are accumulated in memory and flushed to the database in + batches (default 500) to reduce round-trips. Callers **must** call + :meth:`flush` after the last ``add_annotation`` to persist any remainder. 
+ """ + + def __init__(self, session: Session, job_run_id: Optional[int] = None, *, batch_size: int = DEFAULT_BATCH_SIZE): + self.session = session + self.job_run_id = job_run_id + self.batch_size = batch_size + self._pending: list[VariantAnnotationStatus] = [] + self._retirement_filters: list[dict] = [] + + def add_annotation( + self, + variant_id: int, + annotation_type: AnnotationType, + status: AnnotationStatus, + version: Optional[str] = None, + failure_category: Optional[AnnotationFailureCategory] = None, + annotation_data: dict = {}, + current: bool = True, + replace_all_versions: bool = True, + ) -> None: + """ + Stage a new annotation and schedule retirement of previous current rows. + + By default (``replace_all_versions=True``), all existing current annotations for + (variant, type) are retired regardless of version. + + When ``replace_all_versions=False``, only existing current annotations matching + (variant, type, version) are retired. + + Writes are accumulated in memory and flushed to the database when + ``batch_size`` is reached. Call :meth:`flush` after the last add to + persist any remaining annotations. + + NOTE: + This method does not commit the session. The caller is responsible + for persisting changes (e.g., via ``session.commit()``). + """ + self._retirement_filters.append( + { + "variant_id": variant_id, + "annotation_type": annotation_type, + "replace_all_versions": replace_all_versions, + "version": version, + } + ) + + self._pending.append( + VariantAnnotationStatus( + variant_id=variant_id, + annotation_type=annotation_type, + status=status, + version=version, + failure_category=failure_category, + current=current, + job_run_id=self.job_run_id, + **annotation_data, + ) # type: ignore[call-arg] + ) + + if len(self._pending) >= self.batch_size: + self.flush() + + def flush(self) -> None: + """Flush all pending annotations to the database. 
+ + Retires old ``current=True`` rows in bulk, then inserts all pending + new rows in a single ``add_all`` + ``flush``. This replaces the + previous pattern of 2 flushes per ``add_annotation`` call. + """ + if not self._pending: + return + + self._retire_existing() + self.session.add_all(self._pending) + self.session.flush() + + logger.debug(f"Flushed {len(self._pending)} annotation statuses") + self._pending.clear() + self._retirement_filters.clear() + + def _retire_existing(self) -> None: + """Bulk-retire existing current annotations for all pending writes. + + Groups retirement filters by (annotation_type, replace_all_versions, version) + and issues one UPDATE per group, minimizing round-trips. + """ + # Group filters to minimize UPDATE statements. + # Key: (annotation_type, replace_all_versions, version) -> list of variant_ids + groups: dict[tuple, list[int]] = {} + for f in self._retirement_filters: + key = (f["annotation_type"], f["replace_all_versions"], f["version"]) + groups.setdefault(key, []).append(f["variant_id"]) + + for (annotation_type, replace_all_versions, version), variant_ids in groups.items(): + conditions = [ + VariantAnnotationStatus.variant_id.in_(variant_ids), + VariantAnnotationStatus.annotation_type == annotation_type, + VariantAnnotationStatus.current.is_(True), + ] + if not replace_all_versions: + conditions.append(VariantAnnotationStatus.version == version) + + stmt = update(VariantAnnotationStatus).where(*conditions).values(current=False) + self.session.execute(stmt) + + def get_current_annotation( + self, variant_id: int, annotation_type: AnnotationType, version: Optional[str] = None + ) -> Optional[VariantAnnotationStatus]: + """ + Retrieve the current annotation for a given variant/type/version. + + Flushes pending annotations first to ensure the result is up to date. 
+ """ + self.flush() + + stmt = select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == variant_id, + VariantAnnotationStatus.annotation_type == annotation_type, + VariantAnnotationStatus.current.is_(True), + ) + + if version is not None: + stmt = stmt.where(VariantAnnotationStatus.version == version) + + result = self.session.execute(stmt) + return result.scalar_one_or_none() + + def get_annotation_history( + self, + variant_id: int, + annotation_type: AnnotationType, + version: Optional[str] = None, + ) -> list[VariantAnnotationStatus]: + """ + Return the full annotation timeline for a variant/type, newest first. + + Includes both current and retired rows — useful for debugging and + support investigations. + """ + self.flush() + + stmt = ( + select(VariantAnnotationStatus) + .where( + VariantAnnotationStatus.variant_id == variant_id, + VariantAnnotationStatus.annotation_type == annotation_type, + ) + .order_by(desc(VariantAnnotationStatus.id)) + ) + + if version is not None: + stmt = stmt.where(VariantAnnotationStatus.version == version) + + return list(self.session.scalars(stmt).all()) + + def get_all_current_annotations( + self, + variant_id: int, + ) -> list[VariantAnnotationStatus]: + """ + Return all current annotations for a variant, across all types and versions. + + Useful for a quick overview of what annotations are active for a given variant. 
+ """ + self.flush() + + stmt = ( + select(VariantAnnotationStatus) + .where( + VariantAnnotationStatus.variant_id == variant_id, + VariantAnnotationStatus.current.is_(True), + ) + .order_by(VariantAnnotationStatus.annotation_type, VariantAnnotationStatus.version) + ) + + return list(self.session.scalars(stmt).all()) diff --git a/src/mavedb/lib/clingen/allele_registry.py b/src/mavedb/lib/clingen/allele_registry.py index 5e025b140..4e00dd21b 100644 --- a/src/mavedb/lib/clingen/allele_registry.py +++ b/src/mavedb/lib/clingen/allele_registry.py @@ -1,5 +1,11 @@ +import asyncio import logging +from typing import Optional + import requests +from aiocache import cached + +from mavedb.lib.clingen.cache import CACHE_CLASS, CACHE_CONFIG, CACHE_TTL_SECONDS, clingen_cache_key_builder logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -7,14 +13,53 @@ CLINGEN_API_URL = "https://reg.genome.network/allele" -def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: - """ "Retrieve any canonical PA IDs from the ClinGen API for a given clingen allele ID.""" - response = requests.get(f"{CLINGEN_API_URL}/{clingen_allele_id}") +@cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG) +async def get_clingen_allele_data(clingen_allele_id: str) -> Optional[dict]: + """Retrieve full allele data from the ClinGen Allele Registry. + + Results are automatically cached for 24 hours using aiocache with configurable backend. + + Args: + clingen_allele_id: ClinGen allele ID to query (e.g., CA123456 or PA123456). + + Returns: + Full JSON response from the ClinGen API, or None if the allele doesn't exist (404). + + Raises: + requests.exceptions.HTTPError: If the API request fails with non-2xx status code + (excluding 404, which returns None). 
+ """ + loop = asyncio.get_running_loop() + response = await loop.run_in_executor(None, requests.get, f"{CLINGEN_API_URL}/{clingen_allele_id}") + + if response.status_code == 404: + return None + if response.status_code != 200: - logger.error(f"Failed to query ClinGen API for {clingen_allele_id}: {response.status_code}") - return [] + response.raise_for_status() + + return response.json() + + +async def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: + """Retrieve canonical PA IDs from the ClinGen API for a given ClinGen allele ID. + + Uses the cached allele data from `get_clingen_allele_data` to avoid redundant API calls. - data = response.json() + Args: + clingen_allele_id: ClinGen allele ID to query (e.g., CA123456) + + Returns: + List of canonical PA IDs associated with the allele. Returns empty list if + the allele has no MANE transcripts or if the allele doesn't exist (404). + + Raises: + requests.exceptions.HTTPError: If the API request fails with non-2xx status code + (excluding 404, which returns empty list). + """ + data = await get_clingen_allele_data(clingen_allele_id) + if data is None: + return [] pa_ids = [] if data.get("transcriptAlleles"): @@ -26,20 +71,154 @@ def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: return pa_ids -def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]: - """Retrieve all matching registered transcript CA IDs for a given PA ID from the ClinGen API.""" - response = requests.get(f"{CLINGEN_API_URL}/{clingen_pa_id}") - if response.status_code != 200: - logger.error(f"Failed to query ClinGen API for {clingen_pa_id}: {response.status_code}") - return [] +async def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]: + """Retrieve matching registered transcript CA IDs for a given PA ID from the ClinGen API. + + Uses the cached allele data from `get_clingen_allele_data` to avoid redundant API calls. 
+ + Args: + clingen_pa_id: ClinGen protein allele ID to query (e.g., PA123456) - data = response.json() + Returns: + List of matching registered transcript CA IDs. Returns empty list if no + matching transcripts are found or if the allele doesn't exist (404). + + Raises: + requests.exceptions.HTTPError: If the API request fails with non-2xx status code + (excluding 404, which returns empty list). + """ + data = await get_clingen_allele_data(clingen_pa_id) + if data is None: + return [] ca_ids = [] if data.get("aminoAcidAlleles"): for allele in data["aminoAcidAlleles"]: if allele.get("matchingRegisteredTranscripts"): - # @id field returns url; the last component is the PA ID - ca_ids.extend([allele["@id"].split("/")[-1] for allele in allele["matchingRegisteredTranscripts"]]) + # @id field returns URL; the last component is the transcript CA ID + ca_ids.extend( + [transcript["@id"].split("/")[-1] for transcript in allele["matchingRegisteredTranscripts"]] + ) return ca_ids + + +async def get_associated_clinvar_allele_id(clingen_allele_id: str) -> str: + """Retrieve the associated ClinVar Allele ID for a given ClinGen Allele ID. + + Uses the cached allele data from `get_clingen_allele_data` to avoid redundant API calls. + + Returns empty string when no ClinVar association exists or when the allele doesn't exist + in ClinGen's registry (404). + + Args: + clingen_allele_id: ClinGen allele ID to query (e.g., CA123456) + + Returns: + Associated ClinVar allele ID as a string, or empty string if no association exists + or if the allele doesn't exist (404). + + Raises: + requests.exceptions.HTTPError: If the API request fails with non-2xx status code + (excluding 404, which returns empty string). 
+ """ + data = await get_clingen_allele_data(clingen_allele_id) + if data is None: + return "" + + clinvar_allele_id = data.get("externalRecords", {}).get("ClinVarAlleles", [{}])[0].get("alleleId") + if clinvar_allele_id: + return str(clinvar_allele_id) + + return "" + + +def extract_hgvs_from_ca_allele_data( + data: dict, + target_is_coding: bool, + transcript_accession: Optional[str], +) -> tuple[Optional[str], Optional[str], Optional[str]]: + """Extract HGVS strings from ClinGen allele data for a CA (canonical allele) ID. + + Parses the ClinGen API response to find GRCh38 genomic HGVS, coding HGVS + matching the target transcript (or MANE fallback), and protein HGVS. + + Args: + data: Parsed JSON response from the ClinGen Allele Registry API. + target_is_coding: Whether the score set target is protein-coding. + transcript_accession: Specific transcript accession to match, or None to use MANE. + + Returns: + Tuple of (hgvs_g, hgvs_c, hgvs_p), any of which may be None. + """ + hgvs_g: Optional[str] = None + hgvs_c: Optional[str] = None + hgvs_p: Optional[str] = None + + if data.get("genomicAlleles"): + for allele in data["genomicAlleles"]: + if allele.get("referenceGenome") == "GRCh38" and allele.get("hgvs"): + hgvs_g = allele["hgvs"][0] + break + + if target_is_coding and data.get("transcriptAlleles"): + if transcript_accession: + for allele in data["transcriptAlleles"]: + if allele.get("hgvs"): + for hgvs_string in allele["hgvs"]: + hgvs_reference_sequence = hgvs_string.split(":")[0] + if transcript_accession == hgvs_reference_sequence: + hgvs_c = hgvs_string + break + if hgvs_c: + if allele.get("proteinEffect"): + hgvs_p = allele["proteinEffect"].get("hgvs") + break + else: + # No transcript specified; use MANE if available + for allele in data["transcriptAlleles"]: + if allele.get("MANE"): + hgvs_c = allele["MANE"].get("nucleotide", {}).get("RefSeq", {}).get("hgvs") + hgvs_p = allele["MANE"].get("protein", {}).get("RefSeq", {}).get("hgvs") + break + + return 
hgvs_g, hgvs_c, hgvs_p + + +def extract_hgvs_from_pa_allele_data(data: dict) -> tuple[Optional[str], Optional[str], Optional[str]]: + """Extract HGVS strings from ClinGen allele data for a PA (protein allele) ID. + + For PA alleles, only hgvs_p is extracted from aminoAcidAlleles. + + Args: + data: Parsed JSON response from the ClinGen Allele Registry API. + + Returns: + Tuple of (None, None, hgvs_p), where hgvs_p may be None. + """ + hgvs_p: Optional[str] = None + + if data.get("aminoAcidAlleles"): + for allele in data["aminoAcidAlleles"]: + if allele.get("hgvs"): + hgvs_p = allele["hgvs"][0] + break + + return None, None, hgvs_p + + +def expand_allele_ids(clingen_allele_ids: list[Optional[str]]) -> set[str]: + """Expand comma-separated multi-variant ClinGen allele IDs into individual IDs. + + Multi-variant alleles may contain multiple comma-separated ClinGen IDs. + This function normalizes them into individual IDs for independent processing. + """ + expanded: set[str] = set() + for allele_id in clingen_allele_ids: + if not allele_id: + continue + if "," in allele_id: + expanded.update(single_id.strip() for single_id in allele_id.split(",")) + else: + expanded.add(allele_id) + return expanded diff --git a/src/mavedb/lib/clingen/cache.py b/src/mavedb/lib/clingen/cache.py new file mode 100644 index 000000000..3f160d829 --- /dev/null +++ b/src/mavedb/lib/clingen/cache.py @@ -0,0 +1,120 @@ +"""Cache configuration for ClinGen API requests. + +This module provides centralized cache configuration for ClinGen API calls that works +from both worker and API contexts. The cache backend is configurable via environment +variables, enabling different backends for dev/test/prod environments. + +The caching layer significantly reduces redundant API calls to ClinGen's Allele +Registry when refreshing ClinVar controls across multiple months/years. With a +24-hour TTL, subsequent jobs within the cache window experience 100% cache hit +rates, eliminating unnecessary API load. 
+ +Note: Configuration is evaluated at module import time (when decorators are applied). +For testing purposes, use get_cache_configuration() to retrieve config with different +environment variables. +""" + +import logging +import os + +from aiocache import Cache + +logger = logging.getLogger(__name__) + +# Cache constants +CACHE_KEY_PREFIX = "mavedb:clingen" +CACHE_KEY_VERSION = "v1" +CACHE_TTL_SECONDS = 86400 # 24 hours +# aiocache default is 5s, which times out under connection pool contention when +# concurrent annotation jobs all hit Redis simultaneously. +CACHE_TIMEOUT_SECONDS = 30 + + +def get_cache_configuration(backend=None, redis_host=None, redis_port=None, redis_ssl=None): + """Get cache configuration based on environment variables or provided parameters. + + This function is provided for testing purposes, allowing configuration to be + retrieved with custom parameters. In production, module-level CACHE_CLASS and + CACHE_CONFIG are used (evaluated at import time). + + Args: + backend: Cache backend ('redis' or 'memory'). If None, reads from CLINGEN_CACHE_BACKEND env var. + redis_host: Redis host. If None, reads from CLINGEN_REDIS_HOST env var. + redis_port: Redis port. If None, reads from CLINGEN_REDIS_PORT env var. + redis_ssl: Redis SSL enabled. If None, reads from CLINGEN_REDIS_SSL env var. 
+ + Returns: + tuple: (cache_class, cache_config_dict) + + Raises: + ValueError: If backend is not 'redis' or 'memory' + """ + cache_backend = backend or os.getenv("CLINGEN_CACHE_BACKEND", "redis") + + if cache_backend == "redis": + host = redis_host or os.getenv("CLINGEN_REDIS_HOST", "redis") + port = redis_port or int(os.getenv("CLINGEN_REDIS_PORT", "6379")) + ssl = redis_ssl if redis_ssl is not None else os.getenv("CLINGEN_REDIS_SSL", "false").lower() == "true" + + cache_class = Cache.REDIS + cache_config = { + "endpoint": host, + "port": port, + "ssl": ssl, + "namespace": CACHE_KEY_PREFIX, + "timeout": CACHE_TIMEOUT_SECONDS, + } + return cache_class, cache_config + + elif cache_backend == "memory": + cache_class = Cache.MEMORY + cache_config = { + "namespace": CACHE_KEY_PREFIX, + "timeout": CACHE_TIMEOUT_SECONDS, + } + return cache_class, cache_config + + else: + raise ValueError(f"Unsupported cache backend: {cache_backend}. Valid options are 'redis' or 'memory'.") + + +# Module-level configuration (evaluated at import time for decorator usage) +# The @cached decorators in allele_registry.py use these at function definition time +CACHE_CLASS, CACHE_CONFIG = get_cache_configuration() + +# Log the configuration that was selected +backend_name = "memory" if CACHE_CLASS == Cache.MEMORY else CACHE_CONFIG.get("endpoint") or "unknown" +logger.info(f"ClinGen cache initialized: backend={backend_name}, TTL={CACHE_TTL_SECONDS}s, prefix={CACHE_KEY_PREFIX}") + + +def clingen_cache_key_builder(func, *args, **kwargs): + """Build cache key for ClinGen API functions. + + The key includes a version prefix to enable cache invalidation if the + response format changes in the future. Different ClinGen API functions + (get_canonical_pa_ids, get_matching_registered_ca_ids, get_associated_clinvar_allele_id) + are cached separately as they return different data for the same allele ID. 
+ + Cache key format: v1:{function_name}:{allele_id} + The namespace prefix (mavedb:clingen) is added by aiocache automatically. + + Full Redis key example: mavedb:clingen:v1:get_associated_clinvar_allele_id:CA123456 + + Args: + func: The decorated function being cached + *args: Positional arguments (first arg is always the allele_id for ClinGen functions) + **kwargs: Keyword arguments (may contain clingen_allele_id or clingen_pa_id) + + Returns: + Cache key string in format: v1:{function_name}:{allele_id} + """ + function_name = func.__name__ + + # First positional arg is always the allele ID for ClinGen API functions + # Fallback to kwargs for flexibility (though not currently used) + allele_id = args[0] if args else kwargs.get("clingen_allele_id") or kwargs.get("clingen_pa_id") + + if not allele_id: + raise ValueError(f"Cannot build cache key for {function_name}: allele_id is required") + + return f"{CACHE_KEY_VERSION}:{function_name}:{allele_id}" diff --git a/src/mavedb/lib/clingen/constants.py b/src/mavedb/lib/clingen/constants.py index 2bc6979be..5787501f6 100644 --- a/src/mavedb/lib/clingen/constants.py +++ b/src/mavedb/lib/clingen/constants.py @@ -14,8 +14,8 @@ LDH_ENTITY_ENDPOINT = "maveDb" # for some reason, not the same :/ DEFAULT_LDH_SUBMISSION_BATCH_SIZE = 100 +CLINGEN_CACHE_WARMING_CONCURRENCY = 5 +"""Maximum number of concurrent requests to make to the ClinGen API when pre-warming the cache for mapped variants.""" LDH_SUBMISSION_ENDPOINT = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_ENDPOINT}" LDH_ACCESS_ENDPOINT = os.getenv("LDH_ACCESS_ENDPOINT", "https://genboree.org/ldh") LDH_MAVE_ACCESS_ENDPOINT = f"{LDH_ACCESS_ENDPOINT}/{LDH_ENTITY_NAME}/id" - -LINKED_DATA_RETRY_THRESHOLD = 0.95 diff --git a/src/mavedb/lib/clingen/services.py b/src/mavedb/lib/clingen/services.py index 0450d61d8..7bf7e8542 100644 --- a/src/mavedb/lib/clingen/services.py +++ b/src/mavedb/lib/clingen/services.py @@ -4,12 +4,11 @@ import time 
from datetime import datetime from typing import Optional, Union -from urllib import parse import requests from jose import jwt -from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT +from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD from mavedb.lib.logging.context import format_raised_exception_info_as_dict, logging_context, save_to_logging_context from mavedb.lib.types.clingen import ClinGenAllele, ClinGenSubmissionError, LdhSubmission from mavedb.lib.utils import batched @@ -279,50 +278,6 @@ def _existing_jwt(self) -> Optional[str]: return None -def get_clingen_variation(urn: str) -> Optional[dict]: - """ - Fetches ClinGen variation data for a given URN (Uniform Resource Name) from the Linked Data Hub. - - Args: - urn (str): The URN of the variation to fetch. - - Returns: - Optional[dict]: A dictionary containing the variation data if the request is successful, - or None if the request fails. - """ - response = requests.get( - f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", - headers={"Accept": "application/json"}, - ) - - if response.status_code == 200: - return response.json() - else: - logger.error(f"Failed to fetch data for URN {urn}: {response.status_code} - {response.text}") - return None - - -def clingen_allele_id_from_ldh_variation(variation: Optional[dict]) -> Optional[str]: - """ - Extracts the ClinGen allele ID from a given variation dictionary. - - Args: - variation (Optional[dict]): A dictionary containing variation data, otherwise None. - - Returns: - Optional[str]: The ClinGen allele ID if found, otherwise None. 
- """ - if not variation: - return None - - try: - return variation["data"]["ldFor"]["Variant"][0]["entId"] - except (KeyError, IndexError) as exc: - save_to_logging_context(format_raised_exception_info_as_dict(exc)) - logger.error("Failed to extract ClinGen allele ID from variation data.", extra=logging_context()) - return None - - def get_allele_registry_associations( content_submissions: list[str], submission_response: list[Union[ClinGenAllele, ClinGenSubmissionError]] ) -> dict[str, str]: diff --git a/src/mavedb/lib/clinvar/__init__.py b/src/mavedb/lib/clinvar/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/lib/clinvar/constants.py b/src/mavedb/lib/clinvar/constants.py new file mode 100644 index 000000000..e70c4fee2 --- /dev/null +++ b/src/mavedb/lib/clinvar/constants.py @@ -0,0 +1,31 @@ +import os +from pathlib import Path + +from urllib3.util.retry import Retry + +TSV_VARIANT_ARCHIVE_BASE_URL = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/archive" + +NCBI_REQUEST_HEADERS = { + "User-Agent": "MaveDB/1.0 (https://mavedb.org)", +} +"""NCBI's FTP servers aggressively throttle requests with non-descriptive User-Agent headers, returning 503. +MaveDB sets a custom User-Agent to identify itself and avoid unnecessary throttling when fetching ClinVar data. +""" + +CLINVAR_CACHE_DIR = Path(os.getenv("CLINVAR_CACHE_DIR", Path.home() / ".cache" / "mavedb" / "clinvar")) +"""File-based cache directory for ClinVar TSV files. These files are large (5-50+ MB) so we store them on disk instead of Redis. By default, this is set to a user-specific cache directory under the home directory, but it can be overridden by setting the CLINVAR_CACHE_DIR environment variable. +""" + +NCBI_RETRY_STRATEGY = Retry( + total=3, + backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504], +) +"""Retries protect against transient NCBI failures (brief 500s, connection resets). 
+Now that ClinVar versions are fetched sequentially (not concurrently), aggressive +backoff for throttling is unnecessary — a modest retry with short backoff suffices. +""" + +CLINVAR_FIELDS_TO_KEEP = ("GeneSymbol", "ClinicalSignificance", "ReviewStatus") +"""Only these fields are extracted from each ClinVar TSV row and cached. The full TSV has ~30 columns; trimming to only what we need shrinks the cached pickle from hundreds of MB to tens of MB and speeds up load times. +""" diff --git a/src/mavedb/lib/clinvar/utils.py b/src/mavedb/lib/clinvar/utils.py new file mode 100644 index 000000000..689e369ea --- /dev/null +++ b/src/mavedb/lib/clinvar/utils.py @@ -0,0 +1,198 @@ +import asyncio +import csv +import gzip +import hashlib +import io +import logging +import pickle +import sys +from datetime import datetime +from pathlib import Path +from typing import Dict + +import requests +from filelock import FileLock +from requests.adapters import HTTPAdapter + +from mavedb.lib.clinvar.constants import ( + CLINVAR_CACHE_DIR, + CLINVAR_FIELDS_TO_KEEP, + NCBI_REQUEST_HEADERS, + NCBI_RETRY_STRATEGY, + TSV_VARIANT_ARCHIVE_BASE_URL, +) + +_FIELDS_HASH = hashlib.sha256("|".join(CLINVAR_FIELDS_TO_KEEP).encode()).hexdigest()[:8] +"""Short hash of the kept fields, embedded in the cache filename so that adding/removing fields automatically invalidates stale caches. This ensures that if we change which fields we keep from the ClinVar TSV, we won't accidentally use old cached data that doesn't have the new fields.""" + +logger = logging.getLogger(__name__) + + +def _ncbi_session() -> requests.Session: + session = requests.Session() + session.headers.update(NCBI_REQUEST_HEADERS) + adapter = HTTPAdapter(max_retries=NCBI_RETRY_STRATEGY) + session.mount("https://", adapter) + session.mount("http://", adapter) + return session + + +def validate_clinvar_variant_summary_date(month: int, year: int) -> None: + """ + Validates the provided month and year for fetching ClinVar variant summary data. 
+ + Ensures that: + - The month is a valid calendar month (1-12). + - The date is not earlier than February 2015 (ClinVar archived data is only available from February 2015 onwards). + - The year is not in the future. + - If the year is the current year, the month is not in the future. + + Raises: + ValueError: If the month is outside 1-12, if the date is before February 2015, if the year is in the future, or if the month is in the future for the current year. + + Args: + month (int): The month to validate (1-12). + year (int): The year to validate. + """ + current_year = datetime.now().year + current_month = datetime.now().month + + if month < 1 or month > 12: + raise ValueError("Month must be an integer between 1 and 12.") + + if year < 2015 or (year == 2015 and month < 2): + raise ValueError("ClinVar archived data is only available from February 2015 onwards.") + elif year > current_year: + raise ValueError("Cannot fetch ClinVar data for future years.") + elif year == current_year and month > current_month: + raise ValueError("Cannot fetch ClinVar data for future months.") + + +async def fetch_clinvar_variant_data(month: int, year: int) -> Dict[str, Dict[str, str]]: + """ + Fetch, parse, and cache ClinVar variant summary data for a given month/year. + + Downloads the gzipped TSV from NCBI (with retry), parses it, trims each row + to only the fields we need (see ``CLINVAR_FIELDS_TO_KEEP``), and caches the + resulting dict as a pickle file on disk. Both download and parse run in an + executor to avoid blocking the event loop — the modern 350 MB+ files take + significant CPU time to decompress and parse. + + On subsequent calls the cached pickle is loaded directly (also in an executor), + skipping both the network fetch and the expensive parse. + + Args: + month: The month for which to fetch the variant summary (1-12). + year: The year for which to fetch the variant summary. + + Returns: + A dict mapping AlleleID (str) to a dict of the kept fields, e.g. + ``{"15041": {"GeneSymbol": "BRCA1", "ClinicalSignificance": "Pathogenic", "ReviewStatus": "..."}}``.
+ + Raises: + requests.RequestException: If the file cannot be downloaded from either location. + ValueError: If the provided month or year is invalid. + """ + validate_clinvar_variant_summary_date(month, year) + + cache_file = CLINVAR_CACHE_DIR / f"variant_summary_{year}-{month:02d}.parsed.{_FIELDS_HASH}.pkl" + + # Archival ClinVar files are immutable — cache never expires. + if cache_file.exists(): + logger.debug(f"Cache hit for parsed ClinVar {year}-{month:02d}") + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, _load_parsed_cache, cache_file) + + logger.debug(f"Cache miss — fetching and parsing ClinVar {year}-{month:02d}") + + # ClinVar stores recent files at the top level and older files in + # year-based subdirectories. The cadence at which files are moved is not + # documented, so we try both locations with a preference for the top-level. + url_top_level = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/variant_summary_{year}-{month:02d}.txt.gz" + url_archive = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/{year}/variant_summary_{year}-{month:02d}.txt.gz" + + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, _fetch_parse_and_cache, url_top_level, url_archive, cache_file, year, month) + + +def _load_parsed_cache(cache_file: Path) -> Dict[str, Dict[str, str]]: + """Load a previously cached parsed dict from a pickle file.""" + with open(cache_file, "rb") as f: + return pickle.load(f) # noqa: S301 — trusted local cache written by _fetch_parse_and_cache + + +def _fetch_parse_and_cache( + url_top_level: str, + url_archive: str, + cache_file: Path, + year: int, + month: int, +) -> Dict[str, Dict[str, str]]: + """Download ClinVar TSV, parse to a trimmed dict, and cache as pickle. + + Runs in an executor — all operations here are blocking (network I/O + CPU). + + A per-version file lock prevents two concurrent workers from downloading + and parsing the same version simultaneously, which would double peak memory + usage. 
The second worker acquires the lock after the first finishes and + writes the cache, then finds the cache file already present and returns + early without re-downloading. + """ + CLINVAR_CACHE_DIR.mkdir(parents=True, exist_ok=True) + lock_file = CLINVAR_CACHE_DIR / f"variant_summary_{year}-{month:02d}.lock" + + with FileLock(lock_file): + # Re-check cache inside the lock — another worker may have populated it + # while we were waiting. + if cache_file.exists(): + logger.debug(f"Cache hit (post-lock) for parsed ClinVar {year}-{month:02d}") + return _load_parsed_cache(cache_file) + + session = _ncbi_session() + try: + response = session.get(url_top_level, stream=True) + response.raise_for_status() + except requests.exceptions.HTTPError: + response = session.get(url_archive, stream=True) + response.raise_for_status() + + # Buffer the compressed payload (50–350 MB on recent files) into a + # BytesIO and let gzip decompress it lazily. Decompressing up front and + # calling readlines() on the result would materialise the full text + # (2–3 GB per job); lazy decompression and row-at-a-time CSV parsing + # keep peak memory to the compressed payload plus the trimmed output dict. + buf = io.BytesIO() + for chunk in response.iter_content(chunk_size=1 << 20): # 1 MB chunks + buf.write(chunk) + buf.seek(0) + + # Parse the gzipped TSV, keeping only the fields we actually use. + # Some old ClinVar files have fields larger than the default csv limit. + default_csv_field_size_limit = csv.field_size_limit() + try: + csv.field_size_limit(sys.maxsize) + # Iterate lazily — avoids materialising all decompressed lines + # as a list (which would be 1.5–2 GB for a modern TSV).
+ with gzip.open(filename=buf, mode="rt") as f: + reader = csv.DictReader(f, delimiter="\t") # type: ignore + data: Dict[str, Dict[str, str]] = { + str(row["#AlleleID"]): {field: row[field] for field in CLINVAR_FIELDS_TO_KEEP} for row in reader + } + finally: + csv.field_size_limit(default_csv_field_size_limit) + + # Cache the parsed + trimmed dict to disk so subsequent calls skip both + # the network fetch and the expensive parse. + with open(cache_file, "wb") as f: + pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL) + + logger.info(f"Cached parsed ClinVar {year}-{month:02d} to {cache_file} ({len(data)} alleles)") + + # Remove stale cache files for this month/year with a different fields hash. + stale_prefix = f"variant_summary_{year}-{month:02d}.parsed." + for stale in CLINVAR_CACHE_DIR.glob(f"{stale_prefix}*.pkl"): + if stale != cache_file: + stale.unlink(missing_ok=True) + logger.debug(f"Removed stale cache file {stale}") + + return data diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py index 8734becba..416ff8b3c 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -168,6 +168,12 @@ class NonexistentMappingResultsError(ValueError): pass +class NonexistentMappingScoresError(ValueError): + """Raised when score set mapping results do not contain mapping scores""" + + pass + + class NonexistentMappingReferenceError(ValueError): """Raised when score set mapping results do not contain a valid reference sequence""" @@ -202,3 +208,21 @@ class UniProtPollingEnqueueError(ValueError): """Raised when a UniProt ID polling job fails to be enqueued despite appearing as if it should have been""" pass + + +class LDHSubmissionFailureError(Exception): + """Raised when submission to ClinGen Linked Data Hub (LDH) fails for all submissions.""" + + pass + + +class PipelineNotFoundError(Exception): + """Raised when a pipeline associated with a job is not found.""" + + pass + + +class NoMappedVariantsError(Exception): + """Raised 
when no variants were mapped during the variant mapping process.""" + + pass diff --git a/src/mavedb/lib/gnomad.py b/src/mavedb/lib/gnomad.py index 02a7da2d2..9bfa0fec9 100644 --- a/src/mavedb/lib/gnomad.py +++ b/src/mavedb/lib/gnomad.py @@ -1,19 +1,21 @@ +import logging import os import re -import logging from typing import Any, Sequence, Union -from sqlalchemy import text, select, Row +from sqlalchemy import Connection, Row, select, text from sqlalchemy.orm import Session +from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.lib.utils import batched -from mavedb.db.athena import engine as athena_engine +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.mapped_variant import MappedVariant GNOMAD_DB_NAME = "gnomAD" -GNOMAD_DATA_VERSION = os.getenv("GNOMAD_DATA_VERSION") +GNOMAD_DATA_VERSION = os.getenv("GNOMAD_DATA_VERSION", "v4.1") # e.g., "v4.1" logger = logging.getLogger(__name__) @@ -66,7 +68,9 @@ def allele_list_from_list_like_string(alleles_string: str) -> list[str]: return alleles -def gnomad_variant_data_for_caids(caids: Sequence[str]) -> Sequence[Row[Any]]: # pragma: no cover +def gnomad_variant_data_for_caids( + athena_session: Connection, caids: Sequence[str] +) -> Sequence[Row[Any]]: # pragma: no cover """ Fetches variant rows from the gnomAD table for a list of CAIDs. Athena has a maximum character limit of 262144 in queries. CAIDs are about 12 characters long on average + 4 for two quotes, a comma and a space. 
Chunk our list @@ -94,36 +98,33 @@ def gnomad_variant_data_for_caids(caids: Sequence[str]) -> Sequence[Row[Any]]: caid_strs = [",".join(f"'{caid}'" for caid in chunk) for chunk in chunked_caids] save_to_logging_context({"num_caids": len(caids), "num_chunks": len(caid_strs)}) - with athena_engine.connect() as athena_connection: - logger.debug(msg="Connected to Athena", extra=logging_context()) - - result_rows: list[Row[Any]] = [] - for chunk_index, caid_str in enumerate(caid_strs): - athena_query = f""" - SELECT - "locus.contig", - "locus.position", - "alleles", - "caid", - "joint.freq.all.ac", - "joint.freq.all.an", - "joint.fafmax.faf95_max_gen_anc", - "joint.fafmax.faf95_max" - FROM - {gnomad_table_name()} - WHERE - caid IN ({caid_str}) - """ - logger.debug( - msg=f"Fetching gnomAD variants from Athena (batch {chunk_index}) with query:\n{athena_query}", - extra=logging_context(), - ) + result_rows: list[Row[Any]] = [] + for chunk_index, caid_str in enumerate(caid_strs): + athena_query = f""" + SELECT + "locus.contig", + "locus.position", + "alleles", + "caid", + "joint.freq.all.ac", + "joint.freq.all.an", + "joint.fafmax.faf95_max_gen_anc", + "joint.fafmax.faf95_max" + FROM + {gnomad_table_name()} + WHERE + caid IN ({caid_str}) + """ + logger.debug( + msg=f"Fetching gnomAD variants from Athena (batch {chunk_index}) with query:\n{athena_query}", + extra=logging_context(), + ) - result = athena_connection.execute(text(athena_query)) - rows = result.fetchall() - result_rows.extend(rows) + result = athena_session.execute(text(athena_query)) + rows = result.fetchall() + result_rows.extend(rows) - logger.debug(f"Fetched {len(rows)} gnomAD variants from Athena (batch {chunk_index}).") + logger.debug(f"Fetched {len(rows)} gnomAD variants from Athena (batch {chunk_index}).") save_to_logging_context({"num_gnomad_variant_rows_fetched": len(result_rows)}) logger.debug(msg="Done fetching gnomAD variants from Athena", extra=logging_context()) @@ -146,6 +147,7 @@ def 
link_gnomad_variants_to_mapped_variants( logger.debug(msg="Linking gnomAD variants to mapped variants", extra=logging_context()) linked_gnomad_variants = 0 + annotation_manager = AnnotationStatusManager(db) for index, row in enumerate(gnomad_variant_data, start=1): logger.info( msg=f"Processing gnomAD variant row {index}/{len(gnomad_variant_data)}: {row.caid}", extra=logging_context() @@ -217,6 +219,18 @@ def link_gnomad_variants_to_mapped_variants( linked_gnomad_variants += 1 db.add(gnomad_variant) + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.GNOMAD_ALLELE_FREQUENCY, + version=GNOMAD_DATA_VERSION, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "annotation_metadata": { + "gnomad_db_identifier": gnomad_variant.db_identifier, + } + }, + current=True, + ) logger.debug( msg=f"Linked gnomAD variant {gnomad_variant.db_identifier} to mapped variant {mapped_variant.id} ({mapped_variant.clingen_allele_id})", @@ -227,6 +241,8 @@ def link_gnomad_variants_to_mapped_variants( f"Linked {len(mapped_variants_with_caids)} mapped variants with CAID {row.caid} to gnomAD variant {gnomad_identifier_for_variant}. 
({index}/{len(gnomad_variant_data)})" ) + annotation_manager.flush() + save_to_logging_context({"linked_gnomad_variants": linked_gnomad_variants}) logger.info( msg=f"Linked a total of {linked_gnomad_variants} gnomAD variants to mapped variants.", diff --git a/src/mavedb/lib/logging/canonical.py b/src/mavedb/lib/logging/canonical.py index 430d1f913..bba7beb2d 100644 --- a/src/mavedb/lib/logging/canonical.py +++ b/src/mavedb/lib/logging/canonical.py @@ -9,6 +9,7 @@ from mavedb import __version__ from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.lib.logging.models import LogType, Source +from mavedb.lib.types.workflow import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -27,6 +28,10 @@ async def log_job(ctx: dict) -> None: if not result: logger.warning(msg=f"Job finished, but could not retrieve a job result for job {job_id}.", extra=log_context) else: + job_result = result.result + if isinstance(job_result, JobExecutionOutcome): + job_result = job_result.to_dict() + log_context = { **log_context, **{ @@ -36,7 +41,7 @@ async def log_job(ctx: dict) -> None: "job_name": result.function, "job_attempt": result.job_try, "arq_success": result.success, - "job_result": result.result, + "job_result": job_result, }, } diff --git a/src/mavedb/lib/logging/context.py b/src/mavedb/lib/logging/context.py index 6771f7606..075efb586 100644 --- a/src/mavedb/lib/logging/context.py +++ b/src/mavedb/lib/logging/context.py @@ -55,15 +55,7 @@ def save_to_logging_context(ctx: dict) -> dict: return {} for k, v in ctx.items(): - # Don't overwrite existing context mappings but create a list if a duplicated key is added. 
- if k in context: - existing_ctx = context[k] - if isinstance(existing_ctx, list): - context[k].append(v) - else: - context[k] = [existing_ctx, v] - else: - context[k] = v + context[k] = v return context.data diff --git a/src/mavedb/lib/mapping.py b/src/mavedb/lib/mapping.py index d3915f53e..0f601e85a 100644 --- a/src/mavedb/lib/mapping.py +++ b/src/mavedb/lib/mapping.py @@ -9,6 +9,8 @@ "c": "cdna", } +EXCLUDED_PREMAPPED_ANNOTATION_KEYS = {"sequence"} + class VRSMap: url: str diff --git a/src/mavedb/lib/slack.py b/src/mavedb/lib/slack.py index 71f16aaec..2bf3a34d6 100644 --- a/src/mavedb/lib/slack.py +++ b/src/mavedb/lib/slack.py @@ -7,9 +7,10 @@ from slack_sdk.webhook import WebhookClient - logger = logging.getLogger(__name__) +_BLOCK_TEXT_MAX = 2000 + def find_traceback_locations(): _, _, tb = sys.exc_info() @@ -21,32 +22,139 @@ def find_traceback_locations(): ] -def send_slack_message(text: str): +def _send_slack_blocks(fallback_text: str, blocks: list[dict]) -> None: + """Send a Slack message with Block Kit formatting. 
Falls back to print when no webhook URL is set.""" slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL") - if slack_webhook_url is not None and len(slack_webhook_url) > 0: + if slack_webhook_url: client = WebhookClient(url=slack_webhook_url) - client.send( - text=text, - blocks=[ - { - "type": "section", - "text": {"type": "plain_text", "text": text}, - } - ], - ) + client.send(text=fallback_text, blocks=blocks) else: - print(f"EXCEPTION_HANDLER: {text}") + print(f"SLACK: {fallback_text}") + + +def send_slack_message(text: str): + _send_slack_blocks( + fallback_text=text, + blocks=[{"type": "section", "text": {"type": "plain_text", "text": text}}], + ) def send_slack_error(err, request=None): - text = {"type": err.__class__.__name__, "exception": str(err), "location": find_traceback_locations()} + try: + text = {"type": err.__class__.__name__, "exception": str(err), "location": find_traceback_locations()} + + if request: + text["client"] = str(request.client.host) + text["request"] = f"{request.method} {request.url}" + + text = json.dumps(text) + send_slack_message(text) + except Exception: + logger.critical("Failed to send Slack error notification", exc_info=True) + - if request: - text["client"] = str(request.client.host) - text["request"] = f"{request.method} {request.url}" +def _retry_status_text(retry_count: int, max_retries: int, will_retry: bool) -> str: + """Format a human-readable retry status string for Slack notifications. + + retry_count is 0-indexed (0 = first attempt). total attempts = max_retries + 1. 
+ """ + attempt = retry_count + 1 + total = max_retries + 1 + if will_retry: + return f"Attempt {attempt} of {total} — will retry" + + return f"Attempt {attempt} of {total} — this job will not be retried" + + +def send_slack_job_failure( + job_urn: str, + job_function: str, + reason: str, + failure_category: str, + retry_count: int = 0, + max_retries: int = 0, + will_retry: bool = False, +) -> None: + """Send a structured Slack alert for a controlled job failure (FAILED outcome).""" + try: + retry_text = _retry_status_text(retry_count, max_retries, will_retry) + blocks: list[dict] = [ + {"type": "header", "text": {"type": "plain_text", "text": "⚠️ Job Failed"}}, + { + "type": "section", + "fields": [ + {"type": "mrkdwn", "text": f"*Job URN*\n`{job_urn}`"}, + {"type": "mrkdwn", "text": f"*Function*\n`{job_function}`"}, + {"type": "mrkdwn", "text": f"*Category*\n{failure_category or 'unknown'}"}, + {"type": "mrkdwn", "text": f"*Retry*\n{retry_text}"}, + ], + }, + {"type": "divider"}, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": f"*Reason*\n{reason or 'No reason provided'}"[:_BLOCK_TEXT_MAX], + }, + }, + ] + fallback = f"Job Failed: {job_urn} ({job_function}) — {reason} [{retry_text}]" + _send_slack_blocks(fallback, blocks) + except Exception: + logger.critical("Failed to send Slack job failure notification", exc_info=True) + + +def send_slack_job_error( + job_urn: str, + job_function: str, + err: Exception, + failure_category: str = "", + retry_count: int = 0, + max_retries: int = 0, + will_retry: bool = False, +) -> None: + """Send a structured Slack alert for an unhandled job exception (ERRORED outcome).""" + try: + locations = find_traceback_locations() + location_lines = [f"`{fn}:{lineno}` in `{name}`" for fn, lineno, name in locations] + retry_text = _retry_status_text(retry_count, max_retries, will_retry) + + blocks: list[dict] = [ + {"type": "header", "text": {"type": "plain_text", "text": "\U0001f6a8 Job Errored"}}, + { + "type": 
"section", + "fields": [ + {"type": "mrkdwn", "text": f"*Job URN*\n`{job_urn}`"}, + {"type": "mrkdwn", "text": f"*Function*\n`{job_function}`"}, + {"type": "mrkdwn", "text": f"*Exception*\n`{err.__class__.__name__}`"}, + {"type": "mrkdwn", "text": f"*Category*\n{failure_category or 'unknown'}"}, + {"type": "mrkdwn", "text": f"*Retry*\n{retry_text}"}, + ], + }, + {"type": "divider"}, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": f"*Message*\n```{str(err)}```"[:_BLOCK_TEXT_MAX], + }, + }, + ] + if location_lines: + blocks.append( + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": ("*Location*\n" + "\n".join(location_lines))[:_BLOCK_TEXT_MAX], + }, + } + ) - text = json.dumps(text) - send_slack_message(text) + fallback = f"Job Errored: {job_urn} ({job_function}) — {err.__class__.__name__}: {err} [{retry_text}]" + _send_slack_blocks(fallback, blocks) + except Exception: + logger.critical("Failed to send Slack job error notification", exc_info=True) def log_and_send_slack_message(msg: str, ctx: dict[str, Any], level: int): diff --git a/src/mavedb/lib/target_genes.py b/src/mavedb/lib/target_genes.py index 61f206534..88ae5e29d 100644 --- a/src/mavedb/lib/target_genes.py +++ b/src/mavedb/lib/target_genes.py @@ -187,3 +187,67 @@ def search_target_genes( ) return target_genes + + +def get_target_coding_info(score_set: ScoreSet) -> tuple[bool, Optional[str]]: + """Extract target coding status and transcript accession for a single-target score set. + + Determines whether the score set target is protein-coding and identifies + the transcript accession to use for HGVS lookups. For accession-based targets, + uses the accession if it's an NM or ENST transcript. For sequence-based targets, + prefers cDNA accession from post-mapped metadata. + + Args: + score_set: The ScoreSet to analyze. + + Returns: + Tuple of (target_is_coding, transcript_accession). 
transcript_accession + may be None even for coding targets if no transcript could be determined. + + Raises: + NotImplementedError: If the score set has multiple targets. + ValueError: If ambiguous cDNA accessions are found in post-mapped metadata. + """ + # TODO#712: Support multi-target score sets. Each variant's hgvs prefix + # (e.g. "TARGET_NAME:c.1A>G") identifies which target it belongs to. + # This function should return a dict[str, tuple[bool, Optional[str]]] + # keyed by target name, and the job loop should resolve per-variant. + if len(score_set.target_genes) != 1: + raise NotImplementedError("Populating mapped HGVS for multi-target score sets is not yet supported.") + + target = score_set.target_genes[0] + if target.category != "protein_coding": + return False, None + + transcript_accession: Optional[str] = None + + # Accession-based: use transcript accession if it's an NM or ENST transcript + if target.target_accession and target.target_accession.accession: + if target.target_accession.accession.startswith(("NM", "ENST")): + transcript_accession = target.target_accession.accession + + # Sequence-based: prefer cDNA accession from post-mapped metadata + if target.post_mapped_metadata: + assert isinstance(target.post_mapped_metadata, dict) + cdna_accessions = target.post_mapped_metadata.get("cdna", {}).get("sequence_accessions") + if cdna_accessions: + if len(cdna_accessions) == 1: + transcript_accession = cdna_accessions[0] + else: + raise ValueError( + f"Multiple cDNA accessions found in post-mapped metadata for target {target.name} " + f"in score set {score_set.urn}. Cannot determine which to use." + ) + else: + logger.warning( + f"No cDNA accession found in post-mapped metadata for target {target.name} in score set " + f"{score_set.urn}. If variants are at the nucleotide level, will assume MANE transcript " + f"from ClinGen." + ) + else: + logger.warning( + f"No post-mapped metadata for target {target.name} in score set {score_set.urn}. 
" + f"Will assume MANE transcript from ClinGen for coding variant." + ) + + return True, transcript_accession diff --git a/src/mavedb/lib/types/clingen.py b/src/mavedb/lib/types/clingen.py index 708b6c17e..451c827c7 100644 --- a/src/mavedb/lib/types/clingen.py +++ b/src/mavedb/lib/types/clingen.py @@ -1,6 +1,6 @@ from typing import Any, Literal, Optional, TypedDict -from typing_extensions import NotRequired +from typing_extensions import NotRequired, TypeGuard # See: https://ldh.genome.network/docs/ldh/submit.html#content-submission-body @@ -164,3 +164,7 @@ class ClinGenAlleleDefinition(TypedDict): "position": str, }, ) + + +def is_car_submission_error(err: ClinGenAllele | ClinGenSubmissionError) -> TypeGuard[ClinGenSubmissionError]: + return "errorType" in err and "hgvs" in err diff --git a/src/mavedb/lib/types/workflow.py b/src/mavedb/lib/types/workflow.py new file mode 100644 index 000000000..459d4337d --- /dev/null +++ b/src/mavedb/lib/types/workflow.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, NotRequired, TypedDict + +from mavedb.models.enums.job_pipeline import DependencyType, FailureCategory, JobStatus + + +@dataclass +class JobExecutionOutcome: + """Result of a job execution, returned by job functions to the management layer. 
+ + Use factory methods to construct instances rather than direct construction: + - ``JobExecutionOutcome.succeeded()`` — job completed successfully + - ``JobExecutionOutcome.failed()`` — controlled business logic failure + - ``JobExecutionOutcome.errored()`` — unhandled exception / system crash + - ``JobExecutionOutcome.skipped()`` — job intentionally not executed + """ + + status: JobStatus + data: dict[str, Any] + error: str | None + exception: Exception | None + failure_category: FailureCategory | None = None + + @classmethod + def succeeded(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Job completed successfully.""" + return cls(status=JobStatus.SUCCEEDED, data=data or {}, error=None, exception=None) + + @classmethod + def failed( + cls, reason: str, data: dict[str, Any] | None = None, failure_category: FailureCategory | None = None + ) -> JobExecutionOutcome: + """Controlled failure — job determined the outcome was unsuccessful.""" + return cls( + status=JobStatus.FAILED, data=data or {}, error=reason, exception=None, failure_category=failure_category + ) + + @classmethod + def errored( + cls, exception: Exception, data: dict[str, Any] | None = None, failure_category: FailureCategory | None = None + ) -> JobExecutionOutcome: + """Unhandled exception — job crashed.""" + return cls( + status=JobStatus.ERRORED, + data=data or {}, + error=str(exception), + exception=exception, + failure_category=failure_category, + ) + + @classmethod + def skipped(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Job intentionally not executed.""" + return cls(status=JobStatus.SKIPPED, data=data or {}, error=None, exception=None) + + def to_dict(self) -> dict[str, Any]: + """Return a JSON-serializable dictionary representation. + + Excludes the ``exception`` field since Exception objects are not + JSON-serializable. Use this for logging, ARQ result storage, and + any context where a plain dict is needed. 
+ """ + return { + "status": self.status.value, + "data": self.data, + "error": self.error, + "failure_category": self.failure_category.value if self.failure_category else None, + } + + +class JobDefinition(TypedDict): + key: str + type: str + function: str + params: dict[str, Any] + dependencies: list[tuple[str, DependencyType]] + retry_delay_seconds: NotRequired[int] + + +class PipelineDefinition(TypedDict): + description: str + job_definitions: list[JobDefinition] diff --git a/src/mavedb/lib/urns.py b/src/mavedb/lib/urns.py index e3903ac84..55a59e707 100644 --- a/src/mavedb/lib/urns.py +++ b/src/mavedb/lib/urns.py @@ -153,3 +153,25 @@ def generate_calibration_urn(): :return: A new calibration URN """ return f"urn:mavedb:calibration-{uuid4()}" + + +def generate_pipeline_urn(): + """ + Generate a new URN for a pipeline. + + Pipeline URNs include a 16-digit UUID. + + :return: A new pipeline URN + """ + return f"urn:mavedb:pipeline-{uuid4()}" + + +def generate_job_run_urn(): + """ + Generate a new URN for a job run. + + Job run URNs include a 16-digit UUID. + + :return: A new job run URN + """ + return f"urn:mavedb:job-{uuid4()}" diff --git a/src/mavedb/lib/variant_translations.py b/src/mavedb/lib/variant_translations.py new file mode 100644 index 000000000..ec17cc9c7 --- /dev/null +++ b/src/mavedb/lib/variant_translations.py @@ -0,0 +1,35 @@ +"""Variant translation utilities for managing PA<->CA allele relationships. + +This module provides database operations for the variant_translations table, +which stores relationships between protein allele (PA) and nucleotide allele (CA) +ClinGen IDs. +""" + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.variant_translation import VariantTranslation + + +def upsert_variant_translations(db: Session, translations: list[tuple[str, str]]) -> tuple[int, int]: + """Insert VariantTranslation rows for (aa, nt) pairs that don't already exist. + + Returns (created, existing) counts. 
+ """ + created = 0 + existing = 0 + for aa_clingen_id, nt_clingen_id in translations: + found = db.scalars( + select(VariantTranslation).where( + VariantTranslation.aa_clingen_id == aa_clingen_id, + VariantTranslation.nt_clingen_id == nt_clingen_id, + ) + ).one_or_none() + + if found: + existing += 1 + else: + db.add(VariantTranslation(aa_clingen_id=aa_clingen_id, nt_clingen_id=nt_clingen_id)) + created += 1 + + return created, existing diff --git a/src/mavedb/lib/vep.py b/src/mavedb/lib/vep.py new file mode 100644 index 000000000..f31087451 --- /dev/null +++ b/src/mavedb/lib/vep.py @@ -0,0 +1,169 @@ +"""VEP (Variant Effect Predictor) library functions for functional consequence prediction.""" + +import asyncio +import functools +import logging +from typing import Optional, Sequence + +from mavedb.lib.utils import request_with_backoff + +logger = logging.getLogger(__name__) + +ENSEMBL_API_URL = "https://rest.ensembl.org" + +# List of all possible VEP consequences, in order from most to least severe +VEP_CONSEQUENCES = [ + "transcript_ablation", + "splice_acceptor_variant", + "splice_donor_variant", + "stop_gained", + "frameshift_variant", + "stop_lost", + "start_lost", + "transcript_amplification", + "inframe_insertion", + "inframe_deletion", + "missense_variant", + "disruptive_inframe_insertion", + "disruptive_inframe_deletion", + "protein_altering_variant", + "splice_region_variant", + "incomplete_terminal_codon_variant", + "start_retained", + "stop_retained", + "synonymous_variant", + "coding_sequence_variant", + "mature_miRNA_variant", + "5_prime_UTR_premature_start_codon_gain_variant", + "5_prime_UTR_variant", + "3_prime_UTR_variant", + "non_coding_transcript_exon_variant", + "non_coding_exon_variant", + "non_coding_transcript_variant", + "nc_transcript_variant", + "upstream_gene_variant", + "downstream_gene_variant", + "TFBS_ablation", + "TFBS_amplification", + "TF_binding_site_variant", + "regulatory_region_ablation", + "enhancer_ablation", + 
"regulatory_region_amplification", + "enhancer_amplification", + "regulatory_region_variant", + "feature_elongation", + "regulatory_region", + "TFBS", + "feature_truncation", + "exon_variant", + "disruptive_inframe_deletion", + "gene_variant", + "variant_affecting_coding_sequence_conservation", + "variant_affecting_genome_assembly_quality", + "variant_of_unknown_significance", + "sequence_variant", + "rare_amino_acid_variant", + "splice_region_variant", + "downstream_gene_variant", + "upstream_gene_variant", + "intron_variant", + "intergenic_variant", +] + + +async def run_variant_recoder(missing_hgvs: Sequence[str]) -> dict[str, list[str]]: + """Call the Variant Recoder API and return a mapping from input HGVS strings to genomic HGVS strings. + + Args: + missing_hgvs (Sequence[str]): List of HGVS strings to recode. + + Returns: + dict[str, list[str]]: Mapping of input HGVS to list of genomic HGVS strings (hgvsg). + + Raises: + VEPProcessingError: If the API request fails. + """ + headers = {"Content-Type": "application/json", "Accept": "application/json"} + # request_with_backoff is synchronous (requests lib + time.sleep backoff); run_in_executor + # keeps the event loop free during the full request + any retry wait time. 
+ loop = asyncio.get_running_loop() + response = await loop.run_in_executor( + None, + functools.partial( + request_with_backoff, + method="POST", + url=f"{ENSEMBL_API_URL}/variant_recoder/human", + headers=headers, + json={"ids": list(missing_hgvs)}, + ), + ) + hgvs_to_genomic: dict[str, list[str]] = {} + # request_with_backoff handles http errors, so no need to check response status + data = response.json() + for entry in data: + hgvs_string = entry.get("input") + if not hgvs_string: + continue + genomic_hgvs_list = [] + for variant, variant_data in entry.items(): + if variant == "input": + continue + genomic_strings = variant_data.get("hgvsg") if isinstance(variant_data, dict) else None + if genomic_strings: + for genomic_hgvs in genomic_strings: + if genomic_hgvs.startswith("NC_"): + genomic_hgvs_list.append(genomic_hgvs) + if genomic_hgvs_list: + hgvs_to_genomic[hgvs_string] = genomic_hgvs_list + + return hgvs_to_genomic + + +async def get_functional_consequence(hgvs_strings: Sequence[str]) -> dict[str, Optional[str]]: + """Get VEP functional consequences for a batch of HGVS strings. + + Submits HGVS strings to the Ensembl VEP API and retrieves functional consequence + predictions. For any HGVS strings not found in the initial VEP response, attempts + to recode them using Variant Recoder and retries with VEP. + + Args: + hgvs_strings (Sequence[str]): List of HGVS strings to process (max 200 per call). + + Returns: + dict[str, Optional[str]]: Mapping of HGVS string to functional consequence. + If no consequence found, maps to None. + + Raises: + VEPProcessingError: If VEP API processing fails critically. + """ + if len(hgvs_strings) > 200: + raise ValueError( + "VEP API can process a maximum of 200 HGVS strings per request. This function does not handle batching." 
+ ) + + headers = {"Content-Type": "application/json", "Accept": "application/json"} + result: dict[str, Optional[str]] = {} + + # request_with_backoff is synchronous (requests lib + time.sleep backoff); run_in_executor + # keeps the event loop free during the full request + any retry wait time. + loop = asyncio.get_running_loop() + response = await loop.run_in_executor( + None, + functools.partial( + request_with_backoff, + method="POST", + url=f"{ENSEMBL_API_URL}/vep/human/hgvs", + headers=headers, + json={"hgvs_notations": list(hgvs_strings)}, + ), + ) + + # request_with_backoff handles http errors, so no need to check response status + data = response.json() + for entry in data: + hgvs = entry.get("input") + most_severe_consequence = entry.get("most_severe_consequence") + if hgvs: + result[hgvs] = most_severe_consequence + + return result diff --git a/src/mavedb/lib/workflow/__init__.py b/src/mavedb/lib/workflow/__init__.py new file mode 100644 index 000000000..65be13860 --- /dev/null +++ b/src/mavedb/lib/workflow/__init__.py @@ -0,0 +1,9 @@ +from .definitions import PIPELINE_DEFINITIONS +from .job_factory import JobFactory +from .pipeline_factory import PipelineFactory + +__all__ = [ + "JobFactory", + "PipelineFactory", + "PIPELINE_DEFINITIONS", +] diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py new file mode 100644 index 000000000..2c6fe0c48 --- /dev/null +++ b/src/mavedb/lib/workflow/definitions.py @@ -0,0 +1,164 @@ +from mavedb.lib.types.workflow import JobDefinition, PipelineDefinition +from mavedb.models.enums.job_pipeline import DependencyType, JobType + +# As a general rule, job keys should match function names for clarity. In some cases of +# repeated jobs, a suffix may be added to the key for uniqueness. 
+ + +def annotation_pipeline_job_definitions() -> list[JobDefinition]: + return [ + { + "key": "submit_score_set_mappings_to_car", + "function": "submit_score_set_mappings_to_car", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "warm_clingen_cache", + "function": "warm_clingen_cache", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "link_gnomad_variants", + "function": "link_gnomad_variants", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "submit_uniprot_mapping_jobs_for_score_set", + "function": "submit_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "poll_uniprot_mapping_jobs_for_score_set", + "function": "poll_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + 
"mapping_jobs": {}, # Required param to be filled in at runtime by previous job + }, + "dependencies": [("submit_uniprot_mapping_jobs_for_score_set", DependencyType.SUCCESS_REQUIRED)], + # UniProt ID mapping results are typically ready within seconds to minutes. A 30-second + # retry delay prevents hammering the API while still polling frequently enough to be timely. + "retry_delay_seconds": 30, + }, + # Consolidated ClinVar refresh: a single job iterates all archival versions internally + { + "key": "refresh_clinvar_controls", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "populate_hgvs_for_score_set", + "function": "populate_hgvs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "populate_vep_for_score_set", + "function": "populate_vep_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "populate_variant_translations_for_score_set", + "function": "populate_variant_translations_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("warm_clingen_cache", 
DependencyType.SUCCESS_REQUIRED)], + }, + ] + + +PIPELINE_DEFINITIONS: dict[str, PipelineDefinition] = { + "validate_map_annotate_score_set": { + "description": "Pipeline to validate, map, and annotate variants for a score set.", + "job_definitions": [ + { + "key": "create_variants_for_score_set", + "function": "create_variants_for_score_set", + "type": JobType.VARIANT_CREATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + "scores_file_key": None, # Required param to be filled in at runtime + "counts_file_key": None, # Required param to be filled in at runtime + "score_columns_metadata": None, # Required param to be filled in at runtime + "count_columns_metadata": None, # Required param to be filled in at runtime + }, + "dependencies": [], + }, + { + "key": "map_variants_for_score_set", + "function": "map_variants_for_score_set", + "type": JobType.VARIANT_MAPPING, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("create_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + *annotation_pipeline_job_definitions(), + ], + }, + "map_annotate_score_set": { + "description": "Pipeline to map and annotate variants for a score set (assumes variants are already created).", + "job_definitions": [ + { + "key": "map_variants_for_score_set", + "function": "map_variants_for_score_set", + "type": JobType.VARIANT_MAPPING, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + }, + "dependencies": [], + }, + 
*annotation_pipeline_job_definitions(),
+        ],
+    },
+    "annotate_score_set": {
+        "description": "Pipeline to annotate variants for a score set.",
+        "job_definitions": annotation_pipeline_job_definitions(),
+    },
+    # Add more pipelines here
+}
diff --git a/src/mavedb/lib/workflow/job_factory.py b/src/mavedb/lib/workflow/job_factory.py
new file mode 100644
index 000000000..151cc3b57
--- /dev/null
+++ b/src/mavedb/lib/workflow/job_factory.py
@@ -0,0 +1,103 @@
+from copy import deepcopy
+from typing import Optional
+
+from sqlalchemy.orm import Session
+
+from mavedb import __version__ as mavedb_version
+from mavedb.lib.types.workflow import JobDefinition
+from mavedb.models.enums.job_pipeline import DependencyType
+from mavedb.models.job_dependency import JobDependency
+from mavedb.models.job_run import JobRun
+
+
+class JobFactory:
+    """
+    JobFactory is responsible for creating and persisting JobRun instances based on
+    provided job definitions and pipeline parameters.
+
+    Attributes:
+        session (Session): The SQLAlchemy session used for database operations.
+
+    Methods:
+        create_job_run(job_def: JobDefinition, correlation_id: str, pipeline_params: dict, pipeline_id: Optional[int] = None) -> JobRun:"""
+
+    def __init__(self, session: Session):
+        self.session = session
+
+    def create_job_run(
+        self, job_def: JobDefinition, correlation_id: str, pipeline_params: dict, pipeline_id: Optional[int] = None
+    ) -> JobRun:
+        """
+        Creates and persists a new JobRun instance based on the provided job definition and pipeline parameters.
+
+        Args:
+            job_def (JobDefinition): The job definition containing job type, function, and parameter template.
+            correlation_id (str): A unique identifier for correlating this job run with external systems or logs.
+            pipeline_params (dict): A dictionary of parameters to fill in required job parameters and allow for extensibility.
+            pipeline_id (Optional[int]): The ID of the pipeline this job run is associated with.
+ + Returns: + JobRun: The newly created JobRun instance (not yet committed to the database). + + Raises: + ValueError: If any required parameter defined in the job definition is missing from pipeline_params. + """ + job_params = deepcopy(job_def["params"]) + + # Fill in required params from pipeline_params + for key in job_params: + if job_params[key] is None: + if key not in pipeline_params: + raise ValueError(f"Missing required param: {key}") + job_params[key] = pipeline_params[key] + + job_run = JobRun( + job_type=job_def["type"], + job_function=job_def["function"], + job_params=job_params, + pipeline_id=pipeline_id, + mavedb_version=mavedb_version, + correlation_id=correlation_id, + retry_delay_seconds=job_def.get("retry_delay_seconds"), + ) # type: ignore[call-arg] + + self.session.add(job_run) + return job_run + + def create_job_dependency( + self, + parent_job_run_id: int, + child_job_run_id: int, + dependency_type: DependencyType = DependencyType.SUCCESS_REQUIRED, + ) -> JobDependency: + """ + Creates and persists a JobDependency instance linking a parent job run to a child job run. + + Args: + parent_job_run_id (int): The ID of the parent job run. + child_job_run_id (int): The ID of the child job run. + dependency_type (DependencyType): The type of dependency (default is SUCCESS_REQUIRED). + + Returns: + JobDependency: The newly created JobDependency instance (not yet committed to the database). + + Raises: + ValueError: If the parent or child job run IDs do not exist in the database. 
+ """ + + # Validate that the parent and child job runs exist + parent_exists = self.session.query(JobRun.id).filter(JobRun.id == parent_job_run_id).first() is not None + child_exists = self.session.query(JobRun.id).filter(JobRun.id == child_job_run_id).first() is not None + if not parent_exists: + raise ValueError(f"Parent job run ID {parent_job_run_id} does not exist.") + if not child_exists: + raise ValueError(f"Child job run ID {child_job_run_id} does not exist.") + + job_dependency = JobDependency( + id=child_job_run_id, + depends_on_job_id=parent_job_run_id, + dependency_type=dependency_type, + ) # type: ignore[call-arg] + + self.session.add(job_dependency) + return job_dependency diff --git a/src/mavedb/lib/workflow/pipeline_factory.py b/src/mavedb/lib/workflow/pipeline_factory.py new file mode 100644 index 000000000..42ec1e00f --- /dev/null +++ b/src/mavedb/lib/workflow/pipeline_factory.py @@ -0,0 +1,116 @@ +from sqlalchemy.orm import Session + +from mavedb import __version__ as mavedb_version +from mavedb.lib.logging.context import correlation_id_for_context +from mavedb.lib.workflow.definitions import PIPELINE_DEFINITIONS +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.models.enums.job_pipeline import JobType +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.models.user import User + + +class PipelineFactory: + """ + PipelineFactory is responsible for creating Pipeline instances and their associated JobRun and JobDependency records in the database. + + Attributes: + session (Session): The SQLAlchemy session used for database operations. + + Methods: + __init__(session: Session): + Initializes the PipelineFactory with a database session. 
+
+
+        create_pipeline(
+            pipeline_name: str,
+            creating_user: User,
+            pipeline_params: dict
+        ) -> tuple[Pipeline, JobRun]:
+            Creates a new Pipeline along with its JobRun and JobDependency records,
+            commits them to the database, and returns the created Pipeline together
+            with its entrypoint JobRun.
+    """
+
+    def __init__(self, session: Session):
+        self.session = session
+
+    def create_pipeline(
+        self, pipeline_name: str, creating_user: User, pipeline_params: dict
+    ) -> tuple[Pipeline, JobRun]:
+        """
+        Creates a new Pipeline instance along with its associated JobRun and JobDependency records.
+
+        Args:
+            pipeline_name (str): The name of the pipeline to create; the pipeline's
+                description is taken from PIPELINE_DEFINITIONS[pipeline_name].
+            creating_user (User): The user object representing the user creating the pipeline.
+            pipeline_params (dict): Additional parameters for pipeline creation, such as correlation_id.
+
+        Returns:
+            Pipeline: The created Pipeline object.
+            JobRun: The JobRun object representing the start of the pipeline.
+
+        Raises:
+            KeyError: If the specified pipeline_name is not found in PIPELINE_DEFINITIONS.
+            ValueError: If a job definition requires a parameter missing from pipeline_params.
+
+        Side Effects:
+            - Adds and commits new Pipeline, JobRun, and JobDependency records to the database session.
+ """ + pipeline_def = PIPELINE_DEFINITIONS[pipeline_name] + jobs = pipeline_def["job_definitions"] + job_runs: dict[str, JobRun] = {} + + correlation_id = pipeline_params.get("correlation_id", correlation_id_for_context()) + + pipeline = Pipeline( + name=pipeline_name, + description=pipeline_def["description"], + correlation_id=correlation_id, + created_by_user_id=creating_user.id, + mavedb_version=mavedb_version, + ) # type: ignore[call-arg] + self.session.add(pipeline) + self.session.flush() # To get pipeline.id + + start_pipeline_job = JobRun( + job_type=JobType.PIPELINE_MANAGEMENT, + job_function="start_pipeline", + job_params={}, + pipeline_id=pipeline.id, + mavedb_version=mavedb_version, + correlation_id=correlation_id, + ) # type: ignore[call-arg] + self.session.add(start_pipeline_job) + self.session.flush() # to get start_pipeline_job.id + + job_factory = JobFactory(self.session) + for job_def in jobs: + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=pipeline.id, + correlation_id=correlation_id, + pipeline_params=pipeline_params, + ) + job_runs[job_def["key"]] = job_run + + self.session.flush() # to get job_run IDs + + for job_def in jobs: + job_deps = job_def["dependencies"] + + job_run = job_runs[job_def["key"]] + for dep_key, dependency_type in job_deps: + dep_job_run = job_runs[dep_key] + + dep_job = JobDependency( + id=job_run.id, + depends_on_job_id=dep_job_run.id, + dependency_type=dependency_type, + ) # type: ignore[call-arg] + + self.session.add(dep_job) + + self.session.commit() + return pipeline, start_pipeline_job diff --git a/src/mavedb/lib/workflow/py.typed b/src/mavedb/lib/workflow/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/models/__init__.py b/src/mavedb/models/__init__.py index 1a20b7924..7e5f31513 100644 --- a/src/mavedb/models/__init__.py +++ b/src/mavedb/models/__init__.py @@ -11,9 +11,12 @@ "experiment_set", "genome_identifier", "gnomad_variant", + "job_dependency", + 
"job_run", "legacy_keyword", "license", "mapped_variant", + "pipeline", "publication_identifier", "published_variant", "raw_read_identifier", @@ -31,6 +34,7 @@ "uniprot_identifier", "uniprot_offset", "user", + "variant_annotation_status", "variant", "variant_translation", ] diff --git a/src/mavedb/models/enums/__init__.py b/src/mavedb/models/enums/__init__.py index e69de29bb..80c3a7de1 100644 --- a/src/mavedb/models/enums/__init__.py +++ b/src/mavedb/models/enums/__init__.py @@ -0,0 +1,25 @@ +""" +Enums used by MaveDB models. +""" + +from .contribution_role import ContributionRole +from .job_pipeline import AnnotationStatus, DependencyType, FailureCategory, JobStatus, PipelineStatus +from .mapping_state import MappingState +from .processing_state import ProcessingState +from .score_calibration_relation import ScoreCalibrationRelation +from .target_category import TargetCategory +from .user_role import UserRole + +__all__ = [ + "ContributionRole", + "JobStatus", + "PipelineStatus", + "DependencyType", + "FailureCategory", + "AnnotationStatus", + "MappingState", + "ProcessingState", + "ScoreCalibrationRelation", + "TargetCategory", + "UserRole", +] diff --git a/src/mavedb/models/enums/annotation_type.py b/src/mavedb/models/enums/annotation_type.py new file mode 100644 index 000000000..b1595347b --- /dev/null +++ b/src/mavedb/models/enums/annotation_type.py @@ -0,0 +1,12 @@ +from enum import Enum + + +class AnnotationType(str, Enum): + VRS_MAPPING = "vrs_mapping" + CLINGEN_ALLELE_ID = "clingen_allele_id" + MAPPED_HGVS = "mapped_hgvs" + VARIANT_TRANSLATION = "variant_translation" + GNOMAD_ALLELE_FREQUENCY = "gnomad_allele_frequency" + CLINVAR_CONTROL = "clinvar_control" + VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" + LDH_SUBMISSION = "ldh_submission" diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py new file mode 100644 index 000000000..80ac05c51 --- /dev/null +++ b/src/mavedb/models/enums/job_pipeline.py @@ 
-0,0 +1,98 @@ +""" +Job and pipeline related enums. +""" + +from enum import Enum + + +class JobStatus(str, Enum): + """Status of a job execution.""" + + SUCCEEDED = "succeeded" + FAILED = "failed" + ERRORED = "errored" + PENDING = "pending" + QUEUED = "queued" + RUNNING = "running" + CANCELLED = "cancelled" + SKIPPED = "skipped" + + +class PipelineStatus(str, Enum): + """Status of a pipeline execution.""" + + SUCCEEDED = "succeeded" + FAILED = "failed" + CREATED = "created" + RUNNING = "running" + PAUSED = "paused" + CANCELLED = "cancelled" + PARTIAL = "partial" # Pipeline completed with mixed results (some succeeded, some skipped/cancelled) + + +class DependencyType(str, Enum): + """Types of job dependencies.""" + + SUCCESS_REQUIRED = "success_required" # Job only runs if dependency succeeded + COMPLETION_REQUIRED = "completion_required" # Job runs if dependency completed (success OR failure) + + +class FailureCategory(str, Enum): + """Categories of job failures for better classification and handling.""" + + # System-level failures + SYSTEM_ERROR = "system_error" + TIMEOUT = "timeout" + CONFIGURATION_ERROR = "configuration_error" + DEPENDENCY_FAILURE = "dependency_failure" + + # Data and validation failures + VALIDATION_ERROR = "validation_error" + DATA_ERROR = "data_error" + + # External service failures + NETWORK_ERROR = "network_error" + SERVICE_UNAVAILABLE = "service_unavailable" + + # Variant processing specific + VRS_MAPPING_FAILED = "vrs_mapping_failed" + + # Catch-all + UNKNOWN = "unknown" + + +class AnnotationStatus(str, Enum): + """Status of individual variant annotations.""" + + SUCCESS = "success" + FAILED = "failed" + SKIPPED = "skipped" + + +class AnnotationFailureCategory(str, Enum): + """Categories of annotation-level failures on individual variants. + + These describe WHY a specific variant's annotation failed or was skipped, + as opposed to job-level FailureCategory which describes why an entire job failed. 
+ """ + + MISSING_IDENTIFIER = "missing_identifier" # Required identifier (e.g. ClinGen allele ID) not present on variant + UNSUPPORTED_IDENTIFIER = "unsupported_identifier" # Identifier exists but is in an unsupported format (multi-variant, unrecognized prefix) + EXTERNAL_API_ERROR = "external_api_error" # External service call failed (network error, timeout, auth, rate limit) + EXTERNAL_SERVICE_REJECTED = "external_service_rejected" # External service was reachable but explicitly rejected our submission (e.g. CAR returned InvalidHGVS) + EXTERNAL_REFERENCE_NOT_FOUND = ( + "external_reference_not_found" # Lookup succeeded but external resource doesn't exist + ) + NO_LINKED_ALLELE = "no_linked_allele" # No linked allele found in external registry (ClinVar, CA/PA translations) + UNKNOWN = "unknown" # Catch-all for uncategorized failures + + +class JobType(str, Enum): + """Types of jobs in the pipeline.""" + + VARIANT_CREATION = "variant_creation" + VARIANT_MAPPING = "variant_mapping" + MAPPED_VARIANT_ANNOTATION = "mapped_variant_annotation" + PIPELINE_MANAGEMENT = "pipeline_management" + DATA_MANAGEMENT = "data_management" + SYSTEM_MAINTENANCE = "system_maintenance" diff --git a/src/mavedb/models/job_dependency.py b/src/mavedb/models/job_dependency.py new file mode 100644 index 000000000..ac851c7d7 --- /dev/null +++ b/src/mavedb/models/job_dependency.py @@ -0,0 +1,65 @@ +""" +SQLAlchemy models for job dependencies. 
+""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, Optional + +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.mutable import MutableDict +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.models.enums import DependencyType + +if TYPE_CHECKING: + from mavedb.models.job_run import JobRun + + +class JobDependency(Base): + """ + Defines dependencies between jobs within a pipeline. + + This table maps jobs to their pipeline and defines execution order. + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. + """ + + __tablename__ = "job_dependencies" + + # The job being defined (references job_runs.id). Composite primary key with the dependency we are defining. 
+ id: Mapped[int] = mapped_column(Integer, ForeignKey("job_runs.id", ondelete="CASCADE"), primary_key=True) + depends_on_job_id: Mapped[int] = mapped_column( + Integer, ForeignKey("job_runs.id", ondelete="CASCADE"), nullable=False, primary_key=True + ) + + # Type of dependency + dependency_type: Mapped[Optional[DependencyType]] = mapped_column(String(50), nullable=False) + + # Timestamps + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + + # Flexible metadata + metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column( + "metadata", MutableDict.as_mutable(JSONB), nullable=True + ) + + # Relationships + job_run: Mapped["JobRun"] = relationship("JobRun", back_populates="job_dependencies", foreign_keys=[id]) + depends_on_job: Mapped["JobRun"] = relationship("JobRun", foreign_keys=[depends_on_job_id], remote_side="JobRun.id") + + # Indexes + __table_args__ = ( + Index("ix_job_dependencies_depends_on_job_id", "depends_on_job_id"), + Index("ix_job_dependencies_created_at", "created_at"), + CheckConstraint( + "dependency_type IS NULL OR dependency_type IN ('success_required', 'completion_required')", + name="ck_job_dependencies_type_valid", + ), + ) + + def __repr__(self) -> str: + return f"<JobDependency(job_run_id={self.id}, depends_on_job_id={self.depends_on_job_id}, type={self.dependency_type})>" diff --git a/src/mavedb/models/job_run.py b/src/mavedb/models/job_run.py new file mode 100644 index 000000000..877eeab02 --- /dev/null +++ b/src/mavedb/models/job_run.py @@ -0,0 +1,110 @@ +""" +SQLAlchemy models for job runs.
+""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, Optional + +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.mutable import MutableDict +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.lib.urns import generate_job_run_urn +from mavedb.models.enums import JobStatus + +if TYPE_CHECKING: + from mavedb.models.job_dependency import JobDependency + from mavedb.models.pipeline import Pipeline + + +class JobRun(Base): + """ + Represents a single execution of a job. + + Jobs can be retried, so there may be multiple JobRun records for the same logical job. + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. 
+ """ + + __tablename__ = "job_runs" + + # Primary identification + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + urn: Mapped[str] = mapped_column(String(255), nullable=True, unique=True, default=generate_job_run_urn) + + # Job definition + job_type: Mapped[str] = mapped_column(String(100), nullable=False) + job_function: Mapped[str] = mapped_column(String(255), nullable=False) + job_params: Mapped[Optional[Dict[str, Any]]] = mapped_column(MutableDict.as_mutable(JSONB), nullable=True) + + # Execution tracking + status: Mapped[JobStatus] = mapped_column(String(50), nullable=False, default=JobStatus.PENDING) + + # Pipeline association + pipeline_id: Mapped[Optional[int]] = mapped_column( + Integer, ForeignKey("pipelines.id", ondelete="SET NULL"), nullable=True + ) + + # Scheduling + max_retries: Mapped[int] = mapped_column(Integer, nullable=False, default=3) + retry_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + retry_delay_seconds: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + + # Timing + scheduled_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + + # Error handling + error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + error_traceback: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + failure_category: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) + + # Progress tracking + progress_current: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + progress_total: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + progress_message: Mapped[Optional[str]] = 
mapped_column(String(500), nullable=True) + + # Correlation for tracing + correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + + # Flexible metadata + metadata_: Mapped[Dict[str, Any]] = mapped_column( + "metadata", MutableDict.as_mutable(JSONB), nullable=False, server_default="{}" + ) + + # Version tracking + mavedb_version: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) + + # Relationships + job_dependencies: Mapped[list["JobDependency"]] = relationship( + "JobDependency", back_populates="job_run", uselist=True, foreign_keys="[JobDependency.id]" + ) + pipeline: Mapped[Optional["Pipeline"]] = relationship( + "Pipeline", back_populates="job_runs", foreign_keys="[JobRun.pipeline_id]" + ) + + # Indexes + __table_args__ = ( + Index("ix_job_runs_status", "status"), + Index("ix_job_runs_job_type", "job_type"), + Index("ix_job_runs_pipeline_id", "pipeline_id"), + Index("ix_job_runs_scheduled_at", "scheduled_at"), + Index("ix_job_runs_created_at", "created_at"), + Index("ix_job_runs_correlation_id", "correlation_id"), + Index("ix_job_runs_status_scheduled", "status", "scheduled_at"), + CheckConstraint( + "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'errored', 'cancelled', 'skipped')", + name="ck_job_runs_status_valid", + ), + CheckConstraint("max_retries >= 0", name="ck_job_runs_max_retries_positive"), + CheckConstraint("retry_count >= 0", name="ck_job_runs_retry_count_positive"), + ) + + def __repr__(self) -> str: + return f"<JobRun(id={self.id}, urn={self.urn}, job_function={self.job_function}, status={self.status})>" diff --git a/src/mavedb/models/pipeline.py b/src/mavedb/models/pipeline.py new file mode 100644 index 000000000..717ec24cb --- /dev/null +++ b/src/mavedb/models/pipeline.py @@ -0,0 +1,89 @@ +""" +SQLAlchemy models for job pipelines.
+""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.mutable import MutableDict +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.lib.urns import generate_pipeline_urn +from mavedb.models.enums import PipelineStatus +from mavedb.models.job_run import JobRun + +if TYPE_CHECKING: + from mavedb.models.user import User + + +class Pipeline(Base): + """ + Represents a high-level workflow that groups related jobs. + + Examples: + - Processing a score set upload + - Batch re-annotation of variants + - Database migration workflows + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. 
+ """ + + __tablename__ = "pipelines" + + # Primary identification + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + urn: Mapped[str] = mapped_column(String(255), nullable=True, unique=True, default=generate_pipeline_urn) + name: Mapped[str] = mapped_column(String(500), nullable=False) + description: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + + # Status and lifecycle + status: Mapped[PipelineStatus] = mapped_column(String(50), nullable=False, default=PipelineStatus.CREATED) + + # Correlation for end-to-end tracing + correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + + # Flexible metadata storage + metadata_: Mapped[Dict[str, Any]] = mapped_column( + "metadata", + MutableDict.as_mutable(JSONB), + nullable=False, + comment="Flexible metadata storage for pipeline-specific data", + server_default="{}", + ) + + # Timestamps + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + + # User tracking + created_by_user_id: Mapped[Optional[int]] = mapped_column( + Integer, ForeignKey("users.id", ondelete="SET NULL"), nullable=True + ) + + # Version tracking + mavedb_version: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) + + # Relationships + job_runs: Mapped[List["JobRun"]] = relationship("JobRun", back_populates="pipeline", cascade="all, delete-orphan") + created_by_user: Mapped[Optional["User"]] = relationship("User", foreign_keys=[created_by_user_id]) + + # Indexes + __table_args__ = ( + Index("ix_pipelines_status", "status"), + Index("ix_pipelines_created_at", "created_at"), + Index("ix_pipelines_correlation_id", "correlation_id"), + Index("ix_pipelines_created_by_user_id", "created_by_user_id"), + CheckConstraint( + 
"status IN ('created', 'running', 'succeeded', 'failed', 'cancelled', 'paused', 'partial')", + name="ck_pipelines_status_valid", + ), + ) + + def __repr__(self) -> str: + return f"<Pipeline(id={self.id}, urn={self.urn}, name={self.name}, status={self.status})>" diff --git a/src/mavedb/models/variant_annotation_status.py b/src/mavedb/models/variant_annotation_status.py new file mode 100644 index 000000000..f39c47a64 --- /dev/null +++ b/src/mavedb/models/variant_annotation_status.py @@ -0,0 +1,121 @@ +""" +SQLAlchemy models for variant annotation status. +""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, Optional + +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.mutable import MutableDict +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus + +if TYPE_CHECKING: + from mavedb.models.job_run import JobRun + from mavedb.models.variant import Variant + + +class VariantAnnotationStatus(Base): + """ + Tracks annotation status for individual variants. + + Allows us to see which variants failed annotation and why. + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. + """ + + __tablename__ = "variant_annotation_status" + + # Primary key + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + + variant_id: Mapped[int] = mapped_column(Integer, ForeignKey("variants.id", ondelete="CASCADE"), nullable=False) + annotation_type: Mapped[str] = mapped_column( + String(50), nullable=False, comment="Type of annotation: vrs, clinvar, gnomad, etc."
+ ) + + # Source version + version: Mapped[Optional[str]] = mapped_column( + String(50), nullable=True, comment="Version of the annotation source used (if applicable)" + ) + + # Status tracking + status: Mapped[AnnotationStatus] = mapped_column(String(50), nullable=False, comment="success, failed, skipped") + + # Error information + error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + failure_category: Mapped[Optional[AnnotationFailureCategory]] = mapped_column(String(100), nullable=True) + + # Annotation metadata (flexible JSONB for annotation results) + annotation_metadata: Mapped[Optional[Dict[str, Any]]] = mapped_column( + MutableDict.as_mutable(JSONB), nullable=True, comment="Structured metadata for the annotation result" + ) + + # Current flag + current: Mapped[bool] = mapped_column( + nullable=False, + server_default="true", + comment="Whether this is the current status for the variant and annotation type", + ) + + # Job tracking + job_run_id: Mapped[Optional[int]] = mapped_column( + Integer, ForeignKey("job_runs.id", ondelete="SET NULL"), nullable=True + ) + + # Timestamps + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now() + ) + + # Relationships + variant: Mapped["Variant"] = relationship("Variant") + job_run: Mapped[Optional["JobRun"]] = relationship("JobRun") + + # Indexes + __table_args__ = ( + # Indexes should be kept minimal to reduce write overhead on this large, append-only table. + # The 'current' flag is included in the index to optimize queries that filter for current=True, + # which is the common case when looking up annotation status for a variant. 
+ Index( + "ix_variant_annotation_status_variant_type_version_current", + "variant_id", + "annotation_type", + "version", + "current", + ), + # FK index for job_run_id — needed for CASCADE deletes on job_runs + Index("ix_variant_annotation_status_job_run_id", "job_run_id"), + CheckConstraint( + "annotation_type IN ('vrs_mapping', 'clingen_allele_id', 'mapped_hgvs', 'variant_translation', 'gnomad_allele_frequency', 'clinvar_control', 'vep_functional_consequence', 'ldh_submission')", + name="ck_variant_annotation_type_valid", + ), + CheckConstraint( + "status IN ('success', 'failed', 'skipped')", + name="ck_variant_annotation_status_valid", + ), + CheckConstraint( + "failure_category IS NULL OR failure_category IN ('missing_identifier', 'unsupported_identifier', 'external_api_error', 'external_service_rejected', 'external_reference_not_found', 'no_linked_allele', 'unknown')", + name="ck_variant_annotation_failure_category_valid", + ), + ## Although un-enforced at the DB level, we should ensure only one 'current' record per (variant_id, annotation_type, version) + ) + + def __repr__(self) -> str: + return ( + f"<VariantAnnotationStatus(variant_id={self.variant_id}, annotation_type={self.annotation_type}, status={self.status}, current={self.current})>" + ) diff --git a/src/mavedb/routers/job_runs.py b/src/mavedb/routers/job_runs.py new file mode 100644 index 000000000..4026c98cc --- /dev/null +++ b/src/mavedb/routers/job_runs.py @@ -0,0 +1,109 @@ +"""Admin-only observability endpoints for job run inspection. + +These endpoints expose job run status, progress, and error details to operators +for diagnosing stuck or failing jobs. Permissions are currently admin-only; +finer-grained access checks can be added later when user-facing UI consumes +this data.
+""" + +import logging +from datetime import datetime +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Query +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb import deps +from mavedb.lib.authorization import RoleRequirer +from mavedb.lib.logging import LoggedRoute +from mavedb.lib.logging.context import logging_context, save_to_logging_context +from mavedb.lib.types.authentication import UserData +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.enums.user_role import UserRole +from mavedb.models.job_run import JobRun +from mavedb.routers.shared import ACCESS_CONTROL_ERROR_RESPONSES, PUBLIC_ERROR_RESPONSES, ROUTER_BASE_PREFIX +from mavedb.view_models import job_run as job_run_view + +TAG_NAME = "Job Runs" + +router = APIRouter( + prefix=f"{ROUTER_BASE_PREFIX}/job-runs", + tags=[TAG_NAME], + responses={**PUBLIC_ERROR_RESPONSES}, + route_class=LoggedRoute, +) + +metadata = { + "name": TAG_NAME, + "description": "Operator observability for background job executions.", +} + +logger = logging.getLogger(__name__) + + +@router.get( + "/", + status_code=200, + response_model=list[job_run_view.SavedJobRun], + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, + summary="List job runs", +) +def list_job_runs( + *, + db: Session = Depends(deps.get_db), + _: UserData = Depends(RoleRequirer([UserRole.admin])), + status: Optional[JobStatus] = Query(None, description="Filter by job run status."), + job_type: Optional[str] = Query(None, description="Filter by job type."), + job_function: Optional[str] = Query(None, description="Filter by job function name."), + correlation_id: Optional[str] = Query(None, description="Filter by correlation id."), + pipeline_id: Optional[int] = Query(None, description="Filter by parent pipeline id."), + created_after: Optional[datetime] = Query(None, description="Only return job runs created at or after this time."), + created_before: Optional[datetime] = 
Query( + None, description="Only return job runs created at or before this time." + ), + limit: int = Query(50, ge=1, le=500), + offset: int = Query(0, ge=0), +) -> list[JobRun]: + """List job runs with optional filters. Admin only.""" + query = select(JobRun) + if status is not None: + query = query.where(JobRun.status == status) + if job_type is not None: + query = query.where(JobRun.job_type == job_type) + if job_function is not None: + query = query.where(JobRun.job_function == job_function) + if correlation_id is not None: + query = query.where(JobRun.correlation_id == correlation_id) + if pipeline_id is not None: + query = query.where(JobRun.pipeline_id == pipeline_id) + if created_after is not None: + query = query.where(JobRun.created_at >= created_after) + if created_before is not None: + query = query.where(JobRun.created_at <= created_before) + + query = query.order_by(JobRun.created_at.desc()).limit(limit).offset(offset) + return list(db.scalars(query).all()) + + +@router.get( + "/{urn}", + status_code=200, + response_model=job_run_view.JobRunDetail, + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, + summary="Show job run with full error details", +) +def show_job_run( + *, + urn: str, + db: Session = Depends(deps.get_db), + _: UserData = Depends(RoleRequirer([UserRole.admin])), +) -> JobRun: + """Fetch a single job run by URN, including error traceback. 
Admin only.""" + save_to_logging_context({"requested_job_run_urn": urn}) + job_run = db.scalars(select(JobRun).where(JobRun.urn == urn)).one_or_none() + if job_run is None: + logger.warning(msg="Could not show job run; job run does not exist.", extra=logging_context()) + raise HTTPException(status_code=404, detail=f"job run with URN {urn} not found") + + return job_run diff --git a/src/mavedb/routers/pipelines.py b/src/mavedb/routers/pipelines.py new file mode 100644 index 000000000..d968537e6 --- /dev/null +++ b/src/mavedb/routers/pipelines.py @@ -0,0 +1,121 @@ +"""Admin-only observability endpoints for pipeline inspection. + +These endpoints expose pipeline status, progress, and listings to operators so +they can diagnose stuck or failing pipelines without direct database access. +Permissions are currently admin-only; finer-grained access checks can be added +later when user-facing UI consumes this data. +""" + +import logging +from datetime import datetime +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Query +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb import deps +from mavedb.lib.authorization import RoleRequirer +from mavedb.lib.logging import LoggedRoute +from mavedb.lib.logging.context import logging_context, save_to_logging_context +from mavedb.lib.types.authentication import UserData +from mavedb.models.enums.job_pipeline import PipelineStatus +from mavedb.models.enums.user_role import UserRole +from mavedb.models.pipeline import Pipeline +from mavedb.routers.shared import ACCESS_CONTROL_ERROR_RESPONSES, PUBLIC_ERROR_RESPONSES, ROUTER_BASE_PREFIX +from mavedb.view_models import pipeline as pipeline_view +from mavedb.worker.lib.managers.exceptions import DatabaseConnectionError, PipelineStateError +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + +TAG_NAME = "Pipelines" + +router = APIRouter( + prefix=f"{ROUTER_BASE_PREFIX}/pipelines", + tags=[TAG_NAME], + 
responses={**PUBLIC_ERROR_RESPONSES}, + route_class=LoggedRoute, +) + +metadata = { + "name": TAG_NAME, + "description": "Operator observability for background pipeline executions.", +} + +logger = logging.getLogger(__name__) + + +@router.get( + "/", + status_code=200, + response_model=list[pipeline_view.SavedPipeline], + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, + summary="List pipelines", +) +def list_pipelines( + *, + db: Session = Depends(deps.get_db), + _: UserData = Depends(RoleRequirer([UserRole.admin])), + status: Optional[PipelineStatus] = Query(None, description="Filter by pipeline status."), + name: Optional[str] = Query(None, description="Filter by pipeline name (exact match)."), + correlation_id: Optional[str] = Query(None, description="Filter by correlation id."), + created_by_user_id: Optional[int] = Query(None, description="Filter by creating user id."), + created_after: Optional[datetime] = Query(None, description="Only return pipelines created at or after this time."), + created_before: Optional[datetime] = Query( + None, description="Only return pipelines created at or before this time." + ), + limit: int = Query(50, ge=1, le=500), + offset: int = Query(0, ge=0), +) -> list[Pipeline]: + """List pipelines with optional filters. 
Admin only.""" + query = select(Pipeline) + if status is not None: + query = query.where(Pipeline.status == status) + if name is not None: + query = query.where(Pipeline.name == name) + if correlation_id is not None: + query = query.where(Pipeline.correlation_id == correlation_id) + if created_by_user_id is not None: + query = query.where(Pipeline.created_by_user_id == created_by_user_id) + if created_after is not None: + query = query.where(Pipeline.created_at >= created_after) + if created_before is not None: + query = query.where(Pipeline.created_at <= created_before) + + query = query.order_by(Pipeline.created_at.desc()).limit(limit).offset(offset) + return list(db.scalars(query).all()) + + +@router.get( + "/{urn}", + status_code=200, + response_model=pipeline_view.PipelineDetail, + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, + summary="Show pipeline with progress", +) +def show_pipeline( + *, + urn: str, + db: Session = Depends(deps.get_db), + _: UserData = Depends(RoleRequirer([UserRole.admin])), +) -> pipeline_view.PipelineDetail: + """Fetch a single pipeline by URN including job progress statistics. Admin only.""" + save_to_logging_context({"requested_pipeline_urn": urn}) + pipeline = db.scalars(select(Pipeline).where(Pipeline.urn == urn)).one_or_none() + if pipeline is None: + logger.warning(msg="Could not show pipeline; pipeline does not exist.", extra=logging_context()) + raise HTTPException(status_code=404, detail=f"pipeline with URN {urn} not found") + + # PipelineManager is reused here rather than duplicating progress aggregation logic. + # Redis is not required for read-only progress aggregation, so None is acceptable if somewhat hacky. 
+ manager = PipelineManager(db=db, redis=None, pipeline_id=pipeline.id) # type: ignore[arg-type] + try: + progress = manager.get_pipeline_progress() + except (DatabaseConnectionError, PipelineStateError) as exc: + logger.exception(msg="Failed to compute pipeline progress.", extra=logging_context()) + raise HTTPException(status_code=500, detail=str(exc)) + + saved = pipeline_view.SavedPipeline.model_validate(pipeline) + return pipeline_view.PipelineDetail( + **saved.model_dump(by_alias=False), + progress=pipeline_view.PipelineProgress(**progress), + ) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 47532cd31..b2076cc4a 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -1,3 +1,4 @@ +import io import json import logging import time @@ -20,6 +21,7 @@ from sqlalchemy.orm import Session, contains_eager from mavedb import deps +from mavedb.data_providers.services import CSV_UPLOAD_S3_BUCKET_NAME, s3_client from mavedb.lib.annotation.annotate import ( variant_functional_impact_statement, variant_pathogenicity_evidence, @@ -66,6 +68,7 @@ generate_experiment_urn, generate_score_set_urn, ) +from mavedb.lib.workflow.pipeline_factory import PipelineFactory from mavedb.models.clinical_control import ClinicalControl from mavedb.models.contributor import Contributor from mavedb.models.enums.processing_state import ProcessingState @@ -94,6 +97,7 @@ from mavedb.view_models.score_set_dataset_columns import DatasetColumnMetadata from mavedb.view_models.search import ScoreSetsSearch, ScoreSetsSearchFilterOptionsResponse, ScoreSetsSearchResponse from mavedb.view_models.target_gene import TargetGeneCreate +from mavedb.worker.lib.managers.utils import arq_job_id TAG_NAME = "Score Sets" logger = logging.getLogger(__name__) @@ -111,6 +115,7 @@ async def enqueue_variant_creation( new_score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, new_count_columns_metadata: Optional[dict[str, 
DatasetColumnMetadata]] = None, worker: ArqRedis, + db: Session, ) -> None: assert item.dataset_columns is not None @@ -136,25 +141,84 @@ async def enqueue_variant_creation( variants_to_csv_rows(item.variants, columns=count_columns, namespaced=False) ).replace("NA", np.NaN) - # Await the insertion of this job into the worker queue, not the job itself. - # Uses provided score and counts dataframes and metadata files, or falls back to existing data on the score set if not provided. - job = await worker.enqueue_job( - "create_variants_for_score_set", - correlation_id_for_context(), - item.id, - user_data.user.id, - existing_scores_df if new_scores_df is None else new_scores_df, - existing_counts_df if new_counts_df is None else new_counts_df, - item.dataset_columns.get("score_columns_metadata") - if new_score_columns_metadata is None - else new_score_columns_metadata, - item.dataset_columns.get("count_columns_metadata") - if new_count_columns_metadata is None - else new_count_columns_metadata, - ) - if job is not None: - save_to_logging_context({"worker_job_id": job.job_id}) - logger.info(msg="Enqueued variant creation job.", extra=logging_context()) + scores_file_to_upload = existing_scores_df if new_scores_df is None else new_scores_df + counts_file_to_upload = existing_counts_df if new_counts_df is None else new_counts_df + + scores_file_key = None + counts_file_key = None + if scores_file_to_upload is not None or counts_file_to_upload is not None: + timestamp = date.today().isoformat() + unique_id = str(int(time.time() * 1000)) + user_id = user_data.user.id + score_set_id = item.id + + s3 = s3_client() + + if scores_file_to_upload is not None: + save_to_logging_context({"num_scores": len(scores_file_to_upload)}) + scores_file_key = f"{score_set_id}/{user_id}/{timestamp}-{unique_id}-scores.csv" + s3.upload_fileobj( + Fileobj=io.BytesIO(scores_file_to_upload.to_csv(index=False).encode("utf-8")), + Bucket=CSV_UPLOAD_S3_BUCKET_NAME, + Key=scores_file_key, + ) + + if 
counts_file_to_upload is not None: + save_to_logging_context({"num_counts": len(counts_file_to_upload)}) + counts_file_key = f"{score_set_id}/{user_id}/{timestamp}-{unique_id}-counts.csv" + s3.upload_fileobj( + Fileobj=io.BytesIO(counts_file_to_upload.to_csv(index=False).encode("utf-8")), + Bucket=CSV_UPLOAD_S3_BUCKET_NAME, + Key=counts_file_key, + ) + + try: + pipeline_factory = PipelineFactory(session=db) + pipeline, pipeline_entrypoint = pipeline_factory.create_pipeline( + pipeline_name="validate_map_annotate_score_set", + creating_user=user_data.user, + pipeline_params={ + "correlation_id": correlation_id_for_context(), + "score_set_id": item.id, + "updater_id": user_data.user.id, + "scores_file_key": scores_file_key, + "counts_file_key": counts_file_key, + "score_columns_metadata": item.dataset_columns.get("score_columns_metadata") + if new_score_columns_metadata is None + else new_score_columns_metadata, + "count_columns_metadata": item.dataset_columns.get("count_columns_metadata") + if new_count_columns_metadata is None + else new_count_columns_metadata, + }, + ) + + # Await the insertion of this job into the worker queue, not the job itself. + # Uses provided score and counts dataframes and metadata files, or falls back to existing data on the score set if not provided. + job = await worker.enqueue_job( + pipeline_entrypoint.job_function, pipeline_entrypoint.id, _job_id=arq_job_id(pipeline_entrypoint) + ) + if job is not None: + save_to_logging_context({"worker_job_id": job.job_id}) + logger.info( + msg="Enqueued validate_map_annotate_score_set pipeline (job_id: {}).".format(job.job_id), + extra=logging_context(), + ) + except Exception: + # Clean up any S3 files uploaded during this call to avoid orphaned objects when the + # pipeline could not be created or enqueued. 
+ keys_to_delete = [k for k in [scores_file_key, counts_file_key] if k is not None] + if keys_to_delete: + try: + s3_client().delete_objects( + Bucket=CSV_UPLOAD_S3_BUCKET_NAME, + Delete={"Objects": [{"Key": k} for k in keys_to_delete]}, + ) + except Exception: + logger.error( + msg="Failed to clean up orphaned S3 files after pipeline enqueue failure.", + extra=logging_context(), + ) + raise class ScoreSetUpdateResult(TypedDict): @@ -1875,15 +1939,33 @@ async def upload_score_set_variant_data( logger.info(msg="Enqueuing variant creation job.", extra=logging_context()) - await enqueue_variant_creation( - item=item, - user_data=user_data, - new_scores_df=score_set_variants_data["scores_df"], - new_counts_df=score_set_variants_data["counts_df"], - new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata", {}), - new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata", {}), - worker=worker, - ) + try: + await enqueue_variant_creation( + item=item, + user_data=user_data, + new_scores_df=score_set_variants_data["scores_df"], + new_counts_df=score_set_variants_data["counts_df"], + new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata", {}), + new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata", {}), + worker=worker, + db=db, + ) + except Exception: + logger.error( + msg="Failed to enqueue variant creation pipeline; resetting score set processing state.", + extra=logging_context(), + ) + try: + db.rollback() + item.processing_state = ProcessingState.failed + db.add(item) + db.commit() + except Exception: + logger.error( + msg="Failed to reset score set processing state after pipeline enqueue failure.", + extra=logging_context(), + ) + raise HTTPException(status_code=500, detail="Failed to enqueue variant processing pipeline.") db.add(item) db.commit() @@ -2037,19 +2119,37 @@ async def update_score_set_with_variants( updatedItem.processing_state = ProcessingState.processing 
logger.info(msg="Enqueuing variant creation job.", extra=logging_context()) - await enqueue_variant_creation( - item=updatedItem, - user_data=user_data, - worker=worker, - new_scores_df=score_set_variants_data["scores_df"], - new_counts_df=score_set_variants_data["counts_df"], - new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata") - if did_score_columns_metadata_change - else existing_score_columns_metadata, - new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata") - if did_count_columns_metadata_change - else existing_count_columns_metadata, - ) + try: + await enqueue_variant_creation( + item=updatedItem, + user_data=user_data, + worker=worker, + new_scores_df=score_set_variants_data["scores_df"], + new_counts_df=score_set_variants_data["counts_df"], + new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata") + if did_score_columns_metadata_change + else existing_score_columns_metadata, + new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata") + if did_count_columns_metadata_change + else existing_count_columns_metadata, + db=db, + ) + except Exception: + logger.error( + msg="Failed to enqueue variant creation pipeline; resetting score set processing state.", + extra=logging_context(), + ) + try: + db.rollback() + updatedItem.processing_state = ProcessingState.failed + db.add(updatedItem) + db.commit() + except Exception: + logger.error( + msg="Failed to reset score set processing state after pipeline enqueue failure.", + extra=logging_context(), + ) + raise HTTPException(status_code=500, detail="Failed to enqueue variant processing pipeline.") db.add(updatedItem) db.commit() @@ -2096,7 +2196,29 @@ async def update_score_set( updatedItem.processing_state = ProcessingState.processing logger.info(msg="Enqueuing variant creation job.", extra=logging_context()) - await enqueue_variant_creation(item=updatedItem, user_data=user_data, worker=worker) + try: + await 
enqueue_variant_creation( + item=updatedItem, + user_data=user_data, + worker=worker, + db=db, + ) + except Exception: + logger.error( + msg="Failed to enqueue variant creation pipeline; resetting score set processing state.", + extra=logging_context(), + ) + try: + db.rollback() + updatedItem.processing_state = ProcessingState.failed + db.add(updatedItem) + db.commit() + except Exception: + logger.error( + msg="Failed to reset score set processing state after pipeline enqueue failure.", + extra=logging_context(), + ) + raise HTTPException(status_code=500, detail="Failed to enqueue variant processing pipeline.") db.add(updatedItem) db.commit() diff --git a/src/mavedb/scripts/clingen_car_submission.py b/src/mavedb/scripts/clingen_car_submission.py deleted file mode 100644 index 0c0e7bc4c..000000000 --- a/src/mavedb/scripts/clingen_car_submission.py +++ /dev/null @@ -1,134 +0,0 @@ -import logging -from typing import Sequence - -import click -from sqlalchemy import select -from sqlalchemy.orm import Session - -from mavedb.lib.clingen.constants import CAR_SUBMISSION_ENDPOINT -from mavedb.lib.clingen.services import ClinGenAlleleRegistryService, get_allele_registry_associations -from mavedb.lib.variants import get_hgvs_from_post_mapped -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.scripts.environment import with_database_session - -logger = logging.getLogger(__name__) - - -def submit_urns_to_car(db: Session, urns: Sequence[str], debug: bool) -> list[str]: - if not CAR_SUBMISSION_ENDPOINT: - logger.error("`CAR_SUBMISSION_ENDPOINT` is not set. Please check your configuration.") - return [] - - car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) - submitted_entities = [] - - if debug: - logger.debug("Debug mode enabled. 
Submitting only one request to ClinGen CAR.") - urns = urns[:1] - - for idx, urn in enumerate(urns): - logger.info(f"Processing URN: {urn}. (Scoreset {idx + 1}/{len(urns)})") - try: - score_set = db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one_or_none() - if not score_set: - logger.warning(f"No score set found for URN: {urn}") - continue - - logger.info(f"Submitting mapped variants to CAR service for score set with URN: {urn}") - variant_objects = db.execute( - select(Variant, MappedVariant) - .join(MappedVariant, MappedVariant.variant_id == Variant.id) - .join(ScoreSet) - .where(ScoreSet.urn == urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() - - if not variant_objects: - logger.warning(f"No mapped variants found for score set with URN: {urn}") - continue - - if debug: - logger.debug(f"Debug mode enabled. Submitting only one variant to ClinGen CAR for URN: {urn}") - variant_objects = variant_objects[:1] - - logger.debug(f"Preparing {len(variant_objects)} mapped variants for CAR submission") - hgvs_to_mapped_variant: dict[str, list[int]] = {} - for variant, mapped_variant in variant_objects: - hgvs = get_hgvs_from_post_mapped(mapped_variant.post_mapped) - if hgvs and hgvs not in hgvs_to_mapped_variant: - hgvs_to_mapped_variant[hgvs] = [mapped_variant.id] - elif hgvs and hgvs in hgvs_to_mapped_variant: - hgvs_to_mapped_variant[hgvs].append(mapped_variant.id) - else: - logger.warning(f"No HGVS string found for mapped variant {variant.urn}") - - if not hgvs_to_mapped_variant: - logger.warning(f"No HGVS strings to submit for URN: {urn}") - continue - - logger.info(f"Submitting {len(hgvs_to_mapped_variant)} HGVS strings to CAR service for URN: {urn}") - response = car_service.dispatch_submissions(list(hgvs_to_mapped_variant.keys())) - - if not response: - logger.error(f"CAR submission failed for URN: {urn}") - else: - logger.info(f"Successfully submitted to CAR for URN: {urn}") - # Associate CAIDs with 
mapped variants - associations = get_allele_registry_associations(list(hgvs_to_mapped_variant.keys()), response) - for hgvs, caid in associations.items(): - mapped_variant_ids = hgvs_to_mapped_variant.get(hgvs, []) - for mv_id in mapped_variant_ids: - mapped_variant = db.scalar(select(MappedVariant).where(MappedVariant.id == mv_id)) - if not mapped_variant: - logger.warning(f"Mapped variant with ID {mv_id} not found for HGVS {hgvs}.") - continue - - mapped_variant.clingen_allele_id = caid - db.add(mapped_variant) - - submitted_entities.extend([variant.urn for variant, _ in variant_objects]) - - except Exception as e: - logger.error(f"Error processing URN {urn}", exc_info=e) - - return submitted_entities - - -@click.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--all", help="Submit variants for every score set in MaveDB.", is_flag=True) -@click.option("--suppress-output", help="Suppress final print output to the console.", is_flag=True) -@click.option("--debug", help="Enable debug mode. This will send only one request at most to ClinGen CAR", is_flag=True) -def submit_car_urns_command( - db: Session, - urns: Sequence[str], - all: bool, - suppress_output: bool, - debug: bool, -) -> None: - """ - Submit data to ClinGen Allele Registry for mapped variant CAID generation for the given URNs. - """ - if urns and all: - logger.error("Cannot provide both URNs and --all option.") - return - - if all: - urns = db.scalars(select(ScoreSet.urn)).all() # type: ignore - - if not urns: - logger.error("No URNs provided. 
Please provide at least one URN.") - return - - submitted_variant_urns = submit_urns_to_car(db, urns, debug) - - if not suppress_output: - print(", ".join(submitted_variant_urns)) - - -if __name__ == "__main__": - submit_car_urns_command() diff --git a/src/mavedb/scripts/clingen_ldh_submission.py b/src/mavedb/scripts/clingen_ldh_submission.py deleted file mode 100644 index 94f16520b..000000000 --- a/src/mavedb/scripts/clingen_ldh_submission.py +++ /dev/null @@ -1,197 +0,0 @@ -import click -import logging -import re -from typing import Optional, Sequence - -from sqlalchemy import and_, select -from sqlalchemy.orm import Session - -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.models.mapped_variant import MappedVariant -from mavedb.scripts.environment import with_database_session -from mavedb.lib.clingen.services import ClinGenLdhService -from mavedb.lib.clingen.constants import DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_ENDPOINT -from mavedb.lib.clingen.content_constructors import construct_ldh_submission -from mavedb.lib.variants import get_hgvs_from_post_mapped - -logger = logging.getLogger(__name__) - -intronic_variant_with_reference_regex = re.compile(r":c\..*[+-]") -variant_with_reference_regex = re.compile(r":") - - -def submit_urns_to_clingen( - db: Session, urns: Sequence[str], unlinked_only: bool, prefer_unmapped_hgvs: bool, debug: bool -) -> list[str]: - ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) - ldh_service.authenticate() - - submitted_entities = [] - - if debug: - logger.debug("Debug mode enabled. Submitting only one request to ClinGen.") - urns = urns[:1] - - for idx, urn in enumerate(urns): - logger.info(f"Processing URN: {urn}. 
(Scoreset {idx + 1}/{len(urns)})") - - try: - score_set = db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one_or_none() - if not score_set: - logger.warning(f"No score set found for URN: {urn}") - continue - - logger.info(f"Submitting mapped variants to LDH service for score set with URN: {urn}") - mapped_variant_join_clause = and_( - MappedVariant.variant_id == Variant.id, - MappedVariant.post_mapped.is_not(None), - MappedVariant.current.is_(True), - ) - variant_objects = db.execute( - select(Variant, MappedVariant) - .join(MappedVariant, mapped_variant_join_clause, isouter=True) - .join(ScoreSet) - .where(ScoreSet.urn == urn) - ).all() - - if not variant_objects: - logger.warning(f"No mapped variants found for score set with URN: {urn}") - continue - - logger.debug(f"Preparing {len(variant_objects)} mapped variants for submission") - - variant_content: list[tuple[str, Variant, Optional[MappedVariant]]] = [] - for variant, mapped_variant in variant_objects: - if mapped_variant is None: - if variant.hgvs_nt is not None and intronic_variant_with_reference_regex.search(variant.hgvs_nt): - # Use the hgvs_nt string for unmapped intronic variants. This is because our mapper does not yet - # support mapping intronic variants. - variation = variant.hgvs_nt - if variation: - logger.info(f"Using hgvs_nt for unmapped intronic variant {variant.urn}: {variation}") - elif variant.hgvs_nt is not None and variant_with_reference_regex.search(variant.hgvs_nt): - # Use the hgvs_nt string for other unmapped NT variants in accession-based score sets. - variation = variant.hgvs_nt - if variation: - logger.info(f"Using hgvs_nt for unmapped non-intronic variant {variant.urn}: {variation}") - elif variant.hgvs_pro is not None and variant_with_reference_regex.search(variant.hgvs_pro): - # Use the hgvs_pro string for unmapped PRO variants in accession-based score sets. 
- variation = variant.hgvs_pro - if variation: - logger.info(f"Using hgvs_pro for unmapped non-intronic variant {variant.urn}: {variation}") - else: - logger.warning( - f"No variation found for unmapped variant {variant.urn} (nt: {variant.hgvs_nt}, aa: {variant.hgvs_pro}, splice: {variant.hgvs_splice})." - ) - continue - else: - if unlinked_only and mapped_variant.clingen_allele_id: - continue - # If the script was run with the --prefer-unmapped-hgvs flag, use the hgvs_nt string rather than the - # mapped variant, as long as the variant is accession-based. - if ( - prefer_unmapped_hgvs - and variant.hgvs_nt is not None - and variant_with_reference_regex.search(variant.hgvs_nt) - ): - variation = variant.hgvs_nt - if variation: - logger.info(f"Using hgvs_nt for mapped variant {variant.urn}: {variation}") - elif ( - prefer_unmapped_hgvs - and variant.hgvs_pro is not None - and variant_with_reference_regex.search(variant.hgvs_pro) - ): - variation = variant.hgvs_pro - if variation: - logger.info( - f"Using hgvs_pro for mapped variant {variant.urn}: {variation}" - ) # continue # TEMPORARY. Only submit unmapped variants. - else: - variation = get_hgvs_from_post_mapped(mapped_variant) - if variation: - logger.info(f"Using mapped variant for {variant.urn}: {variation}") - - if not variation: - logger.warning( - f"No variation found for mapped variant {variant.urn} (nt: {variant.hgvs_nt}, aa: {variant.hgvs_pro}, splice: {variant.hgvs_splice})." - ) - continue - - variant_content.append((variation, variant, mapped_variant)) - - if debug: - logger.debug("Debug mode enabled. 
Submitting only one request to ClinGen.") - variant_content = variant_content[:1] - - logger.debug(f"Constructing LDH submission for {len(variant_content)} variants") - submission_content = construct_ldh_submission(variant_content) - submission_successes, submission_failures = ldh_service.dispatch_submissions( - submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE - ) - - if submission_failures: - logger.error(f"Failed to submit some variants for URN: {urn}") - else: - logger.info(f"Successfully submitted all variants for URN: {urn}") - - submitted_entities.extend([variant.urn for _, variant, _ in variant_content]) - - except Exception as e: - logger.error(f"Error processing URN {urn}", exc_info=e) - - # TODO#372: non-nullable urns. - return submitted_entities # type: ignore - - -@click.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--all", help="Submit variants for every score set in MaveDB.", is_flag=True) -@click.option( - "--unlinked", - default=False, - help="Only submit variants that have not already been linked to ClinGen alleles.", - is_flag=True, -) -@click.option( - "--prefer-unmapped-hgvs", - default=False, - help="If the unmapped HGVS string is accession-based, use it in the submission instead of the mapped variant.", - is_flag=True, -) -@click.option("--suppress-output", help="Suppress final print output to the console.", is_flag=True) -@click.option("--debug", help="Enable debug mode. This will send only one request at most to ClinGen", is_flag=True) -def submit_clingen_urns_command( - db: Session, - urns: Sequence[str], - all: bool, - unlinked: bool, - prefer_unmapped_hgvs: bool, - suppress_output: bool, - debug: bool, -) -> None: - """ - Submit data to ClinGen for mapped variant allele ID generation for the given URNs. - """ - if urns and all: - logger.error("Cannot provide both URNs and --all option.") - return - - if all: - # TODO#372: non-nullable urns. 
- urns = db.scalars(select(ScoreSet.urn)).all() # type: ignore - - if not urns: - logger.error("No URNs provided. Please provide at least one URN.") - return - - submitted_variant_urns = submit_urns_to_clingen(db, urns, unlinked, prefer_unmapped_hgvs, debug) - - if not suppress_output: - print(", ".join(submitted_variant_urns)) - - -if __name__ == "__main__": - submit_clingen_urns_command() diff --git a/src/mavedb/scripts/environment.py b/src/mavedb/scripts/environment.py index 66bdbb78b..831da7a45 100644 --- a/src/mavedb/scripts/environment.py +++ b/src/mavedb/scripts/environment.py @@ -4,16 +4,14 @@ import enum import logging -import click from functools import wraps - +import asyncclick as click from sqlalchemy.orm import configure_mappers from mavedb import deps from mavedb.models import * # noqa: F403 - logger = logging.getLogger(__name__) diff --git a/src/mavedb/scripts/job_runs.py b/src/mavedb/scripts/job_runs.py new file mode 100644 index 000000000..1ff0fdce7 --- /dev/null +++ b/src/mavedb/scripts/job_runs.py @@ -0,0 +1,176 @@ +"""Operator-facing CLI for inspecting job run state. 
+ +Usage: + # List all recent job runs + poetry run python -m mavedb.scripts.job_runs list-job-runs + + # Filter by status and job type + poetry run python -m mavedb.scripts.job_runs list-job-runs --status failed --job-type variant_mapping + + # Show a single job run with full error details + poetry run python -m mavedb.scripts.job_runs show-job-run urn:mavedb-job: +""" + +import json +import logging +from datetime import datetime +from typing import Optional + +import asyncclick as click +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.job_run import JobRun +from mavedb.scripts.environment import script_environment, with_database_session + +logger = logging.getLogger(__name__) + + +def _format_dt(dt: Optional[datetime]) -> str: + return dt.isoformat() if dt else "-" + + +@script_environment.command(name="list-job-runs") +@with_database_session +@click.option( + "--status", + type=click.Choice([s.value for s in JobStatus]), + default=None, + help="Filter by job run status.", +) +@click.option("--job-type", default=None, help="Filter by job type.") +@click.option("--job-function", default=None, help="Filter by job function name.") +@click.option("--correlation-id", default=None, help="Filter by correlation id.") +@click.option("--pipeline-id", type=int, default=None, help="Filter by parent pipeline id.") +@click.option("--limit", type=int, default=50, show_default=True, help="Maximum rows to return.") +@click.option("--json", "as_json", is_flag=True, help="Emit results as JSON.") +def list_job_runs( + db: Session, + status: Optional[str], + job_type: Optional[str], + job_function: Optional[str], + correlation_id: Optional[str], + pipeline_id: Optional[int], + limit: int, + as_json: bool, +) -> None: + """List job runs with optional filters.""" + query = select(JobRun) + if status: + query = query.where(JobRun.status == status) + if job_type: + query = 
query.where(JobRun.job_type == job_type) + if job_function: + query = query.where(JobRun.job_function == job_function) + if correlation_id: + query = query.where(JobRun.correlation_id == correlation_id) + if pipeline_id is not None: + query = query.where(JobRun.pipeline_id == pipeline_id) + + query = query.order_by(JobRun.created_at.desc()).limit(limit) + job_runs = db.scalars(query).all() + + if as_json: + rows = [ + { + "id": j.id, + "urn": j.urn, + "status": j.status, + "job_type": j.job_type, + "job_function": j.job_function, + "correlation_id": j.correlation_id, + "pipeline_id": j.pipeline_id, + "retry_count": j.retry_count, + "failure_category": j.failure_category, + "created_at": _format_dt(j.created_at), + "started_at": _format_dt(j.started_at), + "finished_at": _format_dt(j.finished_at), + } + for j in job_runs + ] + click.echo(json.dumps(rows, indent=2)) + return + + if not job_runs: + click.echo("No job runs match the given filters.") + return + + click.echo(f"{'ID':>6} {'STATUS':<10} {'TYPE':<24} {'FUNCTION':<36} " f"{'RETRIES':<8} {'CREATED':<26} URN") + for j in job_runs: + click.echo( + f"{j.id:>6} {str(j.status):<10} {j.job_type[:24]:<24} " + f"{j.job_function[:36]:<36} {j.retry_count:<8} " + f"{_format_dt(j.created_at):<26} {j.urn or '-'}" + ) + + +@script_environment.command(name="show-job-run") +@with_database_session +@click.argument("urn") +@click.option("--json", "as_json", is_flag=True, help="Emit full result as JSON.") +@click.option("--no-traceback", is_flag=True, help="Omit the error traceback from the output.") +def show_job_run(db: Session, urn: str, as_json: bool, no_traceback: bool) -> None: + """Show a single job run including error details.""" + job_run = db.scalars(select(JobRun).where(JobRun.urn == urn)).one_or_none() + if job_run is None: + click.echo(f"Job run not found: {urn}", err=True) + raise SystemExit(1) + + payload = { + "id": job_run.id, + "urn": job_run.urn, + "status": job_run.status, + "job_type": job_run.job_type, + 
"job_function": job_run.job_function, + "job_params": job_run.job_params, + "correlation_id": job_run.correlation_id, + "pipeline_id": job_run.pipeline_id, + "max_retries": job_run.max_retries, + "retry_count": job_run.retry_count, + "retry_delay_seconds": job_run.retry_delay_seconds, + "scheduled_at": _format_dt(job_run.scheduled_at), + "started_at": _format_dt(job_run.started_at), + "finished_at": _format_dt(job_run.finished_at), + "created_at": _format_dt(job_run.created_at), + "progress_current": job_run.progress_current, + "progress_total": job_run.progress_total, + "progress_message": job_run.progress_message, + "failure_category": job_run.failure_category, + "error_message": job_run.error_message, + "mavedb_version": job_run.mavedb_version, + "metadata": job_run.metadata_, + } + if not no_traceback: + payload["error_traceback"] = job_run.error_traceback + + if as_json: + click.echo(json.dumps(payload, indent=2, default=str)) + return + + click.echo(f"Job Run: {job_run.urn} (id={job_run.id})") + click.echo(f" Status: {job_run.status}") + click.echo(f" Type: {job_run.job_type}") + click.echo(f" Function: {job_run.job_function}") + click.echo(f" Pipeline id: {job_run.pipeline_id}") + click.echo(f" Correlation: {job_run.correlation_id or '-'}") + click.echo(f" Retries: {job_run.retry_count}/{job_run.max_retries}") + click.echo(f" Scheduled: {_format_dt(job_run.scheduled_at)}") + click.echo(f" Started: {_format_dt(job_run.started_at)}") + click.echo(f" Finished: {_format_dt(job_run.finished_at)}") + if job_run.progress_total is not None: + click.echo(f" Progress: {job_run.progress_current or 0}/{job_run.progress_total}") + if job_run.progress_message: + click.echo(f" Progress msg: {job_run.progress_message}") + if job_run.failure_category: + click.echo(f" Failure cat: {job_run.failure_category}") + if job_run.error_message: + click.echo(f" Error message: {job_run.error_message}") + if job_run.error_traceback and not no_traceback: + click.echo(" Error traceback:") 
+ for line in job_run.error_traceback.splitlines(): + click.echo(f" {line}") + + +if __name__ == "__main__": + script_environment() diff --git a/src/mavedb/scripts/link_clingen_variants.py b/src/mavedb/scripts/link_clingen_variants.py deleted file mode 100644 index 2ca3c0697..000000000 --- a/src/mavedb/scripts/link_clingen_variants.py +++ /dev/null @@ -1,75 +0,0 @@ -import click -import logging -from typing import Sequence - -from sqlalchemy import and_, select -from sqlalchemy.orm import Session - -from mavedb.lib.clingen.services import get_clingen_variation, clingen_allele_id_from_ldh_variation -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.models.mapped_variant import MappedVariant -from mavedb.scripts.environment import with_database_session - -logger = logging.getLogger(__name__) - - -@click.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--score-sets/--variants", default=False) -@click.option("--unlinked", default=False, is_flag=True) -def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool, unlinked: bool) -> None: - """ - Submit data to ClinGen for mapped variant allele ID generation for the given URNs. - """ - if not urns: - logger.error("No URNs provided. Please provide at least one URN.") - return - - # Convert score set URNs to variant URNs. 
- if score_sets: - query = ( - select(Variant.urn) - .join(MappedVariant) - .join(ScoreSet) - .where(MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) - ) - - if unlinked: - query = query.where(MappedVariant.clingen_allele_id.is_(None)) - - variants = [db.scalars(query.where(ScoreSet.urn == urn)).all() for urn in urns] - urns = [variant for sublist in variants for variant in sublist if variant is not None] - - failed_urns = [] - for urn in urns: - ldh_variation = get_clingen_variation(urn) - allele_id = clingen_allele_id_from_ldh_variation(ldh_variation) - - if not allele_id: - failed_urns.append(urn) - continue - - mapped_variant = db.scalar( - select(MappedVariant).join(Variant).where(and_(Variant.urn == urn, MappedVariant.current.is_(True))) - ) - - if not mapped_variant: - logger.warning(f"No mapped variant found for URN {urn}.") - failed_urns.append(urn) - continue - - mapped_variant.clingen_allele_id = allele_id - db.add(mapped_variant) - - logger.info(f"Successfully linked URN {urn} to ClinGen variation {allele_id}.") - - if failed_urns: - logger.warning(f"Failed to link the following {len(failed_urns)} URNs: {', '.join(failed_urns)}") - - logger.info(f"Linking process completed. 
Linked {len(urns) - len(failed_urns)}/{len(urns)} URNs successfully.") - - -if __name__ == "__main__": - link_clingen_variants() diff --git a/src/mavedb/scripts/link_gnomad_variants.py b/src/mavedb/scripts/link_gnomad_variants.py deleted file mode 100644 index e7f0fa495..000000000 --- a/src/mavedb/scripts/link_gnomad_variants.py +++ /dev/null @@ -1,80 +0,0 @@ -import logging -from typing import Sequence - -import click -from sqlalchemy import select -from sqlalchemy.orm import Session - -from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants -from mavedb.models.score_set import ScoreSet -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.variant import Variant -from mavedb.scripts.environment import with_database_session - - -logger = logging.getLogger(__name__) - - -@click.command() -@with_database_session -@click.option( - "--score-set-urn", multiple=True, type=str, help="Score set URN(s) to process. Can be used multiple times." -) -@click.option("--all", "all_score_sets", is_flag=True, help="Process all score sets in the database.", default=False) -@click.option("--only-current", is_flag=True, help="Only process current mapped variants.", default=True) -def link_gnomad_variants(db: Session, score_set_urn: list[str], all_score_sets: bool, only_current: bool) -> None: - """ - Query AWS Athena for gnomAD variants matching mapped variant CAIDs for one or more score sets. - """ - # 1. 
Collect all CAIDs for mapped variants in the selected score sets - if all_score_sets: - score_sets = db.query(ScoreSet.id).all() - score_set_ids = [s.id for s in score_sets] - else: - if not score_set_urn: - logger.error("No score set URNs specified.") - return - - score_sets = db.query(ScoreSet.id).filter(ScoreSet.urn.in_(score_set_urn)).all() - score_set_ids = [s.id for s in score_sets] - if len(score_set_ids) != len(score_set_urn): - logger.warning("Some provided URNs were not found in the database.") - - if not score_set_ids: - logger.error("No score sets found.") - return - - caid_query = ( - select(MappedVariant.clingen_allele_id) - .join(Variant) - .where(Variant.score_set_id.in_(score_set_ids), MappedVariant.clingen_allele_id.is_not(None)) - ) - - if only_current: - caid_query = caid_query.where(MappedVariant.current.is_(True)) - - # We filter out Nonetype CAIDs to avoid issues with Athena queries, so we can type this as Sequence[str] and ignore MyPy warnings - caids: Sequence[str] = db.scalars(caid_query.distinct()).all() # type: ignore - if not caids: - logger.error("No CAIDs found for the selected score sets.") - return - - logger.info(f"Found {len(caids)} CAIDs for the selected score sets to link to gnomAD variants.") - - # 2. Query Athena for gnomAD variants matching the CAIDs - gnomad_variant_data = gnomad_variant_data_for_caids(caids) - - if not gnomad_variant_data: - logger.error("No gnomAD records found for the provided CAIDs.") - return - - logger.info(f"Fetched {len(gnomad_variant_data)} gnomAD records from Athena.") - - # 3. 
Link gnomAD variants to mapped variants in the database - link_gnomad_variants_to_mapped_variants(db, gnomad_variant_data, only_current=only_current) - - logger.info("Done linking gnomAD variants.") - - -if __name__ == "__main__": - link_gnomad_variants() diff --git a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py deleted file mode 100644 index c681babc0..000000000 --- a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py +++ /dev/null @@ -1,127 +0,0 @@ -import click -import logging -from typing import Optional - -from sqlalchemy.orm import Session - -from mavedb.scripts.environment import with_database_session -from mavedb.models.score_set import ScoreSet -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession -from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata - -VALID_UNIPROT_DBS = [ - "UniProtKB", - "UniProtKB_AC-ID", - "UniProtKB-Swiss-Prot", - "UniParc", - "UniRef50", - "UniRef90", - "UniRef100", -] - -logger = logging.getLogger(__name__) - - -@click.command() -@with_database_session -@click.option("--score-set-urn", type=str, default=None, help="Score set URN to process. If not provided, process all.") -@click.option("--polling-interval", type=int, default=30, help="Polling interval in seconds for checking job status.") -@click.option("--polling-attempts", type=int, default=5, help="Number of tries to poll for job completion.") -@click.option("--to-db", type=str, default="UniProtKB", help="Target UniProt database for ID mapping.") -@click.option( - "--prefer-swiss-prot", is_flag=True, default=True, help="Prefer Swiss-Prot entries in the mapping results." 
-) -@click.option( - "--refresh-mapped-identifier", - is_flag=True, - default=False, - help="Refresh the existing mapped identifier, if one exists.", -) -def main( - db: Session, - score_set_urn: Optional[str], - polling_interval: int, - polling_attempts: int, - to_db: str, - prefer_swiss_prot: bool = True, - refresh_mapped_identifier: bool = False, -) -> None: - if to_db not in VALID_UNIPROT_DBS: - raise ValueError(f"Invalid target database: {to_db}. Must be one of {VALID_UNIPROT_DBS}.") - if score_set_urn: - score_sets = db.query(ScoreSet).filter(ScoreSet.urn == score_set_urn).all() - else: - score_sets = db.query(ScoreSet).all() - - api = UniProtIDMappingAPI(polling_interval=polling_interval, polling_tries=polling_attempts) - - logger.info(f"Processing {len(score_sets)} score sets.") - for score_set in score_sets: - logger.info(f"Processing score set: {score_set.urn}") - - if not score_set.target_genes: - logger.warning(f"No target gene for score set {score_set.urn}. Skipped mapping this score set.") - continue - - for target_gene in score_set.target_genes: - if target_gene.uniprot_id_from_mapped_metadata and not refresh_mapped_identifier: - logger.debug( - f"Target gene {target_gene.id} already has UniProt ID {target_gene.uniprot_id_from_mapped_metadata} and refresh_mapped_identifier is False. Skipped mapping this target." - ) - continue - - if not target_gene.post_mapped_metadata: - logger.warning( - f"No post-mapped metadata for target gene {target_gene.id}. Skipped mapping this target." - ) - continue - - ids = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not ids: - logger.warning( - f"No IDs found in post_mapped_metadata for target gene {target_gene.id}. Skipped mapping this target." - ) - continue - if len(ids) > 1: - logger.warning( - f"More than one accession ID found in post_mapped_metadata for target gene {target_gene.id}. Skipped mapping this target." 
- ) - continue - - id_to_map = ids[0] - from_db = infer_db_name_from_sequence_accession(id_to_map) - job_id = api.submit_id_mapping(from_db, to_db=to_db, ids=[id_to_map]) - - if not job_id: - logger.warning(f"Failed to submit job for target gene {target_gene.id}. Skipped mapping this target.") - continue - if not api.check_id_mapping_results_ready(job_id): - logger.warning(f"Job {job_id} not ready for target gene {target_gene.id}. Skipped mapping this target.") - continue - - results = api.get_id_mapping_results(job_id) - mapped_results = api.extract_uniprot_id_from_results(results, prefer_swiss_prot=prefer_swiss_prot) - - if not mapped_results: - logger.warning(f"No UniProt ID found for target gene {target_gene.id}. Skipped mapping this target.") - continue - if len(mapped_results) > 1: - logger.warning( - f"Could not unambiguously map target gene {target_gene.id}. Found multiple UniProt IDs ({len(mapped_results)})." - ) - continue - - uniprot_id = mapped_results[0][id_to_map]["uniprot_id"] - target_gene.uniprot_id_from_mapped_metadata = uniprot_id - db.add(target_gene) - - logger.info(f"Updated target gene {target_gene.id} with UniProt ID {uniprot_id}.") - - logger.info(f"Processed score set {score_set.urn} with {len(score_set.target_genes)} target genes.") - - logger.info(f"Done processing {len(score_sets)} score sets.") - - -if __name__ == "__main__": - main() diff --git a/src/mavedb/scripts/pipelines.py b/src/mavedb/scripts/pipelines.py new file mode 100644 index 000000000..8e7795bb9 --- /dev/null +++ b/src/mavedb/scripts/pipelines.py @@ -0,0 +1,160 @@ +"""Operator-facing CLI for inspecting pipeline state. 
+ +Usage: + # List all pipelines + poetry run python -m mavedb.scripts.pipelines list-pipelines + + # Filter by status + poetry run python -m mavedb.scripts.pipelines list-pipelines --status running + + # Show a single pipeline with progress statistics + poetry run python -m mavedb.scripts.pipelines show-pipeline urn:mavedb-pipeline: +""" + +import json +import logging +from datetime import datetime +from typing import Optional + +import asyncclick as click +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import PipelineStatus +from mavedb.models.pipeline import Pipeline +from mavedb.scripts.environment import script_environment, with_database_session +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + +logger = logging.getLogger(__name__) + + +def _format_dt(dt: Optional[datetime]) -> str: + return dt.isoformat() if dt else "-" + + +@script_environment.command(name="list-pipelines") +@with_database_session +@click.option( + "--status", + type=click.Choice([s.value for s in PipelineStatus]), + default=None, + help="Filter by pipeline status.", +) +@click.option("--name", default=None, help="Filter by pipeline name (exact match).") +@click.option("--correlation-id", default=None, help="Filter by correlation id.") +@click.option("--created-by-user-id", type=int, default=None, help="Filter by creating user id.") +@click.option("--limit", type=int, default=50, show_default=True, help="Maximum rows to return.") +@click.option("--json", "as_json", is_flag=True, help="Emit results as JSON.") +def list_pipelines( + db: Session, + status: Optional[str], + name: Optional[str], + correlation_id: Optional[str], + created_by_user_id: Optional[int], + limit: int, + as_json: bool, +) -> None: + """List pipelines with optional filters.""" + query = select(Pipeline) + if status: + query = query.where(Pipeline.status == status) + if name: + query = query.where(Pipeline.name == name) + if 
correlation_id: + query = query.where(Pipeline.correlation_id == correlation_id) + if created_by_user_id is not None: + query = query.where(Pipeline.created_by_user_id == created_by_user_id) + + query = query.order_by(Pipeline.created_at.desc()).limit(limit) + pipelines = db.scalars(query).all() + + if as_json: + rows = [ + { + "id": p.id, + "urn": p.urn, + "name": p.name, + "status": p.status, + "correlation_id": p.correlation_id, + "created_at": _format_dt(p.created_at), + "started_at": _format_dt(p.started_at), + "finished_at": _format_dt(p.finished_at), + "created_by_user_id": p.created_by_user_id, + } + for p in pipelines + ] + click.echo(json.dumps(rows, indent=2)) + return + + if not pipelines: + click.echo("No pipelines match the given filters.") + return + + click.echo(f"{'ID':>6} {'STATUS':<12} {'NAME':<32} {'CREATED':<26} URN") + for p in pipelines: + click.echo( + f"{p.id:>6} {str(p.status):<12} {p.name[:32]:<32} " f"{_format_dt(p.created_at):<26} {p.urn or '-'}" + ) + + +@script_environment.command(name="show-pipeline") +@with_database_session +@click.argument("urn") +@click.option("--json", "as_json", is_flag=True, help="Emit full result as JSON.") +def show_pipeline(db: Session, urn: str, as_json: bool) -> None: + """Show a single pipeline with progress statistics.""" + pipeline = db.scalars(select(Pipeline).where(Pipeline.urn == urn)).one_or_none() + if pipeline is None: + click.echo(f"Pipeline not found: {urn}", err=True) + raise SystemExit(1) + + # PipelineManager requires a redis client only for coordination; read-only progress + # aggregation does not dispatch jobs, so a None redis client is safe here if somewhat hacky. 
+ manager = PipelineManager(db=db, redis=None, pipeline_id=pipeline.id) # type: ignore[arg-type] + progress = manager.get_pipeline_progress() + + payload = { + "id": pipeline.id, + "urn": pipeline.urn, + "name": pipeline.name, + "description": pipeline.description, + "status": pipeline.status, + "correlation_id": pipeline.correlation_id, + "created_at": _format_dt(pipeline.created_at), + "started_at": _format_dt(pipeline.started_at), + "finished_at": _format_dt(pipeline.finished_at), + "created_by_user_id": pipeline.created_by_user_id, + "mavedb_version": pipeline.mavedb_version, + "metadata": pipeline.metadata_, + "progress": progress, + } + + if as_json: + click.echo(json.dumps(payload, indent=2, default=str)) + return + + click.echo(f"Pipeline: {pipeline.urn} (id={pipeline.id})") + click.echo(f" Name: {pipeline.name}") + click.echo(f" Status: {pipeline.status}") + click.echo(f" Correlation: {pipeline.correlation_id or '-'}") + click.echo(f" Created: {_format_dt(pipeline.created_at)}") + click.echo(f" Started: {_format_dt(pipeline.started_at)}") + click.echo(f" Finished: {_format_dt(pipeline.finished_at)}") + click.echo(f" Created by uid: {pipeline.created_by_user_id}") + click.echo(" Progress:") + click.echo(f" Total jobs: {progress['total_jobs']}") + click.echo(f" Completed: {progress['completed_jobs']}") + click.echo(f" Successful: {progress['successful_jobs']}") + click.echo(f" Failed: {progress['failed_jobs']}") + click.echo(f" Running: {progress['running_jobs']}") + click.echo(f" Pending: {progress['pending_jobs']}") + click.echo(f" Completion pct: {progress['completion_percentage']:.1f}%") + click.echo(f" Duration (s): {progress['duration']}") + if progress["status_counts"]: + click.echo(" Status counts:") + for status_key, count in sorted(progress["status_counts"].items()): + click.echo(f" {status_key}: {count}") + + +if __name__ == "__main__": + script_environment() diff --git a/src/mavedb/scripts/populate_mapped_hgvs.py 
b/src/mavedb/scripts/populate_mapped_hgvs.py deleted file mode 100644 index ed60594c3..000000000 --- a/src/mavedb/scripts/populate_mapped_hgvs.py +++ /dev/null @@ -1,188 +0,0 @@ -import logging -import requests -from typing import Sequence, Optional - -import click -from sqlalchemy import select -from sqlalchemy.orm import Session - -from mavedb.lib.clingen.allele_registry import CLINGEN_API_URL -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.variants import get_hgvs_from_post_mapped - -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant - -from mavedb.scripts.environment import script_environment, with_database_session - -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - - -def get_target_info(score_set: ScoreSet) -> tuple[bool, Optional[str]]: - target_is_coding: bool - transcript_accession: Optional[str] = None - if len(score_set.target_genes) == 1: - target = score_set.target_genes[0] - if target.category == "protein_coding": - target_is_coding = True - # only get transcript accession if coding - # accession-based - if target.target_accession and target.target_accession.accession: - # only use accession info if a transcript was specified - if target.target_accession.accession.startswith(("NM", "ENST")): - transcript_accession = target.target_accession.accession - # sequence-based - if target.post_mapped_metadata: - # assert that post_mapped_metadata is a dict for mypy - assert isinstance(target.post_mapped_metadata, dict) - if target.post_mapped_metadata.get("cdna", {}).get("sequence_accessions"): - if len(target.post_mapped_metadata["cdna"]["sequence_accessions"]) == 1: - transcript_accession = target.post_mapped_metadata["cdna"]["sequence_accessions"][0] - else: - raise ValueError( - f"Multiple cDNA accessions found in post-mapped metadata for target {target.name} in score set {score_set.urn}. 
Cannot determine which to use." - ) - # if sequence-based and no cDNA accession, warn that no transcript was specified - else: - # for coding score sets, the mapper should have returned a cdna post mapped metadata entry. Use mane transcript from clingen for now, but warn that we are assuming transcript. - logger.warning( - f"No cDNA accession found in post-mapped metadata for target {target.name} in score set {score_set.urn}. This is expected if variants were only provided at the protein level. If variants are at the nucleotide level, will assume MANE transcript from ClinGen for coding variant." - ) - else: - # for coding score sets, the mapper should have returned a cdna post mapped metadata entry. Use mane transcript from clingen for now, but warn that we are assuming transcript. - logger.warning( - f"No post-mapped metadata for target {target.name} in score set {score_set.urn}. Will assume MANE transcript from ClinGen for coding variant." - ) - else: - target_is_coding = False - # multi-target score sets are more complex because there is no direct link between variants and targets in the db. support later - else: - raise NotImplementedError("Populating mapped hgvs for multi-target score sets is not yet supported.") - - return target_is_coding, transcript_accession - - -@script_environment.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--all", help="Populate mapped hgvs for every score set in MaveDB.", is_flag=True) -def populate_mapped_hgvs(db: Session, urns: Sequence[Optional[str]], all: bool): - score_set_ids: Sequence[Optional[int]] - if all: - score_set_ids = db.scalars(select(ScoreSet.id)).all() - logger.info(f"Command invoked with --all. 
Routine will populate mapped hgvs for {len(urns)} score sets.") - else: - score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info(f"Populating mapped hgvs for the provided score sets ({len(urns)}).") - - for idx, ss_id in enumerate(score_set_ids): - if not ss_id: - continue - - score_set = db.scalar(select(ScoreSet).where(ScoreSet.id == ss_id)) - if not score_set: - logger.warning(f"Could not fetch score set with id={ss_id}.") - continue - - try: - target_is_coding, transcript_accession = get_target_info(score_set) - - variant_info = db.execute( - select(Variant.urn, MappedVariant) - .join(Variant) - .join(ScoreSet) - .where(ScoreSet.id == ss_id) - .where(MappedVariant.current == True) # noqa: E712 - ) - - variant_info_list = variant_info.all() - num_variants = len(variant_info_list) - - for v_idx, (variant_urn, mapped_variant) in enumerate(variant_info_list): - if (v_idx + 1) % ((num_variants + 9) // 10) == 0: - logger.info( - f"Processing variant {v_idx+1}/{num_variants} ({variant_urn}) for score set {score_set.urn} ({idx+1}/{len(urns)})." - ) - # TODO#469: support multi-target score sets - # returns None if no post-mapped object or if multi-variant - hgvs_assay_level = get_hgvs_from_post_mapped(mapped_variant.post_mapped) - - hgvs_g: Optional[str] = None - hgvs_c: Optional[str] = None - hgvs_p: Optional[str] = None - - # NOTE: if no clingen allele id, could consider searching clingen using hgvs_assay_level. 
for now, skipping variant if no clingen allele id in db - # TODO#469: implement support for multi-variants - if mapped_variant.clingen_allele_id and len(mapped_variant.clingen_allele_id.split(",")) == 1: - response = requests.get(f"{CLINGEN_API_URL}/{mapped_variant.clingen_allele_id}") - if response.status_code != 200: - logger.error( - f"Failed for variant {variant_urn} to query ClinGen API for {mapped_variant.clingen_allele_id}: {response.status_code}" - ) - continue - data = response.json() - if mapped_variant.clingen_allele_id.startswith("CA"): - if data.get("genomicAlleles"): - for allele in data["genomicAlleles"]: - if allele.get("referenceGenome") == "GRCh38" and allele.get("hgvs"): - hgvs_g = allele["hgvs"][0] - break - if target_is_coding: - if data.get("transcriptAlleles"): - if transcript_accession: - for allele in data["transcriptAlleles"]: - if allele.get("hgvs"): - for hgvs_string in allele["hgvs"]: - hgvs_reference_sequence = hgvs_string.split(":")[0] - if transcript_accession == hgvs_reference_sequence: - hgvs_c = hgvs_string - break - if hgvs_c: - if allele.get("proteinEffect"): - hgvs_p = allele["proteinEffect"].get("hgvs") - break - else: - # no transcript specified, use mane if available - for allele in data["transcriptAlleles"]: - if allele.get("MANE"): - # TODO#571 consider prioritizing certain MANE transcripts (e.g. 
MANE Select) - hgvs_c = allele["MANE"].get("nucleotide", {}).get("RefSeq", {}).get("hgvs") - hgvs_p = allele["MANE"].get("protein", {}).get("RefSeq", {}).get("hgvs") - break - - elif mapped_variant.clingen_allele_id.startswith("PA"): - # if PA, assume that assay was performed at amino acid level, so only provide hgvs_p - if data.get("aminoAcidAlleles"): - for allele in data["aminoAcidAlleles"]: - if allele.get("hgvs"): - hgvs_p = allele["hgvs"][0] - break - - mapped_variant.hgvs_assay_level = hgvs_assay_level - mapped_variant.hgvs_g = hgvs_g - mapped_variant.hgvs_c = hgvs_c - mapped_variant.hgvs_p = hgvs_p - db.add(mapped_variant) - db.commit() - - except Exception as e: - logging_context = { - "processed_score_sets": urns[:idx], - "unprocessed_score_sets": urns[idx:], - } - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - f"Score set {score_set.urn} could not be processed to extract hgvs strings.", extra=logging_context - ) - logger.info(f"Rolling back all changes for scoreset {score_set.urn}") - db.rollback() - - logger.info(f"Done with score set {score_set.urn}. 
({idx+1}/{len(urns)}).") - - logger.info("Done populating mapped hgvs.") - - -if __name__ == "__main__": - populate_mapped_hgvs() diff --git a/src/mavedb/scripts/populate_mapped_variants.py b/src/mavedb/scripts/populate_mapped_variants.py deleted file mode 100644 index de9eedbdd..000000000 --- a/src/mavedb/scripts/populate_mapped_variants.py +++ /dev/null @@ -1,179 +0,0 @@ -import logging -from datetime import date -from typing import Optional, Sequence, Union - -import click -from sqlalchemy import cast, select -from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import Session - -from mavedb.data_providers.services import vrs_mapper -from mavedb.lib.exceptions import NonexistentMappingReferenceError -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.mapping import ANNOTATION_LAYERS -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.scripts.environment import script_environment, with_database_session - -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - - -def variant_from_mapping(db: Session, mapping: dict, dcd_mapping_version: str) -> MappedVariant: - variant_urn = mapping.get("mavedb_id") - variant = db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() - - return MappedVariant( - variant_id=variant.id, - pre_mapped=mapping.get("pre_mapped"), - post_mapped=mapping.get("post_mapped"), - modification_date=date.today(), - mapped_date=date.today(), # since this is a one-time script, assume mapping was done today - vrs_version=mapping.get("vrs_version"), - mapping_api_version=dcd_mapping_version, - error_message=mapping.get("error_message"), - current=True, - ) - - -@script_environment.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--all", help="Populate mapped variants 
for every score set in MaveDB.", is_flag=True) -def populate_mapped_variant_data(db: Session, urns: Sequence[Optional[str]], all: bool): - score_set_ids: Sequence[Optional[int]] - if all: - score_set_ids = db.scalars(select(ScoreSet.id)).all() - logger.info( - f"Command invoked with --all. Routine will populate mapped variant data for {len(urns)} score sets." - ) - else: - score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info(f"Populating mapped variant data for the provided score sets ({len(urns)}).") - - vrs = vrs_mapper() - - for idx, ss_id in enumerate(score_set_ids): - if not ss_id: - continue - - score_set = db.scalar(select(ScoreSet).where(ScoreSet.id == ss_id)) - if not score_set: - logger.warning(f"Could not fetch score set with id={ss_id}.") - continue - - try: - existing_mapped_variants = ( - db.query(MappedVariant) - .join(Variant) - .join(ScoreSet) - .filter(ScoreSet.id == ss_id, MappedVariant.current.is_(True)) - .all() - ) - - for variant in existing_mapped_variants: - variant.current = False - - assert score_set.urn - logger.info(f"Mapping score set {score_set.urn}.") - mapped_scoreset = vrs.map_score_set(score_set.urn) - logger.info(f"Done mapping score set {score_set.urn}.") - - dcd_mapping_version = mapped_scoreset["dcd_mapping_version"] - mapped_scores = mapped_scoreset.get("mapped_scores") - - if not mapped_scores: - # if there are no mapped scores, the score set failed to map. 
- score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": mapped_scoreset.get("error_message")} - db.commit() - logger.info(f"No mapped variants available for {score_set.urn}.") - else: - reference_metadata = mapped_scoreset.get("reference_sequences") - if not reference_metadata: - raise NonexistentMappingReferenceError() - - for target_gene_identifier in reference_metadata: - target_gene = next( - ( - target_gene - for target_gene in score_set.target_genes - if target_gene.name == target_gene_identifier - ), - None, - ) - if not target_gene: - raise ValueError( - f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." - ) - # allow for multiple annotation layers - pre_mapped_metadata = {} - post_mapped_metadata: dict[str, Union[Optional[str], dict[str, dict[str, str | list[str]]]]] = {} - excluded_pre_mapped_keys = {"sequence"} - - gene_info = reference_metadata[target_gene_identifier].get("gene_info") - if gene_info: - target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") - post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") - - for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: - layer_premapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( - "computed_reference_sequence" - ) - if layer_premapped: - pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { - k: layer_premapped[k] - for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys - } - layer_postmapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( - "mapped_reference_sequence" - ) - if layer_postmapped: - post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped - target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) - target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) - - mapped_variants = [ - variant_from_mapping(db=db, 
mapping=mapped_score, dcd_mapping_version=dcd_mapping_version) - for mapped_score in mapped_scores - ] - logger.debug(f"Done constructing {len(mapped_variants)} mapped variant objects.") - - num_successful_variants = len( - [variant for variant in mapped_variants if variant.post_mapped is not None] - ) - logger.debug( - f"{num_successful_variants}/{len(mapped_variants)} variants generated a post-mapped VRS object." - ) - - if num_successful_variants == 0: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "All variants failed to map"} - elif num_successful_variants < len(mapped_variants): - score_set.mapping_state = MappingState.incomplete - else: - score_set.mapping_state = MappingState.complete - - db.bulk_save_objects(mapped_variants) - db.commit() - logger.info(f"Done populating {len(mapped_variants)} mapped variants for {score_set.urn}.") - - except Exception as e: - logging_context = { - "mapped_score_sets": urns[:idx], - "unmapped_score_sets": urns[idx:], - } - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error(f"Score set {score_set.urn} failed to map.", extra=logging_context) - logger.info(f"Rolling back all changes for scoreset {score_set.urn}") - db.rollback() - - logger.info(f"Done with score set {score_set.urn}. 
({idx+1}/{len(urns)}).") - - logger.info("Done populating mapped variant data.") - - -if __name__ == "__main__": - populate_mapped_variant_data() diff --git a/src/mavedb/scripts/populate_variant_translations.py b/src/mavedb/scripts/populate_variant_translations.py deleted file mode 100644 index 9b61d5f1e..000000000 --- a/src/mavedb/scripts/populate_variant_translations.py +++ /dev/null @@ -1,162 +0,0 @@ -import logging -from typing import Sequence, Optional - -import click -from sqlalchemy import select -from sqlalchemy.orm import Session - -from mavedb.lib.clingen.allele_registry import get_canonical_pa_ids, get_matching_registered_ca_ids -from mavedb.lib.logging.context import format_raised_exception_info_as_dict - -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.models.variant_translation import VariantTranslation - -from mavedb.scripts.environment import script_environment, with_database_session - -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - - -@script_environment.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--all", help="Populate mapped variants for every score set in MaveDB.", is_flag=True) -def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], all: bool): - # TODO keep track of what has been processed. - # I think this makes sense to track on the mapped variant level in order to allow - # for individual variant translation failure, and also so that we don't have to reset the - # score set log to unprocessed if we redo a mapping. Since we create new mapped variant entries - # if a scoreset is remapped, we can just update the processed column once per mapped variant. - # However, this will also require keeping track of exactly what mapped variants fail here. - # Skipping this for now. 
- - score_set_ids: Sequence[Optional[int]] - if all: - score_set_ids = db.scalars(select(ScoreSet.id)).all() - logger.info( - f"Command invoked with --all. Routine will populate mapped variant data for {len(urns)} score sets." - ) - else: - score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info(f"Populating mapped variant data for the provided score sets ({len(urns)}).") - - for idx, ss_id in enumerate(score_set_ids): - if not ss_id: - continue - - score_set = db.scalar(select(ScoreSet).where(ScoreSet.id == ss_id)) - if not score_set: - logger.warning(f"Could not fetch score set with id={ss_id}.") - continue - - clingen_allele_ids = db.scalars( - select(MappedVariant.clingen_allele_id) - .join(Variant) - .join(ScoreSet) - .where(ScoreSet.id == ss_id) - .where(MappedVariant.current == True) # noqa: E712 - ).all() - logger.info( - f"Found {len(clingen_allele_ids)} clingen allele IDs in the database associated with this score set." - ) - - # treat multi-variants separately - expanded_allele_ids = [] - for allele_id in clingen_allele_ids: - if not allele_id: - continue - if "," in allele_id: - expanded_allele_ids.extend([single_allele_id for single_allele_id in allele_id.split(",")]) - else: - expanded_allele_ids.append(allele_id) - - for allele_id in set(expanded_allele_ids): - try: - if allele_id.startswith("CA"): - # Get the canonical PA ID(s) from the ClinGen API - canonical_pa_ids = get_canonical_pa_ids(allele_id) - if not canonical_pa_ids: - logger.warning( - f"No canonical PA IDs found for {allele_id}. This may be expected if the query is noncoding." 
- ) - continue - for pa_id in canonical_pa_ids: - existing_variant_translation = db.scalars( - select(VariantTranslation).where( - VariantTranslation.aa_clingen_id == pa_id, VariantTranslation.nt_clingen_id == allele_id - ) - ).one_or_none() - if not existing_variant_translation: - db.add( - VariantTranslation( - aa_clingen_id=pa_id, - nt_clingen_id=allele_id, - ) - ) - # commit after each addition in order to query the database for existing variant translations - db.commit() - - # For each canonical PA ID, get the matching registered transcript CA IDs - ca_ids = get_matching_registered_ca_ids(pa_id) - if not ca_ids: - logger.warning(f"No matching registered transcript CA IDs found for {pa_id}.") - continue - for ca_id in ca_ids: - existing_variant_translation = db.scalars( - select(VariantTranslation).where( - VariantTranslation.aa_clingen_id == pa_id, VariantTranslation.nt_clingen_id == ca_id - ) - ).one_or_none() - if not existing_variant_translation: - db.add( - VariantTranslation( - aa_clingen_id=pa_id, - nt_clingen_id=ca_id, - ) - ) - db.commit() - - elif allele_id.startswith("PA"): - # Get the matching registered transcript CA IDs from the ClinGen API - ca_ids = get_matching_registered_ca_ids(allele_id) - if not ca_ids: - logger.warning( - f"No matching registered transcript CA IDs found for {allele_id}. This is unexpected." 
- ) - continue - for ca_id in ca_ids: - existing_variant_translation = db.scalars( - select(VariantTranslation).where( - VariantTranslation.aa_clingen_id == allele_id, VariantTranslation.nt_clingen_id == ca_id - ) - ).one_or_none() - if not existing_variant_translation: - db.add( - VariantTranslation( - aa_clingen_id=allele_id, - nt_clingen_id=ca_id, - ) - ) - db.commit() - - else: - logger.warning(f"Invalid clingen allele ID format: {allele_id}") - - except Exception as e: - logging_context = { - "processed_score_sets": urns[:idx], - "unprocessed_score_sets": urns[idx:], - } - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error(f"Unexpected error processing clingen allele ID {allele_id}: {e}") - db.rollback() - - logger.info(f"Done with score set {score_set.urn}. ({idx+1}/{len(urns)}).") - - logger.info("Done populating variant translations.") - - -if __name__ == "__main__": - populate_variant_translations() diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py deleted file mode 100644 index b043272c6..000000000 --- a/src/mavedb/scripts/refresh_clinvar_variant_data.py +++ /dev/null @@ -1,172 +0,0 @@ -import click -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -import requests -import csv -import time -import logging -import gzip -import random -import io -import sys - -from typing import Dict, Any, Optional, Sequence -from datetime import date - -from sqlalchemy import and_, select, distinct -from sqlalchemy.orm import Session - -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.clinical_control import ClinicalControl -from mavedb.scripts.environment import with_database_session - -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - - -# Some older variant summary files have larger field sizes than the default CSV reader can handle. 
-csv.field_size_limit(sys.maxsize) - - -def fetch_clinvar_variant_summary_tsv(month: Optional[str], year: str) -> bytes: - if month is None and year is None: - url = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz" - else: - if int(year) <= 2023: - url = f"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/archive/{year}/variant_summary_{year}-{month}.txt.gz" - else: - url = ( - f"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/archive/variant_summary_{year}-{month}.txt.gz" - ) - - response = requests.get(url, stream=True) - response.raise_for_status() - return response.content - - -def parse_tsv(tsv_content: bytes) -> Dict[int, Dict[str, str]]: - with gzip.open(filename=io.BytesIO(tsv_content), mode="rt") as f: - # This readlines object will only be a list of bytes if the file is opened in "rb" mode. - reader = csv.DictReader(f.readlines(), delimiter="\t") # type: ignore - data = {int(row["#AlleleID"]): row for row in reader} - - return data - - -def query_clingen_allele_api(allele_id: str) -> Dict[str, Any]: - url = f"https://reg.clinicalgenome.org/allele/{allele_id}" - retries = 5 - for i in range(retries): - try: - response = requests.get(url) - response.raise_for_status() - break - except requests.RequestException as e: - if i < retries - 1: - wait_time = (2**i) + random.uniform(0, 1) - logger.warning(f"Request failed ({e}), retrying in {wait_time:.2f} seconds...") - time.sleep(wait_time) - else: - logger.error(f"Request failed after {retries} attempts: {e}") - raise - - logger.debug(f"Fetched ClinGen data for allele ID {allele_id}.") - return response.json() - - -def refresh_clinvar_variants(db: Session, month: Optional[str], year: str, urns: Sequence[str]) -> None: - tsv_content = fetch_clinvar_variant_summary_tsv(month, year) - tsv_data = parse_tsv(tsv_content) - version = f"{month}_{year}" if month and year else f"{date.today().month}_{date.today().year}" - logger.info(f"Fetched TSV variant data for ClinVar for 
{version}.") - - if urns: - clingen_ids = db.scalars( - select(distinct(MappedVariant.clingen_allele_id)) - .join(Variant) - .join(ScoreSet) - .where( - and_( - MappedVariant.clingen_allele_id.is_not(None), - MappedVariant.current.is_(True), - ScoreSet.urn.in_(urns), - ) - ) - ).all() - else: - clingen_ids = db.scalars( - select(distinct(MappedVariant.clingen_allele_id)).where(MappedVariant.clingen_allele_id.is_not(None)) - ).all() - total_variants_with_clingen_ids = len(clingen_ids) - - logger.info(f"Fetching ClinGen data for {total_variants_with_clingen_ids} variants.") - for index, clingen_id in enumerate(clingen_ids): - if total_variants_with_clingen_ids > 0 and index % (max(total_variants_with_clingen_ids // 100, 1)) == 0: - logger.info(f"Progress: {index / total_variants_with_clingen_ids:.0%}") - - if clingen_id is not None and "," in clingen_id: - logger.debug("Detected a multi-variant ClinGen allele ID, skipping.") - continue - - # Guaranteed based on our query filters. - clingen_data = query_clingen_allele_api(clingen_id) # type: ignore - clinvar_allele_id = clingen_data.get("externalRecords", {}).get("ClinVarAlleles", [{}])[0].get("alleleId") - - if not clinvar_allele_id or clinvar_allele_id not in tsv_data: - logger.debug( - f"No ClinVar variant data found for ClinGen allele ID {clingen_id}. ({index + 1}/{total_variants_with_clingen_ids})." 
- ) - continue - - variant_data = tsv_data[clinvar_allele_id] - identifier = str(clinvar_allele_id) - - clinvar_variant = db.scalars( - select(ClinicalControl).where( - ClinicalControl.db_identifier == identifier, - ClinicalControl.db_version == version, - ClinicalControl.db_name == "ClinVar", - ) - ).one_or_none() - if clinvar_variant: - clinvar_variant.gene_symbol = variant_data.get("GeneSymbol") - clinvar_variant.clinical_significance = variant_data.get("ClinicalSignificance") - clinvar_variant.clinical_review_status = variant_data.get("ReviewStatus") - else: - clinvar_variant = ClinicalControl( - db_identifier=identifier, - gene_symbol=variant_data.get("GeneSymbol"), - clinical_significance=variant_data.get("ClinicalSignificance"), - clinical_review_status=variant_data.get("ReviewStatus"), - db_version=version, - db_name="ClinVar", - ) - - db.add(clinvar_variant) - - variants_with_clingen_allele_id = db.scalars( - select(MappedVariant).where(MappedVariant.clingen_allele_id == clingen_id) - ).all() - for mapped_variant in variants_with_clingen_allele_id: - if clinvar_variant.id in [c.id for c in mapped_variant.clinical_controls]: - continue - mapped_variant.clinical_controls.append(clinvar_variant) - db.add(mapped_variant) - - db.commit() - logger.debug( - f"Added ClinVar variant data ({identifier}) for ClinGen allele ID {clingen_id}. ({index + 1}/{total_variants_with_clingen_ids})." 
- ) - - -@click.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--month", default=None, help="Populate mapped variants for every score set in MaveDB.") -@click.option("--year", required=True, help="Populate mapped variants for every score set in MaveDB.") -def refresh_clinvar_variants_command(db: Session, month: Optional[str], year: str, urns: Sequence[str]) -> None: - refresh_clinvar_variants(db, month, year, urns) - - -if __name__ == "__main__": - refresh_clinvar_variants_command() diff --git a/src/mavedb/scripts/run_job.py b/src/mavedb/scripts/run_job.py new file mode 100644 index 000000000..8e22afa07 --- /dev/null +++ b/src/mavedb/scripts/run_job.py @@ -0,0 +1,267 @@ +"""Run a standalone worker job locally or enqueue it via ARQ. + +By default, jobs execute in-process using a standalone worker context (no +Redis/worker required). Use --enqueue to submit to the ARQ worker instead. + +Usage: + # Run locally + poetry run python -m mavedb.scripts.run_job link_gnomad_variants \ + --score-set-urn urn:mavedb:00000001-a-1 + + # Enqueue to ARQ worker + poetry run python -m mavedb.scripts.run_job link_gnomad_variants \ + --score-set-urn urn:mavedb:00000001-a-1 --enqueue + + # List available jobs + poetry run python -m mavedb.scripts.run_job --list + + # Run job with extra params + poetry run python -m mavedb.scripts.run_job refresh_clinvar_controls \ + --score-set-urn urn:mavedb:00000001-a-1 --param year=2024 --param month=1 +""" + +import datetime +import logging +import sys +from typing import Callable + +import asyncclick as click +from arq import create_pool +from sqlalchemy import select + +from mavedb.db.session import SessionLocal +from mavedb.lib.types.workflow import JobDefinition +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.models.score_set import ScoreSet +from mavedb.models.user import User +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.lib.managers.utils 
import arq_job_id +from mavedb.worker.settings import RedisWorkerSettings +from mavedb.worker.settings.lifecycle import standalone_ctx + +logger = logging.getLogger(__name__) + + +def _build_job_lookup() -> dict[str, tuple[Callable, JobDefinition]]: + """Build a mapping from job function name → (callable, job_definition).""" + return {job_def["function"]: (func, job_def) for func, job_def in STANDALONE_JOB_DEFINITIONS.items()} + + +def _print_available_jobs() -> None: + click.echo("Available standalone jobs:\n") + lookup = _build_job_lookup() + for name, (_, job_def) in sorted(lookup.items()): + required_params = [k for k, v in job_def["params"].items() if v is None] + # correlation_id is auto-generated + display_params = [p for p in required_params if p != "correlation_id"] + click.echo(f" {name}") + click.echo(f" Type: {job_def['type']}") + if display_params: + click.echo(f" Required params: {', '.join(display_params)}") + click.echo() + + +def _coerce_param_value(value: str) -> int | str: + """Attempt to coerce a string param value to int if it looks numeric.""" + try: + return int(value) + except ValueError: + return value + + +@click.command() +@click.argument("job_name", required=False) +@click.option("--list", "list_jobs", is_flag=True, help="List available jobs and exit.") +@click.option("--enqueue", is_flag=True, help="Enqueue to ARQ worker instead of running locally.") +@click.option("--score-set-urn", "score_set_urn", help="URN of the score set to process.") +@click.option("--all", "all_score_sets", is_flag=True, help="Run the job for every score set.") +@click.option("--updater-id", "updater_id", type=int, help="ID of the user (required by some jobs).") +@click.option( + "--param", + "extra_params", + multiple=True, + help="Additional key=value param (repeatable). e.g. 
--param year=2024", +) +async def main( + job_name: str | None, + list_jobs: bool, + enqueue: bool, + score_set_urn: str | None, + all_score_sets: bool, + updater_id: int | None, + extra_params: tuple[str, ...], +) -> None: + """Run a standalone worker job. + + JOB_NAME is the function name of the job to run (e.g. link_gnomad_variants). + Use --list to see available jobs. + """ + if list_jobs or not job_name: + _print_available_jobs() + return + + lookup = _build_job_lookup() + if job_name not in lookup: + click.echo(f"Unknown job: {job_name}", err=True) + click.echo(f"Available: {', '.join(sorted(lookup.keys()))}", err=True) + sys.exit(1) + + job_func, job_def = lookup[job_name] + + # Parse extra params + parsed_extra: dict[str, int | str] = {} + for param_str in extra_params: + if "=" not in param_str: + click.echo(f"Invalid --param format (expected key=value): {param_str}", err=True) + sys.exit(1) + key, value = param_str.split("=", 1) + parsed_extra[key] = _coerce_param_value(value) + + # Determine which params this job needs + required_params = {k for k, v in job_def["params"].items() if v is None} + needs_score_set = "score_set_id" in required_params + needs_updater = "updater_id" in required_params + + db = SessionLocal() + + # Resolve score sets if needed + score_set_ids: list[int] = [] + if needs_score_set: + if score_set_urn and all_score_sets: + click.echo("Cannot provide both --score-set-urn and --all.", err=True) + sys.exit(1) + if not score_set_urn and not all_score_sets: + click.echo("--score-set-urn or --all is required for this job.", err=True) + sys.exit(1) + + if all_score_sets: + score_set_ids = [id_ for id_ in db.scalars(select(ScoreSet.id)).all() if id_ is not None] + click.echo(f"Processing all {len(score_set_ids)} score sets.") + else: + # Support comma-separated URNs + urns = [u.strip() for u in score_set_urn.split(",")] # type: ignore[union-attr] + score_sets = db.scalars(select(ScoreSet).where(ScoreSet.urn.in_(urns))).all() + missing = 
set(urns) - {ss.urn for ss in score_sets} + if missing: + click.echo(f"Score sets not found: {', '.join(missing)}", err=True) + sys.exit(1) + score_set_ids = [ss.id for ss in score_sets if ss.id is not None] + + # Resolve user if needed + if needs_updater: + if not updater_id: + click.echo("--updater-id is required for this job.", err=True) + sys.exit(1) + user = db.scalars(select(User).where(User.id == updater_id)).one_or_none() + if not user: + click.echo(f"User not found: {updater_id}", err=True) + sys.exit(1) + updater_id = user.id + + correlation_id = f"{job_name}_{datetime.datetime.now().isoformat()}" + redis = await create_pool(RedisWorkerSettings) + job_factory = JobFactory(db) + + if enqueue: + await _enqueue_jobs( + db, + redis, + job_factory, + job_def, + job_name, + score_set_ids, + updater_id, + correlation_id, + parsed_extra, + needs_score_set, + ) + else: + await _run_locally( + db, + redis, + job_factory, + job_func, + job_def, + score_set_ids, + updater_id, + correlation_id, + parsed_extra, + needs_score_set, + ) + + db.close() + + +async def _enqueue_jobs( + db, redis, job_factory, job_def, job_name, score_set_ids, updater_id, correlation_id, extra_params, needs_score_set +) -> None: + """Create JobRun records and enqueue them in ARQ.""" + + try: + items = score_set_ids if needs_score_set else [None] + for score_set_id in items: + pipeline_params = {"correlation_id": correlation_id, **extra_params} + if score_set_id is not None: + pipeline_params["score_set_id"] = score_set_id + if updater_id is not None: + pipeline_params["updater_id"] = updater_id + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params=pipeline_params, + ) + db.flush() + + arq_id = arq_job_id(job_run) + job = await redis.enqueue_job(job_run.job_function, job_run.id, _job_id=arq_id) + if job: + click.echo(f"Enqueued {job_name} (job_run={job_run.id}, arq_id={arq_id})") + else: + click.echo(f"Job already 
enqueued (job_run={job_run.id})", err=True) + + db.commit() + finally: + await redis.aclose() + + +async def _run_locally( + db, redis, job_factory, job_func, job_def, score_set_ids, updater_id, correlation_id, extra_params, needs_score_set +) -> None: + """Execute jobs in-process using a standalone worker context.""" + ctx = standalone_ctx() + ctx["db"] = db + ctx["redis"] = redis + + items = score_set_ids if needs_score_set else [None] + for score_set_id in items: + pipeline_params = {"correlation_id": correlation_id, **extra_params} + if score_set_id is not None: + pipeline_params["score_set_id"] = score_set_id + if updater_id is not None: + pipeline_params["updater_id"] = updater_id + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params=pipeline_params, + ) + db.flush() + + resource = f"score_set_{score_set_id}" if score_set_id else "standalone" + click.echo(f"Running {job_def['function']} for {resource} (job_run={job_run.id})...") + + # The job_manager argument is injected by the with_pipeline_management decorator; + # we only pass ctx and job_run.id. + await job_func(ctx, job_run.id) # type: ignore[call-arg] + + click.echo(f" Completed job_run={job_run.id}") + + await redis.aclose() + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + main() diff --git a/src/mavedb/scripts/run_pipeline.py b/src/mavedb/scripts/run_pipeline.py new file mode 100644 index 000000000..2acb51223 --- /dev/null +++ b/src/mavedb/scripts/run_pipeline.py @@ -0,0 +1,149 @@ +"""Run a named pipeline end-to-end via ARQ. + +Creates the Pipeline and all associated JobRun/JobDependency records via +PipelineFactory, then enqueues the start_pipeline entrypoint in ARQ. +Requires a running Redis instance and worker. 
+ +Usage: + poetry run python -m mavedb.scripts.run_pipeline annotate_score_set \ + --score-set-urn urn:mavedb:00000001-a-1 --updater-id 1 + + poetry run python -m mavedb.scripts.run_pipeline --list +""" + +import datetime +import logging +import sys + +import asyncclick as click +from arq import create_pool +from sqlalchemy import select + +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.definitions import PIPELINE_DEFINITIONS +from mavedb.lib.workflow.pipeline_factory import PipelineFactory +from mavedb.models.score_set import ScoreSet +from mavedb.models.user import User +from mavedb.worker.lib.managers.utils import arq_job_id +from mavedb.worker.settings import RedisWorkerSettings + +logger = logging.getLogger(__name__) + + +def _print_available_pipelines() -> None: + click.echo("Available pipelines:\n") + for name, definition in PIPELINE_DEFINITIONS.items(): + click.echo(f" {name}") + click.echo(f" {definition['description']}") + + # Collect unique required params (those with None values) across all jobs + required_params: set[str] = set() + for job_def in definition["job_definitions"]: + for param, value in job_def["params"].items(): + if value is None: + required_params.add(param) + + # correlation_id is auto-generated, not user-supplied + required_params.discard("correlation_id") + if required_params: + click.echo(f" Required params: {', '.join(sorted(required_params))}") + + job_keys = [j["key"] for j in definition["job_definitions"]] + click.echo(f" Jobs ({len(job_keys)}): {', '.join(job_keys)}") + click.echo() + + +@click.command() +@click.argument("pipeline_name", required=False) +@click.option("--list", "list_pipelines", is_flag=True, help="List available pipelines and exit.") +@click.option("--score-set-urn", "score_set_urn", help="URN of the score set to process.") +@click.option("--updater-id", "updater_id", type=int, help="ID of the user to attribute pipeline actions to.") +@click.option( + "--extra-param", + "extra_params", + 
multiple=True, + type=(str, str), + help="Additional pipeline param given as two values: KEY VALUE (repeatable).", +) +async def main( + pipeline_name: str | None, + list_pipelines: bool, + score_set_urn: str | None, + updater_id: int | None, + extra_params: tuple[tuple[str, str], ...], +) -> None: + """Run a named pipeline via ARQ. + + PIPELINE_NAME is the name of the pipeline to run (e.g. annotate_score_set). + Use --list to see available pipelines. + """ + if list_pipelines or not pipeline_name: + _print_available_pipelines() + return + + if pipeline_name not in PIPELINE_DEFINITIONS: + click.echo(f"Unknown pipeline: {pipeline_name}", err=True) + click.echo(f"Available: {', '.join(PIPELINE_DEFINITIONS.keys())}", err=True) + sys.exit(1) + + if not score_set_urn: + click.echo("--score-set-urn is required.", err=True) + sys.exit(1) + + if not updater_id: + click.echo("--updater-id is required.", err=True) + sys.exit(1) + + db = SessionLocal() + score_set = db.scalars(select(ScoreSet).where(ScoreSet.urn == score_set_urn)).one_or_none() + if not score_set: + click.echo(f"Score set not found: {score_set_urn}", err=True) + sys.exit(1) + + user = db.scalars(select(User).where(User.id == updater_id)).one_or_none() + if not user: + click.echo(f"User not found: {updater_id}", err=True) + sys.exit(1) + + correlation_id = f"{pipeline_name}_{score_set.urn}_{user.id}_{datetime.datetime.now().isoformat()}" + pipeline_params: dict = { + "correlation_id": correlation_id, + "score_set_id": score_set.id, + "updater_id": user.id, + } + for key, value in extra_params: + pipeline_params[key] = value + + try: + pipeline_factory = PipelineFactory(session=db) + pipeline, pipeline_entrypoint = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=user, + pipeline_params=pipeline_params, + ) + except (KeyError, ValueError) as e: + click.echo(f"Failed to create pipeline: {e}", err=True) + sys.exit(1) + + click.echo(f"Created pipeline '{pipeline_name}' (id={pipeline.id},
correlation_id={correlation_id})") + + # Connect to Redis and enqueue + redis = await create_pool(RedisWorkerSettings) + try: + job = await redis.enqueue_job( + pipeline_entrypoint.job_function, + pipeline_entrypoint.id, + _job_id=arq_job_id(pipeline_entrypoint), + ) + if job: + click.echo(f"Enqueued start_pipeline job: {job.job_id}. Pipeline will execute asynchronously.") + else: + click.echo("Job was already enqueued (duplicate).", err=True) + finally: + await redis.aclose() + db.close() + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + main() diff --git a/src/mavedb/scripts/vep_functional_consequence.py b/src/mavedb/scripts/vep_functional_consequence.py deleted file mode 100644 index 8f188fa1d..000000000 --- a/src/mavedb/scripts/vep_functional_consequence.py +++ /dev/null @@ -1,268 +0,0 @@ -import logging -import requests -from datetime import date -from typing import Sequence, Optional - -import click -from sqlalchemy import select -from sqlalchemy.orm import Session - -from mavedb.models.score_set import ScoreSet -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.variant import Variant - -from mavedb.scripts.environment import script_environment, with_database_session - -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - -ENSEMBL_API_URL = "https://rest.ensembl.org" - -# List of all possible VEP consequences, in order from most to least severe -VEP_CONSEQUENCES = [ - "transcript_ablation", - "splice_acceptor_variant", - "splice_donor_variant", - "stop_gained", - "frameshift_variant", - "stop_lost", - "start_lost", - "transcript_amplification", - "feature_elongation", - "feature_truncation", - "inframe_insertion", - "inframe_deletion", - "missense_variant", - "protein_altering_variant", - "splice_donor_5th_base_variant", - "splice_region_variant", - "splice_donor_region_variant", - "splice_polypyrimidine_tract_variant", - "incomplete_terminal_codon_variant", - "start_retained_variant", - 
"stop_retained_variant", - "synonymous_variant", - "coding_sequence_variant", - "mature_miRNA_variant", - "5_prime_UTR_variant", - "3_prime_UTR_variant", - "non_coding_transcript_exon_variant", - "intron_variant", - "NMD_transcript_variant", - "non_coding_transcript_variant", - "coding_transcript_variant", - "upstream_gene_variant", - "downstream_gene_variant", - "TFBS_ablation", - "TFBS_amplification", - "TF_binding_site_variant", - "regulatory_region_ablation", - "regulatory_region_amplification", - "regulatory_region_variant", - "intergenic_variant", - "sequence_variant", -] - - -def run_variant_recoder(missing_hgvs: Sequence[str]) -> dict[str, list[str]]: - """ - Takes a list of input HGVS strings, calls the Variant Recoder API, and returns a mapping from input HGVS strings - to a list of genomic HGVS strings. - """ - headers = {"Content-Type": "application/json", "Accept": "application/json"} - recoder_response = requests.post( - f"{ENSEMBL_API_URL}/variant_recoder/human", - headers=headers, - json={"ids": list(missing_hgvs)}, - ) - input_hgvs_to_recoded: dict[str, list[str]] = {} - if recoder_response.status_code == 200: - recoder_data = recoder_response.json() - for entry in recoder_data: - for variant, variant_data in entry.items(): - input_hgvs = variant_data.get("input") - if not input_hgvs: - continue - genomic_hgvs_list = [] - genomic_strings = variant_data.get("hgvsg") - if genomic_strings: - for genomic_hgvs in genomic_strings: - if genomic_hgvs.startswith("NC_"): - genomic_hgvs_list.append(genomic_hgvs) - if genomic_hgvs_list: - if input_hgvs in input_hgvs_to_recoded: - input_hgvs_to_recoded[input_hgvs].extend(genomic_hgvs_list) - else: - input_hgvs_to_recoded[input_hgvs] = genomic_hgvs_list - else: - logger.error( - f"Failed batch Variant Recoder API request: {recoder_response.status_code} {recoder_response.text}" - ) - return input_hgvs_to_recoded - - -def get_functional_consequence(hgvs_strings: Sequence[str]) -> dict[str, Optional[str]]: - 
headers = {"Content-Type": "application/json", "Accept": "application/json"} - result: dict[str, Optional[str]] = {} - - # Batch POST to VEP - response = requests.post( - f"{ENSEMBL_API_URL}/vep/human/hgvs", - headers=headers, - json={"hgvs_notations": hgvs_strings}, - ) - - missing_hgvs = set(hgvs_strings) - if response.status_code == 200: - data = response.json() - # Map HGVS to consequence - for entry in data: - hgvs = entry.get("input") - most_severe_consequence = entry.get("most_severe_consequence") - if hgvs: - result[hgvs] = most_severe_consequence - missing_hgvs.discard(hgvs) - else: - logger.error(f"Failed batch VEP API request: {response.status_code} {response.text}") - - # Fallback for missing HGVS strings: batch POST to Variant Recoder - if missing_hgvs: - recoded_variants = run_variant_recoder(list(missing_hgvs)) - # Assign None for any missing_hgvs not present in recoder response - for hgvs_string in missing_hgvs: - if hgvs_string not in recoded_variants: - result[hgvs_string] = None - - # Collect all genomic HGVS strings for batch VEP request - all_recoded_hgvs = [] - for input_variant, recoded in recoded_variants.items(): - for variant in recoded: - all_recoded_hgvs.append(variant) - - # Run VEP in batches of 200 - vep_results: dict[str, str] = {} - for i in range(0, len(all_recoded_hgvs), 200): - batch = all_recoded_hgvs[i : i + 200] - vep_response = requests.post( - f"{ENSEMBL_API_URL}/vep/human/hgvs", - headers=headers, - json={"hgvs_notations": batch}, - ) - - if vep_response.status_code != 200: - logger.error(f"Failed batch VEP for genomic HGVS: {vep_response.status_code}") - continue - vep_data = vep_response.json() - for entry in vep_data: - recoded_input = entry.get("input") - most_severe_consequence = entry.get("most_severe_consequence") - if recoded_input and most_severe_consequence: - vep_results[recoded_input] = most_severe_consequence - - # For each original missing_hgvs, choose the most severe consequence among its genomic equivalents 
- for input_variant, recoded in recoded_variants.items(): - consequences = [] - for variant in recoded: - consequences.append(vep_results.get(variant)) - if consequences: - for consequence in VEP_CONSEQUENCES: - if consequence in consequences: - result[input_variant] = consequence - break - else: - result[input_variant] = None - else: - result[input_variant] = None - - return result - - -@script_environment.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--all", help="Populate functional consequence predictions for every score set in MaveDB.", is_flag=True) -def populate_functional_consequences(db: Session, urns: Sequence[Optional[str]], all: bool): - score_set_ids: Sequence[Optional[int]] - if all: - score_set_ids = db.scalars(select(ScoreSet.id)).all() - logger.info( - f"Command invoked with --all. Routine will populate functional consequence predictions for {len(score_set_ids)} score sets." - ) - else: - score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info( - f"Populating functional consequence predictions for the provided score sets ({len(score_set_ids)})." 
- ) - - for ss_id in score_set_ids: - if not ss_id: - continue - - score_set = db.scalar(select(ScoreSet).where(ScoreSet.id == ss_id)) - if not score_set: - logger.warning(f"Could not fetch score set with id={ss_id}.") - continue - - try: - mapped_variants = db.scalars( - select(MappedVariant) - .join(Variant) - .where( - Variant.score_set_id == ss_id, - MappedVariant.current.is_(True), - MappedVariant.post_mapped.isnot(None), - ) - ).all() - - if not mapped_variants: - logger.info(f"No mapped variant post-mapped objects found for score set {score_set.urn}.") - continue - - queue = [] - variant_map = {} - for mapped_variant in mapped_variants: - hgvs_string = mapped_variant.post_mapped.get("expressions", {})[0].get("value") # type: ignore - if not hgvs_string: - logger.warning(f"No HGVS string found in post_mapped for variant {mapped_variant.id}.") - continue - queue.append(hgvs_string) - variant_map[hgvs_string] = mapped_variant - - if len(queue) == 200: - consequences = get_functional_consequence(queue) - for hgvs, consequence in consequences.items(): - mapped_variant = variant_map[hgvs] - if consequence: - mapped_variant.vep_functional_consequence = consequence - mapped_variant.vep_access_date = date.today() - db.add(mapped_variant) - else: - logger.warning(f"Could not retrieve functional consequence for HGVS {hgvs}.") - db.commit() - queue.clear() - variant_map.clear() - - # Process any remaining variants in the queue - if queue: - consequences = get_functional_consequence(queue) - for hgvs, consequence in consequences.items(): - mapped_variant = variant_map[hgvs] - if consequence: - mapped_variant.vep_functional_consequence = consequence - mapped_variant.vep_access_date = date.today() - db.add(mapped_variant) - else: - logger.warning(f"Could not retrieve functional consequence for HGVS {hgvs}.") - db.commit() - - except Exception as e: - logger.error( - f"Failed to populate functional consequence predictions for score set {score_set.urn}: {str(e)}" - ) - 
db.rollback() - - logger.info("Done populating functional consequence predictions.") - - -if __name__ == "__main__": - populate_functional_consequences() diff --git a/src/mavedb/server_main.py b/src/mavedb/server_main.py index c7be2162f..c82965557 100644 --- a/src/mavedb/server_main.py +++ b/src/mavedb/server_main.py @@ -45,10 +45,12 @@ experiment_sets, experiments, hgvs, + job_runs, licenses, mapped_variant, orcid, permissions, + pipelines, publication_identifiers, raw_read_identifiers, refget, @@ -94,11 +96,13 @@ app.include_router(experiment_sets.router) app.include_router(experiments.router) app.include_router(hgvs.router) +app.include_router(job_runs.router) app.include_router(licenses.router) # app.include_router(log.router) app.include_router(mapped_variant.router) app.include_router(orcid.router) app.include_router(permissions.router) +app.include_router(pipelines.router) app.include_router(publication_identifiers.router) app.include_router(raw_read_identifiers.router) app.include_router(refget.router) diff --git a/src/mavedb/view_models/job_run.py b/src/mavedb/view_models/job_run.py new file mode 100644 index 000000000..a287f2725 --- /dev/null +++ b/src/mavedb/view_models/job_run.py @@ -0,0 +1,56 @@ +from datetime import datetime +from typing import Any, Optional + +from pydantic import Field + +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.view_models.base.base import BaseModel + + +class JobRunBase(BaseModel): + """Base view model for job runs.""" + + urn: Optional[str] = None + job_type: str + job_function: str + status: JobStatus + correlation_id: Optional[str] = None + pipeline_id: Optional[int] = None + failure_category: Optional[str] = None + error_message: Optional[str] = None + mavedb_version: Optional[str] = None + + +class SavedJobRun(JobRunBase): + """View model for a saved job run record.""" + + id: int + job_params: Optional[dict[str, Any]] = None + # Read from the ORM's `metadata_` attribute (field name). 
Serialize under JSON key + # `metadata` for operator readability. We cannot use `alias="metadata"` because the + # SQLAlchemy Base exposes a class-level `metadata` attribute (MetaData) that would + # otherwise shadow the mapped column when Pydantic reads attributes. + metadata_: dict[str, Any] = Field(default_factory=dict, serialization_alias="metadata") + + max_retries: int + retry_count: int + retry_delay_seconds: Optional[int] = None + + scheduled_at: datetime + started_at: Optional[datetime] = None + finished_at: Optional[datetime] = None + created_at: datetime + + progress_current: Optional[int] = None + progress_total: Optional[int] = None + progress_message: Optional[str] = None + + class Config: + from_attributes = True + populate_by_name = True + + +class JobRunDetail(SavedJobRun): + """Single-job-run detail response including the error traceback.""" + + error_traceback: Optional[str] = None diff --git a/src/mavedb/view_models/pipeline.py b/src/mavedb/view_models/pipeline.py new file mode 100644 index 000000000..f637f42a5 --- /dev/null +++ b/src/mavedb/view_models/pipeline.py @@ -0,0 +1,57 @@ +from datetime import datetime +from typing import Any, Optional + +from pydantic import Field + +from mavedb.models.enums.job_pipeline import PipelineStatus +from mavedb.view_models.base.base import BaseModel + + +class PipelineBase(BaseModel): + """Base view model for pipelines.""" + + urn: Optional[str] = None + name: str + description: Optional[str] = None + status: PipelineStatus + correlation_id: Optional[str] = None + mavedb_version: Optional[str] = None + + +class SavedPipeline(PipelineBase): + """View model for a saved pipeline record.""" + + id: int + # Read from the ORM's `metadata_` attribute (field name). Serialize under JSON key + # `metadata` for operator readability. 
We cannot use `alias="metadata"` because the + # SQLAlchemy Base exposes a class-level `metadata` attribute (MetaData) that would + # otherwise shadow the mapped column when Pydantic reads attributes. + metadata_: dict[str, Any] = Field(default_factory=dict, serialization_alias="metadata") + created_at: datetime + started_at: Optional[datetime] = None + finished_at: Optional[datetime] = None + created_by_user_id: Optional[int] = None + + class Config: + from_attributes = True + populate_by_name = True + + +class PipelineProgress(BaseModel): + """Pipeline progress statistics returned by PipelineManager.get_pipeline_progress().""" + + total_jobs: int + completed_jobs: int + successful_jobs: int + failed_jobs: int + running_jobs: int + pending_jobs: int + completion_percentage: float + duration: int + status_counts: dict[str, int] + + +class PipelineDetail(SavedPipeline): + """Single-pipeline detail response including progress statistics.""" + + progress: PipelineProgress diff --git a/src/mavedb/worker/README.md b/src/mavedb/worker/README.md new file mode 100644 index 000000000..34c66ed0f --- /dev/null +++ b/src/mavedb/worker/README.md @@ -0,0 +1,120 @@ +# ARQ Worker System + +The worker is a separate process from the FastAPI API server, connected via Redis (ARQ). It processes background jobs for variant creation, genomic mapping, external service annotation, and system maintenance. + +## Quick Start: "I want to..." 
+ +| Goal | Start Here | +|------|-----------| +| Understand the whole system | [Job System Overview](jobs_overview.md) | +| Add a new job to an existing pipeline | [Job Registry — Adding a Pipeline Job](job_registry.md#adding-a-pipeline-job) | +| Add a standalone or cron job | [Job Registry — Adding a Standalone/Cron Job](job_registry.md#adding-a-standalonecron-job) | +| Define a new pipeline | [Pipeline Management — Defining a New Pipeline](pipeline_management.md#defining-a-new-pipeline) | +| Understand how decorators work | [Job Decorators](job_decorators.md) | +| Understand how managers work | [Job Managers](job_managers.md) | +| Learn coding patterns and conventions | [Best Practices & Patterns](best_practices.md) | + +## Architecture Overview + +``` +┌───────────┐ enqueue ┌───────┐ dequeue ┌────────────────────────┐ +│ Router │ ──────────► │ Redis │ ──────────► │ ARQ Worker │ +│ (FastAPI) │ │ (ARQ) │ │ │ +└───────────┘ └───────┘ │ ┌──────────────────┐ │ + │ │ │ Decorators │ │ + │ PipelineFactory │ │ (lifecycle) │ │ + │ creates Pipeline, │ └────────┬─────────┘ │ + │ JobRun, and │ │ │ + │ JobDependency │ ┌────────▼─────────┐ │ + │ records in DB │ │ Job Function │ │ + │ │ │ (business) │ │ + └──► PostgreSQL ◄───────────────────────────────│ └────────┬─────────┘ │ + │ │ │ + │ ┌──────▼──────┐ │ + │ │ PostgreSQL │ │ + │ │ (state) │ │ + │ └─────────────┘ │ + └────────────────────────┘ +``` + +The system has **two layers**: + +1. **Infrastructure layer** (`lib/decorators/`, `lib/managers/`): Handles job lifecycle, state persistence, error recovery, pipeline coordination. Developers rarely modify this. +2. **Business layer** (`jobs/`): Implements domain logic. This is where most new code goes. + +Two types of work: +- **Pipeline jobs**: Multi-step workflows with dependency management (e.g., create → map → annotate variants). Orchestrated by `PipelineManager`. 
+- **Standalone jobs**: Independent tasks or cron-scheduled maintenance (e.g., cleanup stalled jobs, refresh materialized views). + +## Directory Structure + +``` +worker/ +├── README.md # This file +├── jobs_overview.md # System architecture and end-to-end flows +├── job_decorators.md # Decorator usage and internals +├── job_managers.md # Manager classes and their APIs +├── pipeline_management.md # Pipeline lifecycle and coordination +├── job_registry.md # Registration and step-by-step how-to guides +├── best_practices.md # Coding patterns and conventions +│ +├── jobs/ # ── Business Layer ── +│ ├── registry.py # Central registry of all job functions +│ ├── variant_processing/ # Variant creation and mapping jobs +│ │ ├── creation.py # create_variants_for_score_set +│ │ └── mapping.py # map_variants_for_score_set +│ ├── external_services/ # Integration with external APIs +│ │ ├── clingen.py # CAR and LDH submission +│ │ ├── clinvar.py # ClinVar control refresh +│ │ ├── gnomad.py # gnomAD variant linking +│ │ ├── hgvs.py # HGVS annotation +│ │ ├── uniprot.py # UniProt mapping submission/polling +│ │ └── variant_translation.py # Variant translation population +│ ├── data_management/ # Database maintenance jobs +│ │ └── views.py # Materialized view refresh +│ ├── pipeline_management/ # Pipeline orchestration jobs +│ │ └── start_pipeline.py # Pipeline entrypoint job +│ ├── system/ # System maintenance jobs +│ │ └── cleanup.py # Stalled job cleanup (cron) +│ └── utils/ # Shared job utilities +│ ├── setup.py # validate_job_params() +│ └── constants.py # Job-level constants +│ +├── lib/ # ── Infrastructure Layer ── +│ ├── decorators/ # Job/pipeline lifecycle decorators +│ │ ├── job_management.py # @with_job_management +│ │ ├── pipeline_management.py # @with_pipeline_management +│ │ ├── job_guarantee.py # @with_guaranteed_job_run_record +│ │ └── utils.py # Session management, test mode detection +│ └── managers/ # State management classes +│ ├── base_manager.py # 
BaseManager (DB + Redis init) +│ ├── job_manager.py # JobManager (individual job lifecycle) +│ ├── pipeline_manager.py # PipelineManager (pipeline coordination) +│ ├── constants.py # Status grouping constants +│ ├── exceptions.py # Exception hierarchy +│ ├── types.py # TypedDicts (RetryHistoryEntry, PipelineProgress) +│ └── utils.py # Dependency checking helpers, classify_exception() +│ +└── settings/ # ARQ worker configuration + ├── worker.py # ArqWorkerSettings class + ├── lifecycle.py # Startup/shutdown/job hooks, standalone_ctx() + ├── redis.py # Redis connection settings + └── constants.py # Environment variable handling +``` + +## Related Files Outside This Directory + +| File | Purpose | +|------|---------| +| `src/mavedb/lib/workflow/definitions.py` | `PIPELINE_DEFINITIONS` — declarative pipeline and job definitions | +| `src/mavedb/lib/workflow/pipeline_factory.py` | `PipelineFactory` — creates Pipeline + JobRun + JobDependency records | +| `src/mavedb/lib/workflow/job_factory.py` | `JobFactory` — creates individual JobRun records | +| `src/mavedb/lib/types/workflow.py` | `JobExecutionOutcome`, `JobDefinition`, `PipelineDefinition` types | +| `src/mavedb/models/pipeline.py` | `Pipeline` ORM model | +| `src/mavedb/models/job_run.py` | `JobRun` ORM model | +| `src/mavedb/models/job_dependency.py` | `JobDependency` ORM model | +| `src/mavedb/models/enums/job_pipeline.py` | `JobStatus`, `PipelineStatus`, `DependencyType`, `FailureCategory`, `JobType` enums | +| `src/mavedb/routers/score_sets.py` | Primary router that triggers the `validate_map_annotate_score_set` pipeline | +| `src/mavedb/scripts/run_pipeline.py` | CLI script for running pipelines outside the API | +| `src/mavedb/scripts/run_job.py` | CLI script for running standalone jobs outside the API | +| `tests/worker/` | Test suite mirroring this directory structure | diff --git a/src/mavedb/worker/best_practices.md b/src/mavedb/worker/best_practices.md new file mode 100644 index 000000000..e83eb42f1 
--- /dev/null +++ b/src/mavedb/worker/best_practices.md @@ -0,0 +1,347 @@ +# Best Practices & Patterns + +Concrete patterns to follow when writing job code. Every example comes from or is modeled on the existing codebase. + +## Job Function Structure + +Every job function follows this template: + +```python +@with_pipeline_management +async def my_job(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + # 1. Get the job record and validate params + job = job_manager.get_job() + validate_job_params(["score_set_id", "correlation_id"], job) + + # 2. Extract params and set up logging context + score_set_id = job.job_params["score_set_id"] + correlation_id = job.job_params["correlation_id"] + + job_manager.save_to_context({ + "application": "mavedb-worker", + "function": "my_job", + "resource": score_set_id, + "correlation_id": correlation_id, + }) + + # 3. Initialize progress + job_manager.update_progress(0, 100, "Starting my job.") + logger.info("Starting my job", extra=job_manager.logging_context()) + + # 4. Load domain objects and do work + score_set = job_manager.db.scalars( + select(ScoreSet).where(ScoreSet.id == score_set_id) + ).one() + + # ... business logic ... + + # 5. Final progress update and return + job_manager.update_progress(100, 100, "My job complete.") + return JobExecutionOutcome.succeeded(data={"items_processed": count}) +``` + +## Parameter Validation + +Always validate required parameters at the top of the job function, before accessing them: + +```python +job = job_manager.get_job() +_job_required_params = ["score_set_id", "correlation_id", "updater_id"] +validate_job_params(_job_required_params, job) +``` + +`validate_job_params()` (from `jobs/utils/setup.py`) raises a `KeyError` if any required param is missing from `job.job_params`. This turns into an ERRORED status via the decorator. 
+ +**Do not access `job.job_params[key]` without validation first** — a missing key would raise an uncontrolled `KeyError` without a helpful message. + +## Return Values + +Use `JobExecutionOutcome` factory methods to communicate results: + +### Succeeded — job completed normally +```python +return JobExecutionOutcome.succeeded(data={"variants_created": count}) +``` + +### Failed — a business-logic failure (not a bug) +```python +# Missing data, validation failure, precondition not met +if not mapped_variants: + return JobExecutionOutcome.failed( + reason="No mapped variants found for score set", + data={"score_set_id": score_set_id} + ) +``` + +The decorator marks the job as FAILED. Depending on the pipeline's dependency configuration, downstream jobs may still run (if using `SUCCESS_OR_FAILURE_REQUIRED`) or be cancelled. + +### Skipped — job intentionally not executed +```python +# Feature is disabled, already completed, nothing to do +if not settings.LDH_ENABLED: + return JobExecutionOutcome.skipped(data={"reason": "LDH submissions disabled"}) +``` + +The decorator marks the job as SKIPPED. In pipelines, SKIPPED counts as a completed state for dependency resolution — downstream jobs whose dependency on this job is `SUCCESS_REQUIRED` will NOT be blocked. + +### Errored — never return this from job code +Unhandled exceptions are caught by the decorator and automatically create an `.errored()` outcome. Do not return `JobExecutionOutcome.errored()` from job functions. + +## Progress Tracking + +`update_progress()` commits the session as a checkpoint. This is intentional — it persists progress even if the job fails later. 
+ +### Simple progress (known total) +```python +job_manager.update_progress(0, total_records, "Starting variant creation") + +for i, record in enumerate(records): + process_record(record) + job_manager.update_progress(i + 1, total_records, f"Processed {i + 1}/{total_records} records") +``` + +### Incremental progress (using convenience methods) +```python +job_manager.set_progress_total(total_records, "Starting variant creation") + +for record in records: + process_record(record) + job_manager.increment_progress() +``` + +### Stage-based progress (multiple phases) +```python +job_manager.update_progress(0, 100, "Loading score set data.") +# ... loading phase ... +job_manager.update_progress(25, 100, "Validating variants.") +# ... validation phase ... +job_manager.update_progress(50, 100, "Writing to database.") +# ... write phase ... +job_manager.update_progress(100, 100, "Variant creation complete.") +``` + +## Logging Context + +Always set up logging context early in the job function: + +```python +job_manager.save_to_context({ + "application": "mavedb-worker", + "function": "my_job_name", + "resource": score_set.urn, + "correlation_id": correlation_id, +}) +``` + +Then use `job_manager.logging_context()` with every log call: + +```python +logger.info("Processing variants", extra=job_manager.logging_context()) +logger.warning("Missing expected data", extra=job_manager.logging_context()) +``` + +This provides structured, correlated logs across the full request lifecycle (API request → pipeline creation → multiple job executions). + +## External Service Integration Pattern + +Jobs that submit to external services follow a consistent pattern: + +```python +@with_pipeline_management +async def submit_to_external_service(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + job = job_manager.get_job() + validate_job_params(["score_set_id", "correlation_id"], job) + + # 1. 
Check if the service is enabled + if not settings.SERVICE_ENABLED: + return JobExecutionOutcome.skipped(data={"reason": "Service submissions disabled"}) + + # 2. Load required data + score_set = job_manager.db.scalars( + select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"]) + ).one() + + # 3. Check preconditions + if not score_set.mapped_variants: + return JobExecutionOutcome.failed(reason="No mapped variants to submit") + + # 4. Submit to the service (let exceptions propagate for service errors) + result = await external_client.submit(score_set) + + # 5. Return outcome + return JobExecutionOutcome.succeeded(data={"submission_id": result.id}) +``` + +Key points: +- Return `skipped()` if the service is disabled — don't raise an exception +- Return `failed()` if preconditions aren't met — this is a business failure, not a bug +- Let connection errors and timeouts propagate as exceptions — the decorator handles them (ERRORED status, Slack alert, retry logic) + +## Database Access + +### Use `job_manager.db` for the session +```python +db = job_manager.db # This is the task-local SQLAlchemy Session + +score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() +``` + +### Commit discipline + +Direct `db.commit()` calls from job code are only permitted in the bare-`raise` error paths of `creation.py` and `mapping.py`, where score set state (`processing_state`, `mapping_state`, `processing_errors`) must survive the decorator's rollback-on-exception. Everywhere else the decorator owns the commit decision. + +> **These exceptions are temporary.** Once score set processing and mapping state is derived from job run records rather than stored directly on the score set model, the pre-raise commits in `creation.py` and `mapping.py` will no longer be necessary and should be removed. + +`update_progress()` (and `update_status_message()`, `increment_progress()`, `set_progress_total()`) commit by default. 
This is intentional — they act as explicit checkpoints that persist progress even if the job fails or is retried later. Each call commits *all* pending session state at that point, not just the progress fields, so call them only at safe transaction boundaries. + +If you need database IDs before a checkpoint (e.g., after creating records), use `db.flush()`: +```python +new_record = MyModel(name="example") +db.add(new_record) +db.flush() # new_record.id is now available, but not yet committed +``` + +### Flush immediately before every return + +Every `return JobExecutionOutcome.*` **must** be preceded by `job_manager.db.flush()`: + +```python +job_manager.db.flush() +return JobExecutionOutcome.succeeded(data={...}) +``` + +**Why this matters:** + +In production the decorator always commits after the job function returns, which triggers an autoflush — so a missing explicit flush is invisible. In tests, job functions are called directly (the decorator is a no-op), so only an explicit flush ensures pending ORM state is staged to the DB before the test's `session.refresh()` call reads it back. + +Without this flush, tests that use `session.refresh(obj)` to verify persistence would silently pass by reading stale in-memory state rather than catching a missing `db.add()` or `flag_modified()` call. + +This flush is a no-op at the statement level (it costs nothing if no state is pending), but it makes the job's contract with the session explicit and testable: *"by the time I return, all DB state I care about is staged."* The decorator then decides whether to commit or rollback based on the outcome. 
+ +### Bulk operations +For performance-critical operations (e.g., variant creation), use bulk inserts: +```python +db.execute(insert(Variant), variant_dicts) +db.flush() +``` + +## Score Set Processing State Management + +Jobs that process score sets update the score set's `processing_state` and `mapping_state` fields via dedicated methods in `JobManager`: + +```python +# Managed by the infrastructure — don't set these directly from job code. +# The decorator/manager handles score set state transitions based on +# the job type and outcome. +``` + +**Exception**: Some jobs currently manage score set state directly. This is legacy behavior being refactored. New jobs should rely on the infrastructure-layer state management where possible. + +## Idempotency Contract + +**All job functions must be safe to retry from scratch.** The worker infrastructure retries jobs that fail with transient errors (network timeouts, DB disconnects) and recovers stalled jobs via the cleanup cron. A retried job re-executes the entire function — there is no checkpointing or partial-resume mechanism. + +This means a job that partially completes, crashes, and gets retried must not produce duplicate side effects. In practice: + +- **Database writes** are generally safe — if the crash happens before commit, the transaction rolls back and retry starts clean. +- **External API submissions** (CAR, LDH, UniProt, ClinGen) must tolerate duplicate calls. Currently our external targets handle this gracefully (idempotent endpoints or deduplication on their side), but this is an implicit assumption, not an enforced guarantee. +- **Cache writes** are inherently idempotent. + +When writing a new job that calls an external service, verify that the target handles duplicate submissions. 
If it doesn't, guard against re-submission by checking for prior results before calling: + +```python +# Check if we already submitted successfully in a prior attempt +existing = db.scalars( + select(Submission).where( + Submission.score_set_id == score_set_id, + Submission.status == "accepted", + ) +).first() + +if existing: + return JobExecutionOutcome.succeeded(data={"submission_id": existing.external_id}) + +# No prior submission — proceed +result = await external_client.submit(score_set) +``` + +## Common Pitfalls + +### Don't call lifecycle methods from job code +```python +# WRONG — the decorator handles these +job_manager.start_job() +job_manager.succeed_job(outcome) + +# RIGHT — just return the outcome +return JobExecutionOutcome.succeeded() +``` + +### Don't construct JobExecutionOutcome directly +```python +# WRONG +return JobExecutionOutcome(status="succeeded", data={}) + +# RIGHT +return JobExecutionOutcome.succeeded(data={}) +``` + +### Don't catch exceptions just to re-raise or log +```python +# WRONG — the decorator already handles this +try: + result = await external_service.call() +except Exception as e: + logger.error(f"Failed: {e}") + raise + +# RIGHT — let it propagate +result = await external_service.call() +``` + +The decorator catches unhandled exceptions, logs them with full context, sends Slack alerts, and marks the job as ERRORED. + +### Don't forget to export new job functions +New job functions must be: +1. Exported from their category's `__init__.py` +2. Added to `BACKGROUND_FUNCTIONS` in `registry.py` +3. Added to a pipeline definition in `definitions.py` (if a pipeline job) + +Missing any of these will cause the job to either not be discoverable by ARQ or not be included in a pipeline. 
+ +### Don't pass `job_manager` when enqueueing +```python +# WRONG — ARQ can't serialize a JobManager +await redis.enqueue_job("my_job", job_id, job_manager=manager) + +# RIGHT — decorator injects job_manager +await redis.enqueue_job("my_job", job_id) +``` + +## Testing Patterns + +### Test mode bypasses decorators +When `MAVEDB_TEST_MODE=1` (set by the test fixtures), all decorators become no-ops. Tests call job functions directly, passing a pre-built `JobManager`: + +```python +manager = JobManager(session, mock_worker_ctx["redis"], sample_job_run.id) +result = await create_variants_for_score_set(mock_worker_ctx, sample_job_run.id, manager) +assert result.status == "succeeded" +``` + +### Mock only at system boundaries +- Mock external services (ClinGen, DCD Mapping, etc.) +- Mock Redis/ARQ enqueue calls +- Mock Slack notifications +- **Do NOT mock** `update_progress`, `validate_job_params`, or other internal helpers + +### Use fixtures for job setup +The test `conftest.py` provides fixtures for creating `JobRun` records with the right params structure. Use these rather than constructing records manually. + +For complete testing guidelines, see `.github/instructions/testing.instructions.md`. + +## See Also + +- [Job Registry](job_registry.md) — Step-by-step guides for adding new jobs +- [Job Decorators](job_decorators.md) — How the decorator layer works +- [Job Managers](job_managers.md) — Manager APIs and commit discipline diff --git a/src/mavedb/worker/job_decorators.md b/src/mavedb/worker/job_decorators.md new file mode 100644 index 000000000..eafe3fc02 --- /dev/null +++ b/src/mavedb/worker/job_decorators.md @@ -0,0 +1,182 @@ +# Job Decorators + +Decorators are the bridge between the infrastructure layer and business layer. They wrap job functions to provide lifecycle management, error handling, state persistence, and pipeline coordination — so job functions can focus purely on business logic. 
+ +## Available Decorators + +### `@with_pipeline_management` — The Default Choice + +**Use for**: Any job that belongs to (or may belong to) a pipeline. This is the most commonly used decorator. + +```python +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management + +@with_pipeline_management +async def my_job(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + # Business logic here + return JobExecutionOutcome.succeeded(data={...}) +``` + +**What it does** (in order): +1. Creates a task-local DB session via `ensure_session_ctx()` +2. Checks test mode — if `MAVEDB_TEST_MODE=1`, skips all decorator logic and calls the function directly +3. Loads the job's `pipeline_id` from the `JobRun` record +4. If the pipeline exists and is in `CREATED` state, starts it (status → `RUNNING`) without coordinating yet +5. Wraps the function with `@with_job_management` (see below) and executes it +6. After the job completes (success or failure): calls `PipelineManager.coordinate_pipeline()` +7. On unhandled exceptions: rolls back, attempts final coordination, sends Slack alert, swallows exception so ARQ finishes cleanly + +**If the job has no pipeline** (pipeline_id is null): the decorator skips all pipeline coordination and only applies job management. This makes it safe to use on jobs that might or might not be part of a pipeline. + +### `@with_job_management` — Job Lifecycle Only + +**Use for**: Standalone jobs that will never be part of a pipeline. Usually stacked under `@with_guaranteed_job_run_record`. + +```python +from mavedb.worker.lib.decorators.job_management import with_job_management + +@with_job_management +async def my_standalone_job(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + # Business logic here + return JobExecutionOutcome.succeeded(data={...}) +``` + +**What it does** (in order): +1. Creates a task-local DB session via `ensure_session_ctx()` +2. 
Checks test mode — if `MAVEDB_TEST_MODE=1`, calls the function directly +3. Extracts `db`, `redis`, and `job_id` from context/args +4. Creates a `JobManager` instance +5. Checks if the job is already in a terminal state (race condition protection — e.g., a sibling job cancelled this one before ARQ picked it up). If so, returns `SKIPPED`. +6. Marks job as `RUNNING` and commits +7. Injects `job_manager` into kwargs and calls the function +8. Based on the returned `JobExecutionOutcome.status`: + - `SUCCEEDED` → `job_manager.succeed_job()` + commit + - `FAILED` → `job_manager.fail_job()` + Slack alert + commit + - `ERRORED` → `job_manager.error_job()` + Slack alert + commit + - `SKIPPED` → `job_manager.skip_job()` + commit +9. If job didn't succeed: checks `should_retry()` and prepares retry if eligible +10. On unhandled exceptions: rolls back, marks job as `ERRORED`, checks retry, sends Slack alert, swallows exception + +### `@with_guaranteed_job_run_record(job_type)` — Auto-Create JobRun + +**Use for**: Cron jobs or standalone jobs where no `JobRun` record exists before execution (because no `PipelineFactory` or script pre-created one). + +```python +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.decorators.job_management import with_job_management + +@with_guaranteed_job_run_record("cron_job") +@with_job_management +async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + # Business logic here + return JobExecutionOutcome.succeeded(data={...}) +``` + +**What it does**: +1. Checks test mode — if `MAVEDB_TEST_MODE=1`, calls the function directly +2. If `job_id` is already present (pre-created by a script): validates it exists and passes through +3. Otherwise: creates a new `JobRun` record with the given `job_type` and the function name, commits, and inserts the `job_id` into the function's args +4. 
Calls the wrapped function (which should be `@with_job_management`) + +## Stacking Rules + +| Pattern | When | +|---------|------| +| `@with_pipeline_management` alone | Pipeline jobs (most common) | +| `@with_guaranteed_job_run_record` + `@with_job_management` | Standalone/cron jobs needing auto-created JobRun | +| `@with_job_management` alone | Standalone jobs with pre-created JobRun | + +**Never** stack `@with_guaranteed_job_run_record` with `@with_pipeline_management`. Pipeline jobs get their `JobRun` records created by `PipelineFactory`, not by the guarantee decorator. + +**Never** stack `@with_job_management` on top of `@with_pipeline_management`. The pipeline decorator wraps job management internally. + +## Session Management Internals + +The `ensure_session_ctx()` context manager (in `lib/decorators/utils.py`) solves a critical concurrency problem: + +**Problem**: ARQ runs multiple jobs concurrently as asyncio tasks. If all tasks share the same `ctx["db"]` session, one task closing or rolling back the session can corrupt another task's database operations. 
+ +**Solution**: A `ContextVar` named `_task_db_session` provides task-local storage: + +```python +@contextmanager +def ensure_session_ctx(ctx): + existing = _task_db_session.get() + if existing is not None: + # Re-entrant: update ctx["db"] to this task's session + ctx["db"] = existing + yield existing + else: + # First entry: create a new session for this task + with db_session() as session: + _task_db_session.set(session) + ctx["db"] = session + try: + yield session + finally: + _task_db_session.set(None) +``` + +This means: +- Each concurrent ARQ job gets its own database session +- Nested decorators (`with_pipeline_management` → `with_job_management`) share the same session via the ContextVar +- The session is cleaned up when the outermost decorator exits + +## Test Mode Bypass + +All decorators check `is_test_mode()` (which reads `MAVEDB_TEST_MODE` env var) and become **no-ops** when it's set to `"1"`. This is critical for testing because: + +1. Decorators are applied at **import time** — they can't be easily mocked or patched +2. Tests need to control the `JobManager` instance (e.g., use a test DB session) rather than having the decorator create one +3. Tests need deterministic behavior without Redis, task-local sessions, or automatic commits + +In tests, job functions are called directly: + +```python +# Test code +job_manager = JobManager(session, mock_redis, sample_job_run.id) +result = await create_variants_for_score_set( + mock_worker_ctx, + sample_job_run.id, + job_manager, # Passed directly, not injected by decorator +) +assert result.status == JobStatus.SUCCEEDED +``` + +The `MAVEDB_TEST_MODE=1` environment variable is set in the test `conftest.py`. The `patch_db_session_ctxmgr` fixture further patches session management for integration tests. 
+ +## Error Handling Flow + +When a job raises an unhandled exception, the decorator chain handles it: + +``` +Job function raises Exception + │ + ├─► @with_job_management catches it + │ ├─ Rolls back DB session + │ ├─ Creates JobExecutionOutcome.errored(exception=e) + │ ├─ Calls job_manager.error_job(result) + │ ├─ Commits error state + │ ├─ Checks should_retry() + │ │ ├─ If retryable: prepare_retry() → commit → return result (don't re-raise) + │ │ └─ If not: just return result + │ ├─ Sends Slack alert + │ └─ Returns result (swallows exception) + │ + ├─► @with_pipeline_management receives the result + │ ├─ Calls PipelineManager.coordinate_pipeline() + │ │ ├─ transition_pipeline_status() → likely FAILED or still RUNNING (if retry pending) + │ │ ├─ If FAILED: cancel_remaining_jobs() + │ │ └─ If RUNNING: enqueue_ready_jobs() (may pick up retried job) + │ └─ Commits coordination changes + │ + └─► ARQ receives a clean return value (no exception propagation) +``` + +Exceptions are **swallowed** after alerting. This prevents ARQ from marking the job with its own error handling, since we manage job state ourselves via `JobManager`. + +## See Also + +- [Job Managers](job_managers.md) — What `JobManager` and `PipelineManager` do +- [Pipeline Management](pipeline_management.md) — How coordination works +- [Best Practices](best_practices.md) — Return value patterns, when to let exceptions propagate diff --git a/src/mavedb/worker/job_managers.md b/src/mavedb/worker/job_managers.md new file mode 100644 index 000000000..f42659789 --- /dev/null +++ b/src/mavedb/worker/job_managers.md @@ -0,0 +1,153 @@ +# Job Managers + +Managers handle state transitions and coordination. There are two managers, each with a distinct role: + +- **`JobManager`** — Manages individual job lifecycle (start, progress, complete, retry). Used by both decorators and job code. +- **`PipelineManager`** — Coordinates pipeline execution (dependency resolution, job enqueueing, status transitions). 
Used primarily by decorators. + +Both inherit from `BaseManager`, which provides a common `db` (SQLAlchemy session) and `redis` (ARQ client) interface. + +## JobManager — Individual Job Lifecycle + +### Who uses it + +| Context | How it's used | +|---------|---------------| +| **Job code** | Call `update_progress()`, `save_to_context()`, `logging_context()`, access `db` and `get_job()` | +| **`@with_job_management` decorator** | Call `start_job()`, `succeed_job()`, `fail_job()`, `error_job()`, `should_retry()`, `prepare_retry()` | +| **`PipelineManager`** | Call `prepare_queue()`, `skip_job()`, `cancel_job()`, `reset_job()` | + +### Methods job code should use + +```python +# Get the JobRun ORM object (to read job_params, status, etc.) +job = job_manager.get_job() + +# Access the database session +score_set = job_manager.db.scalars(select(ScoreSet).where(...)).one() + +# Update progress (commits by default as a checkpoint) +job_manager.update_progress(current=50, total=100, message="Processing variants") +job_manager.update_progress(75, 100, "Annotating", commit=False) # Skip checkpoint + +# Update just the status message (commits by default) +job_manager.update_status_message("Connecting to ClinGen API...") + +# Add context for structured logging +job_manager.save_to_context({ + "score_set_id": score_set.id, + "correlation_id": correlation_id, + "function": "create_variants_for_score_set", +}) +logger.info("Started processing", extra=job_manager.logging_context()) +``` + +### Methods decorators/infrastructure use (not job code) + +| Method | What it does | +|--------|-------------| +| `start_job()` | Transitions QUEUED/PENDING → RUNNING, sets started_at timestamp | +| `complete_job(status, result)` | Transitions to terminal status, sets finished_at, records result | +| `succeed_job(result)` | Shortcut for `complete_job(SUCCEEDED, result)` | +| `fail_job(result)` | Shortcut for `complete_job(FAILED, result)` | +| `error_job(result)` | Shortcut for 
`complete_job(ERRORED, result)` | +| `cancel_job(result)` | Shortcut for `complete_job(CANCELLED, result)` | +| `skip_job(result)` | Shortcut for `complete_job(SKIPPED, result)` | +| `should_retry()` | Checks retry_count < max_retries AND failure_category is retryable | +| `prepare_retry(reason)` | Resets job to PENDING, increments retry_count, records retry history | +| `prepare_queue()` | Transitions PENDING → QUEUED before ARQ enqueueing | +| `reset_job()` | Resets all fields to initial state (for pipeline restart) | +| `get_job_status()` | Returns current `JobStatus` | +| `is_cancelled()` | Checks if job has been cancelled | + +### Commit discipline + +**JobManager methods do not commit.** They mutate the `JobRun` ORM object in memory. The **caller** (decorator or pipeline manager) is responsible for committing. + +**Exception**: `update_progress(commit=True)` (the default) commits immediately as a checkpoint. This is by design — it provides real-time progress visibility and creates safe transaction boundaries during long-running jobs. + +When `update_progress()` commits, it commits **all** pending session changes, not just the progress update. Call it only at safe transaction boundaries (e.g., after processing a batch of independent records). + +### Exception hierarchy + +``` +ManagerError +├── JobManagerError +│ ├── JobStateError # Cannot persist state changes (critical) +│ ├── JobTransitionError # Invalid state transition (e.g., start already-running job) +│ └── DatabaseConnectionError # Cannot fetch job from DB +└── PipelineManagerError + ├── PipelineStateError # Cannot persist pipeline state (critical) + ├── PipelineTransitionError # Invalid pipeline state transition + └── PipelineCoordinationError # Coordination failed (enqueueing, cancelling) +``` + +All exceptions are defined in `lib/managers/exceptions.py`. 
+ +## PipelineManager — Pipeline Coordination + +### Who uses it + +| Context | How it's used | +|---------|---------------| +| **`@with_pipeline_management` decorator** | Calls `coordinate_pipeline()` after each job completes | +| **`start_pipeline` job** | Calls `coordinate_pipeline()` explicitly for initial coordination | +| **`cleanup_stalled_jobs`** | Uses it to check dependencies before re-enqueueing stalled pipeline jobs | +| **Scripts** | Manual pipeline operations (pause, cancel, restart) | + +### Key methods + +| Method | What it does | +|--------|-------------| +| `start_pipeline()` | Sets CREATED → RUNNING, optionally coordinates | +| `coordinate_pipeline()` | Main coordination loop: updates status, enqueues ready jobs or cancels remaining | +| `transition_pipeline_status()` | Analyzes job status distribution, determines pipeline status | +| `enqueue_ready_jobs()` | Finds PENDING jobs with met dependencies, marks QUEUED, enqueues in ARQ | +| `cancel_remaining_jobs(reason)` | Skips PENDING jobs, cancels QUEUED/RUNNING jobs | +| `cancel_pipeline(reason)` | Sets pipeline CANCELLED, coordinates cleanup | +| `pause_pipeline(reason)` | Sets PAUSED, stops new job enqueueing | +| `unpause_pipeline(reason)` | Sets RUNNING, resumes coordination | +| `restart_pipeline()` | Resets all jobs and pipeline, starts fresh | +| `can_enqueue_job(job)` | Checks if all dependencies for a job are met | +| `should_skip_job_due_to_dependencies(job)` | Checks if a job has unfulfillable dependencies | +| `get_pipeline_progress()` | Returns progress statistics dict | +| `get_job_counts_by_status()` | Returns dict of `JobStatus → count` | + +### Commit discipline + +PipelineManager methods generally **flush** (not commit) for status changes. 
The notable exception: + +**`enqueue_ready_jobs()` commits before the async Redis enqueue loop.** This is critical to prevent deadlocks: +- `flush()` holds PostgreSQL row-level locks +- The `await` in the enqueue loop yields control to the event loop +- A downstream job started by ARQ could attempt a synchronous UPDATE on the locked row +- Since psycopg2 is synchronous, that UPDATE would block the event loop entirely + +By committing before the loop, we release the locks and prevent this deadlock scenario. + +## Status Grouping Constants + +The `lib/managers/constants.py` module defines commonly-used status groupings: + +```python +STARTABLE_JOB_STATUSES = [QUEUED, PENDING] +TERMINAL_JOB_STATUSES = [SUCCEEDED, FAILED, ERRORED, CANCELLED, SKIPPED] +COMPLETED_JOB_STATUSES = [SUCCEEDED, FAILED, ERRORED] +ACTIVE_JOB_STATUSES = [PENDING, QUEUED, RUNNING] +RETRYABLE_JOB_STATUSES = [FAILED, ERRORED, CANCELLED, SKIPPED] +CANCELLED_JOB_STATUSES = [CANCELLED, SKIPPED, FAILED, ERRORED] + +TERMINAL_PIPELINE_STATUSES = [SUCCEEDED, FAILED, PARTIAL, CANCELLED] +RUNNING_PIPELINE_STATUSES = [RUNNING] +CANCELLED_PIPELINE_STATUSES = [CANCELLED, FAILED] + +RETRYABLE_FAILURE_CATEGORIES = (NETWORK_ERROR, TIMEOUT, SERVICE_UNAVAILABLE) +``` + +These are used throughout the managers and decorators for state validation and transition logic. Always use these constants rather than hardcoding status checks. + +## See Also + +- [Job Decorators](job_decorators.md) — How decorators call manager methods +- [Pipeline Management](pipeline_management.md) — Detailed coordination logic +- [Best Practices](best_practices.md) — How to use JobManager from job code diff --git a/src/mavedb/worker/job_registry.md b/src/mavedb/worker/job_registry.md new file mode 100644 index 000000000..952b4359d --- /dev/null +++ b/src/mavedb/worker/job_registry.md @@ -0,0 +1,254 @@ +# Job Registry and Configuration + +The registry (`jobs/registry.py`) is the central manifest of all worker jobs. 
ARQ uses it to discover available functions, cron schedules, and job metadata. + +## Registry Components + +### `BACKGROUND_FUNCTIONS` + +A flat list of all async job functions that ARQ can execute. Every job — whether pipeline, standalone, or cron — must be listed here. + +```python +BACKGROUND_FUNCTIONS: List[Callable] = [ + # Variant processing jobs + create_variants_for_score_set, + map_variants_for_score_set, + # External service jobs + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, + refresh_clinvar_controls, + # ... etc + # Pipeline management jobs + start_pipeline, + # System maintenance jobs + cleanup_stalled_jobs, +] +``` + +ARQ resolves functions by name — the `job_function` field on `JobRun` must match `func.__name__` for the function listed here. + +### `BACKGROUND_CRONJOBS` + +Cron-scheduled jobs with ARQ's `cron()` utility: + +```python +BACKGROUND_CRONJOBS: List[CronJob] = [ + cron( + refresh_materialized_views, + name="refresh_all_materialized_views", + hour=20, minute=0, + keep_result=timedelta(minutes=2).total_seconds(), + ), + cron( + cleanup_stalled_jobs, + name="cleanup_stalled_jobs_cron", + minute={15, 45}, # Every 30 minutes + keep_result=timedelta(minutes=25).total_seconds(), + ), +] +``` + +### `STANDALONE_JOB_DEFINITIONS` + +Metadata for jobs that can be invoked independently via operational scripts (`run_job.py`). Maps function references to `JobDefinition` dicts: + +```python +STANDALONE_JOB_DEFINITIONS: dict[Callable, JobDefinition] = { + create_variants_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "updater_id": None, ...}, + "function": "create_variants_for_score_set", + "key": "create_variants_for_score_set", + "type": JobType.VARIANT_CREATION, + }, + # ... +} +``` + +These are used by `src/mavedb/scripts/run_job.py` to create a `JobRun` with the correct params structure for running a single job outside of a pipeline. 
+ +### `PIPELINE_DEFINITIONS` + +Located in `src/mavedb/lib/workflow/definitions.py` (not in the registry file). Defines multi-step pipeline workflows. See [Pipeline Management](pipeline_management.md#defining-a-new-pipeline) for details. + +## Adding a Pipeline Job + +Follow these steps to add a new job to an existing pipeline: + +### 1. Create the job function + +Create a new file or add to an existing file in the appropriate `jobs/<category>/` directory: + +```python +# src/mavedb/worker/jobs/external_services/my_new_service.py + +import logging +from sqlalchemy import select + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.score_set import ScoreSet +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def submit_to_new_service(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: +    """Submit mapped variants to NewService for annotation.""" +    job = job_manager.get_job() + +    _job_required_params = ["score_set_id", "correlation_id"] +    validate_job_params(_job_required_params, job) + +    score_set = job_manager.db.scalars( +        select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"]) +    ).one() +    correlation_id = job.job_params["correlation_id"] + +    job_manager.save_to_context({ +        "application": "mavedb-worker", +        "function": "submit_to_new_service", +        "resource": score_set.urn, +        "correlation_id": correlation_id, +    }) +    job_manager.update_progress(0, 100, "Starting NewService submission.") +    logger.info("Started NewService submission", extra=job_manager.logging_context()) + +    # ... business logic ... + +    job_manager.update_progress(100, 100, "NewService submission complete.") +    return JobExecutionOutcome.succeeded(data={"variants_submitted": count}) +``` + +### 2.
Export from the category's `__init__.py` + +```python +# src/mavedb/worker/jobs/external_services/__init__.py +from mavedb.worker.jobs.external_services.my_new_service import submit_to_new_service +``` + +### 3. Register in `registry.py` + +Add the function to `BACKGROUND_FUNCTIONS`: + +```python +from mavedb.worker.jobs.external_services import submit_to_new_service + +BACKGROUND_FUNCTIONS: List[Callable] = [ + # ... existing entries ... + submit_to_new_service, +] +``` + +### 4. Add to pipeline definition + +In `src/mavedb/lib/workflow/definitions.py`, add a `JobDefinition` to the appropriate pipeline: + +```python +{ + "key": "submit_to_new_service", + "function": "submit_to_new_service", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, + "score_set_id": None, + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], +}, +``` + +### 5. Write tests + +Create `tests/worker/jobs/external_services/test_my_new_service.py` following the patterns in existing test files (e.g., `test_clingen.py`). + +## Adding a Standalone/Cron Job + +### 1. Create the job function + +```python +# src/mavedb/worker/jobs/system/my_maintenance.py + +import logging +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.decorators.job_management import with_job_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_guaranteed_job_run_record("system_maintenance") +@with_job_management +async def my_maintenance_job(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Periodic maintenance task.""" + db = job_manager.db + + # ... maintenance logic ... + + return JobExecutionOutcome.succeeded(data={"records_cleaned": count}) +``` + +### 2. Export and register + +Same as steps 2-3 for pipeline jobs. + +### 3. 
Add cron schedule (if applicable) + +```python +BACKGROUND_CRONJOBS: List[CronJob] = [ + # ... existing entries ... + cron( + my_maintenance_job, + name="my_maintenance_job_cron", + hour=4, minute=0, # Run daily at 4:00 AM + keep_result=timedelta(minutes=5).total_seconds(), + ), +] +``` + +### 4. Add to `STANDALONE_JOB_DEFINITIONS` (if needed) + +Only if the job should be invocable via `run_job.py` for manual execution: + +```python +STANDALONE_JOB_DEFINITIONS: dict[Callable, JobDefinition] = { + # ... existing entries ... + my_maintenance_job: { + "dependencies": [], + "params": {}, + "function": "my_maintenance_job", + "key": "my_maintenance_job", + "type": JobType.SYSTEM_MAINTENANCE, + }, +} +``` + +## Worker Settings + +The `ArqWorkerSettings` class (in `settings/worker.py`) brings everything together for ARQ: + +```python +class ArqWorkerSettings: + on_startup = startup # Create ProcessPoolExecutor + on_shutdown = shutdown + on_job_start = on_job_start # Initialize hdp (HGVS data provider), state dict + on_job_end = on_job_end + after_job_end = log_job # Canonical job logging + redis_settings = RedisWorkerSettings + functions = BACKGROUND_FUNCTIONS + cron_jobs = BACKGROUND_CRONJOBS + job_timeout = 5 * 60 * 60 # 5 hours +``` + +The lifecycle hooks (in `settings/lifecycle.py`) manage the ARQ context dict (`ctx`): +- `startup`: Creates `ProcessPoolExecutor` for CPU-intensive tasks +- `on_job_start`: Initializes `hdp` (CDOT REST data provider) and `state` dict +- `standalone_ctx()`: Creates an equivalent context for running jobs outside ARQ (used by scripts) + +## See Also + +- [Job System Overview](jobs_overview.md) — How everything fits together +- [Pipeline Management](pipeline_management.md) — Pipeline definitions and coordination +- [Best Practices](best_practices.md) — Patterns for writing job code diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py deleted file mode 100644 index 3a690d974..000000000 --- a/src/mavedb/worker/jobs.py +++ 
/dev/null @@ -1,1766 +0,0 @@ -import asyncio -import functools -import logging -from contextlib import asynccontextmanager -from datetime import date, timedelta -from typing import Any, Optional, Sequence - -import pandas as pd -from arq import ArqRedis -from arq.jobs import Job, JobStatus -from cdot.hgvs.dataproviders import RESTDataProvider -from sqlalchemy import cast, delete, null, select -from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import Session - -from mavedb.data_providers.services import vrs_mapper -from mavedb.db.view import refresh_all_mat_views -from mavedb.lib.clingen.constants import ( - CAR_SUBMISSION_ENDPOINT, - CLIN_GEN_SUBMISSION_ENABLED, - DEFAULT_LDH_SUBMISSION_BATCH_SIZE, - LDH_SUBMISSION_ENDPOINT, - LINKED_DATA_RETRY_THRESHOLD, -) -from mavedb.lib.clingen.content_constructors import construct_ldh_submission -from mavedb.lib.clingen.services import ( - ClinGenAlleleRegistryService, - ClinGenLdhService, - clingen_allele_id_from_ldh_variation, - get_allele_registry_associations, - get_clingen_variation, -) -from mavedb.lib.exceptions import ( - LinkingEnqueueError, - MappingEnqueueError, - NonexistentMappingReferenceError, - NonexistentMappingResultsError, - SubmissionEnqueueError, - UniProtIDMappingEnqueueError, - UniProtPollingEnqueueError, -) -from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.mapping import ANNOTATION_LAYERS, extract_ids_from_post_mapped_metadata -from mavedb.lib.score_sets import ( - columns_for_dataset, - create_variants, - create_variants_data, -) -from mavedb.lib.slack import log_and_send_slack_message, send_slack_error, send_slack_message -from mavedb.lib.uniprot.constants import UNIPROT_ID_MAPPING_ENABLED -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession -from 
mavedb.lib.validation.dataframe.dataframe import ( - validate_and_standardize_dataframe_pair, -) -from mavedb.lib.validation.exceptions import ValidationError -from mavedb.lib.variants import get_hgvs_from_post_mapped -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.enums.processing_state import ProcessingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.published_variant import PublishedVariantsMV -from mavedb.models.score_set import ScoreSet -from mavedb.models.user import User -from mavedb.models.variant import Variant -from mavedb.view_models.score_set_dataset_columns import DatasetColumnMetadata - -logger = logging.getLogger(__name__) - -MAPPING_QUEUE_NAME = "vrs_mapping_queue" -MAPPING_CURRENT_ID_NAME = "vrs_mapping_current_job_id" -BACKOFF_LIMIT = 5 -MAPPING_BACKOFF_IN_SECONDS = 15 -LINKING_BACKOFF_IN_SECONDS = 15 * 60 - - -#################################################################################################### -# Job utilities -#################################################################################################### - - -def setup_job_state( - ctx, invoker: Optional[int], resource: Optional[str], correlation_id: Optional[str] -) -> dict[str, Any]: - ctx["state"][ctx["job_id"]] = { - "application": "mavedb-worker", - "user": invoker, - "resource": resource, - "correlation_id": correlation_id, - } - return ctx["state"][ctx["job_id"]] - - -async def enqueue_job_with_backoff( - redis: ArqRedis, job_name: str, attempt: int, backoff: int, *args -) -> tuple[Optional[str], bool, Any]: - new_job_id = None - limit_reached = attempt > BACKOFF_LIMIT - if not limit_reached: - limit_reached = True - backoff = backoff * (2**attempt) - attempt = attempt + 1 - - # NOTE: for jobs supporting backoff, `attempt` should be the final argument. 
- new_job = await redis.enqueue_job( - job_name, - *args, - attempt, - _defer_by=timedelta(seconds=backoff), - ) - - if new_job: - new_job_id = new_job.job_id - - return (new_job_id, not limit_reached, backoff) - - -#################################################################################################### -# Creating variants -#################################################################################################### - - -async def create_variants_for_score_set( - ctx, - correlation_id: str, - score_set_id: int, - updater_id: int, - scores: pd.DataFrame, - counts: pd.DataFrame, - score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, - count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, -): - """ - Create variants for a score set. Intended to be run within a worker. - On any raised exception, ensure ProcessingState of score set is set to `failed` prior - to exiting. - """ - logging_context = {} - try: - db: Session = ctx["db"] - hdp: RESTDataProvider = ctx["hdp"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) - logger.info(msg="Began processing of score set variants.", extra=logging_context) - - updated_by = db.scalars(select(User).where(User.id == updater_id)).one() - - score_set.modified_by = updated_by - score_set.processing_state = ProcessingState.processing - score_set.mapping_state = MappingState.pending_variant_processing - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - - db.add(score_set) - db.commit() - db.refresh(score_set) - - if not score_set.target_genes: - logger.warning( - msg="No targets are associated with this score set; could not create variants.", - extra=logging_context, - ) - raise ValueError("Can't create variants when score set has no 
targets.") - - validated_scores, validated_counts, validated_score_columns_metadata, validated_count_columns_metadata = ( - validate_and_standardize_dataframe_pair( - scores_df=scores, - counts_df=counts, - score_columns_metadata=score_columns_metadata, - count_columns_metadata=count_columns_metadata, - targets=score_set.target_genes, - hdp=hdp, - ) - ) - - score_set.dataset_columns = { - "score_columns": columns_for_dataset(validated_scores), - "count_columns": columns_for_dataset(validated_counts), - "score_columns_metadata": validated_score_columns_metadata - if validated_score_columns_metadata is not None - else {}, - "count_columns_metadata": validated_count_columns_metadata - if validated_count_columns_metadata is not None - else {}, - } - - # Delete variants after validation occurs so we don't overwrite them in the case of a bad update. - if score_set.variants: - existing_variants = db.scalars(select(Variant.id).where(Variant.score_set_id == score_set.id)).all() - db.execute(delete(MappedVariant).where(MappedVariant.variant_id.in_(existing_variants))) - db.execute(delete(Variant).where(Variant.id.in_(existing_variants))) - logging_context["deleted_variants"] = score_set.num_variants - score_set.num_variants = 0 - - logger.info(msg="Deleted existing variants from score set.", extra=logging_context) - - db.flush() - db.refresh(score_set) - - variants_data = create_variants_data(validated_scores, validated_counts, None) - create_variants(db, score_set, variants_data) - - # Validation errors arise from problematic user data. These should be inserted into the database so failures can - # be persisted to them. 
- except ValidationError as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.processing_errors = {"exception": str(e), "detail": e.triggering_exceptions} - score_set.mapping_state = MappingState.not_attempted - - if score_set.num_variants: - score_set.processing_errors["exception"] = ( - f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" - ) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.warning(msg="Encountered a validation error while processing variants.", extra=logging_context) - - return {"success": False} - - # NOTE: Since these are likely to be internal errors, it makes less sense to add them to the DB and surface them to the end user. - # Catch all non-system exiting exceptions. - except Exception as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.processing_errors = {"exception": str(e), "detail": []} - score_set.mapping_state = MappingState.not_attempted - - if score_set.num_variants: - score_set.processing_errors["exception"] = ( - f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" - ) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.warning(msg="Encountered an internal exception while processing variants.", extra=logging_context) - - send_slack_error(err=e) - return {"success": False} - - # Catch all other exceptions. The exceptions caught here were intented to be system exiting. 
- except BaseException as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.mapping_state = MappingState.not_attempted - db.commit() - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.error( - msg="Encountered an unhandled exception while creating variants for score set.", extra=logging_context - ) - - # Don't raise BaseExceptions so we may emit canonical logs (TODO: Perhaps they are so problematic we want to raise them anyway). - return {"success": False} - - else: - score_set.processing_state = ProcessingState.success - score_set.processing_errors = null() - - logging_context["created_variants"] = score_set.num_variants - logging_context["processing_state"] = score_set.processing_state.name - logger.info(msg="Finished creating variants in score set.", extra=logging_context) - - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - await redis.enqueue_job("variant_mapper_manager", correlation_id, updater_id) - score_set.mapping_state = MappingState.queued - finally: - db.add(score_set) - db.commit() - db.refresh(score_set) - logger.info(msg="Committed new variants to score set.", extra=logging_context) - - ctx["state"][ctx["job_id"]] = logging_context.copy() - return {"success": True} - - -#################################################################################################### -# Mapping variants -#################################################################################################### - - -@asynccontextmanager -async def mapping_in_execution(redis: ArqRedis, job_id: str): - await redis.set(MAPPING_CURRENT_ID_NAME, job_id) - try: - yield - finally: - await redis.set(MAPPING_CURRENT_ID_NAME, "") - - -async def map_variants_for_score_set( - ctx: dict, correlation_id: str, 
score_set_id: int, updater_id: int, attempt: int = 1 -) -> dict: - async with mapping_in_execution(redis=ctx["redis"], job_id=ctx["job_id"]): - logging_context = {} - score_set = None - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) - logging_context["attempt"] = attempt - logger.info(msg="Started variant mapping", extra=logging_context) - - score_set.mapping_state = MappingState.processing - score_set.mapping_errors = null() - db.add(score_set) - db.commit() - - mapping_urn = score_set.urn - assert mapping_urn, "A valid URN is needed to map this score set." - - logging_context["current_mapping_resource"] = mapping_urn - logging_context["mapping_state"] = score_set.mapping_state - logger.debug(msg="Fetched score set metadata for mapping job.", extra=logging_context) - - # Do not block Worker event loop during mapping, see: https://arq-docs.helpmanual.io/#synchronous-jobs. - vrs = vrs_mapper() - blocking = functools.partial(vrs.map_score_set, mapping_urn) - loop = asyncio.get_running_loop() - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Variant mapper encountered an unexpected error during setup. 
This job will not be retried.", - extra=logging_context, - ) - - db.rollback() - if score_set: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - - return {"success": False, "retried": False, "enqueued_jobs": []} - - mapping_results = None - try: - mapping_results = await loop.run_in_executor(ctx["pool"], blocking) - logger.debug(msg="Done mapping variants.", extra=logging_context) - - except Exception as e: - db.rollback() - score_set.mapping_errors = { - "error_message": f"Encountered an internal server error during mapping. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." - } - db.add(score_set) - db.commit() - - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.warning( - msg="Variant mapper encountered an unexpected error while mapping variants. This job will be retried.", - extra=logging_context, - ) - - new_job_id = None - max_retries_exceeded = None - try: - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id - ) - # If we fail to enqueue a mapping manager for this score set, evict it from the queue. 
- if new_job_id is None: - await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as backoff_e: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - send_slack_error(backoff_e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} - logger.critical( - msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", - extra=logging_context, - ) - else: - if new_job_id and not max_retries_exceeded: - score_set.mapping_state = MappingState.queued - db.add(score_set) - db.commit() - logger.info( - msg="After encountering an error while mapping variants, another mapping job was queued.", - extra=logging_context, - ) - elif new_job_id is None and not max_retries_exceeded: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - logger.error( - msg="After encountering an error while mapping variants, another mapping job was unable to be queued. This score set will not be mapped.", - extra=logging_context, - ) - else: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - logger.error( - msg="After encountering an error while mapping variants, the maximum retries for this job were exceeded. 
This score set will not be mapped.", - extra=logging_context, - ) - finally: - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_jobs": [job for job in [new_job_id] if job], - } - - try: - if mapping_results: - mapped_scores = mapping_results.get("mapped_scores") - if not mapped_scores: - # if there are no mapped scores, the score set failed to map. - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} - else: - reference_metadata = mapping_results.get("reference_sequences") - if not reference_metadata: - raise NonexistentMappingReferenceError() - - for target_gene_identifier in reference_metadata: - target_gene = next( - ( - target_gene - for target_gene in score_set.target_genes - if target_gene.name == target_gene_identifier - ), - None, - ) - if not target_gene: - raise ValueError( - f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." 
- ) - # allow for multiple annotation layers - pre_mapped_metadata: dict[str, Any] = {} - post_mapped_metadata: dict[str, Any] = {} - excluded_pre_mapped_keys = {"sequence"} - - gene_info = reference_metadata[target_gene_identifier].get("gene_info") - if gene_info: - target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") - post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") - - for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: - layer_premapped = reference_metadata[target_gene_identifier]["layers"][ - annotation_layer - ].get("computed_reference_sequence") - if layer_premapped: - pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { - k: layer_premapped[k] - for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys - } - layer_postmapped = reference_metadata[target_gene_identifier]["layers"][ - annotation_layer - ].get("mapped_reference_sequence") - if layer_postmapped: - post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped - target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) - target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) - - total_variants = 0 - successful_mapped_variants = 0 - for mapped_score in mapped_scores: - total_variants += 1 - variant_urn = mapped_score.get("mavedb_id") - variant = db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() - - # there should only be one current mapped variant per variant id, so update old mapped variant to current = false - existing_mapped_variant = ( - db.query(MappedVariant) - .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) - .one_or_none() - ) - - if existing_mapped_variant: - existing_mapped_variant.current = False - db.add(existing_mapped_variant) - - if mapped_score.get("pre_mapped") and mapped_score.get("post_mapped"): - successful_mapped_variants += 1 - - mapped_variant = MappedVariant( - 
pre_mapped=mapped_score.get("pre_mapped", null()), - post_mapped=mapped_score.get("post_mapped", null()), - variant_id=variant.id, - modification_date=date.today(), - mapped_date=mapping_results["mapped_date_utc"], - vrs_version=mapped_score.get("vrs_version", null()), - mapping_api_version=mapping_results["dcd_mapping_version"], - error_message=mapped_score.get("error_message", null()), - current=True, - ) - db.add(mapped_variant) - - if successful_mapped_variants == 0: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "All variants failed to map"} - elif successful_mapped_variants < total_variants: - score_set.mapping_state = MappingState.incomplete - else: - score_set.mapping_state = MappingState.complete - - logging_context["mapped_variants_inserted_db"] = len(mapped_scores) - logging_context["variants_successfully_mapped"] = successful_mapped_variants - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["mapping_errors"] = score_set.mapping_errors - logger.info(msg="Inserted mapped variants into db.", extra=logging_context) - - else: - raise NonexistentMappingResultsError() - - db.add(score_set) - db.commit() - - except Exception as e: - db.rollback() - score_set.mapping_errors = { - "error_message": f"Encountered an unexpected error while parsing mapped variants. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." - } - db.add(score_set) - db.commit() - - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.warning( - msg="An unexpected error occurred during variant mapping. 
This job will be attempted again.", - extra=logging_context, - ) - - new_job_id = None - max_retries_exceeded = None - try: - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id - ) - # If we fail to enqueue a mapping manager for this score set, evict it from the queue. - if new_job_id is None: - await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as backoff_e: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - send_slack_error(backoff_e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} - logger.critical( - msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", - extra=logging_context, - ) - else: - if new_job_id and not max_retries_exceeded: - score_set.mapping_state = MappingState.queued - logger.info( - msg="After encountering an error while parsing mapped variants, another mapping job was queued.", - extra=logging_context, - ) - elif new_job_id is None and not max_retries_exceeded: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - logger.error( - msg="After encountering an error while parsing mapped variants, another mapping job was unable to be queued. 
This score set will not be mapped.", - extra=logging_context, - ) - else: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - logger.error( - msg="After encountering an error while parsing mapped variants, the maximum retries for this job were exceeded. This score set will not be mapped.", - extra=logging_context, - ) - finally: - db.add(score_set) - db.commit() - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_jobs": [job for job in [new_job_id] if job], - } - - new_uniprot_job_id = None - try: - if UNIPROT_ID_MAPPING_ENABLED: - new_job = await redis.enqueue_job( - "submit_uniprot_mapping_jobs_for_score_set", - score_set.id, - correlation_id, - ) - - if new_job: - new_uniprot_job_id = new_job.job_id - - logging_context["submit_uniprot_mapping_job_id"] = new_uniprot_job_id - logger.info(msg="Queued a new UniProt mapping job.", extra=logging_context) - - else: - raise UniProtIDMappingEnqueueError() - else: - logger.warning( - msg="UniProt ID mapping is disabled, skipped submission of UniProt mapping jobs.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not enqueue UniProt mapping job for score set {score_set.urn}. UniProt mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant UniProt submission encountered an unexpected error while attempting to enqueue a mapping job. 
This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_uniprot_job_id] if job]} - - new_clingen_job_id = None - try: - if CLIN_GEN_SUBMISSION_ENABLED: - new_job = await redis.enqueue_job( - "submit_score_set_mappings_to_car", - correlation_id, - score_set.id, - ) - - if new_job: - new_clingen_job_id = new_job.job_id - - logging_context["submit_clingen_variants_job_id"] = new_clingen_job_id - logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) - - else: - raise SubmissionEnqueueError() - else: - logger.warning( - msg="ClinGen submission is disabled, skipped submission of mapped variants to CAR and LDH.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not submit mappings to CAR and/or LDH mappings for score set {score_set.urn}. Mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. 
This job will not be retried.", - extra=logging_context, - ) - - return { - "success": False, - "retried": False, - "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], - } - - ctx["state"][ctx["job_id"]] = logging_context.copy() - return { - "success": True, - "retried": False, - "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], - } - - -async def variant_mapper_manager(ctx: dict, correlation_id: str, updater_id: int, attempt: int = 1) -> dict: - logging_context = {} - mapping_job_id = None - mapping_job_status = None - queued_score_set = None - try: - redis: ArqRedis = ctx["redis"] - db: Session = ctx["db"] - - logging_context = setup_job_state(ctx, updater_id, None, correlation_id) - logging_context["attempt"] = attempt - logger.debug(msg="Variant mapping manager began execution", extra=logging_context) - - queue_length = await redis.llen(MAPPING_QUEUE_NAME) # type: ignore - queued_id = await redis.rpop(MAPPING_QUEUE_NAME) # type: ignore - logging_context["variant_mapping_queue_length"] = queue_length - - # Setup the job id cache if it does not already exist. 
- if not await redis.exists(MAPPING_CURRENT_ID_NAME): - await redis.set(MAPPING_CURRENT_ID_NAME, "") - - if not queued_id: - logger.debug(msg="No mapping jobs exist in the queue.", extra=logging_context) - return {"success": True, "enqueued_job": None} - else: - queued_id = queued_id.decode("utf-8") - queued_score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_id)).one() - - logging_context["upcoming_mapping_resource"] = queued_score_set.urn - logger.debug(msg="Found mapping job(s) still in queue.", extra=logging_context) - - mapping_job_id = await redis.get(MAPPING_CURRENT_ID_NAME) - if mapping_job_id: - mapping_job_id = mapping_job_id.decode("utf-8") - mapping_job_status = (await Job(job_id=mapping_job_id, redis=redis).status()).value - - logging_context["existing_mapping_job_status"] = mapping_job_status - logging_context["existing_mapping_job_id"] = mapping_job_id - - except Exception as e: - send_slack_error(e) - - # Attempt to remove this item from the mapping queue. 
- try: - await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore - logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) - except Exception: - pass - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error(msg="Variant mapper manager encountered an unexpected error during setup.", extra=logging_context) - - return {"success": False, "enqueued_job": None} - - new_job = None - new_job_id = None - try: - if not mapping_job_id or mapping_job_status in (JobStatus.not_found, JobStatus.complete): - logger.debug(msg="No mapping jobs are running, queuing a new one.", extra=logging_context) - - new_job = await redis.enqueue_job( - "map_variants_for_score_set", correlation_id, queued_score_set.id, updater_id, attempt - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["new_mapping_job_id"] = new_job_id - logger.info(msg="Queued a new mapping job.", extra=logging_context) - - return {"success": True, "enqueued_job": new_job_id} - - logger.info( - msg="A mapping job is already running, or a new job was unable to be enqueued. Deferring mapping by 5 minutes.", - extra=logging_context, - ) - - new_job = await redis.enqueue_job( - "variant_mapper_manager", - correlation_id, - updater_id, - attempt, - _defer_by=timedelta(minutes=5), - ) - - if new_job: - # Ensure this score set remains in the front of the queue. - queued_id = await redis.rpush(MAPPING_QUEUE_NAME, queued_score_set.id) # type: ignore - new_job_id = new_job.job_id - - logging_context["new_mapping_manager_job_id"] = new_job_id - logger.info(msg="Deferred a new mapping manager job.", extra=logging_context) - - # Our persistent Redis queue and ARQ's execution rules ensure that even if the worker is stopped and not restarted - # before the deferred time, these deferred jobs will still run once able. 
- return {"success": True, "enqueued_job": new_job_id} - - raise MappingEnqueueError() - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Variant mapper manager encountered an unexpected error while enqueing a mapping job. This job will not be retried.", - extra=logging_context, - ) - - db.rollback() - - # We shouldn't rely on the passed score set id matching the score set we are operating upon. - if not queued_score_set: - return {"success": False, "enqueued_job": new_job_id} - - # Attempt to remove this item from the mapping queue. - try: - await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore - logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) - except Exception: - pass - - score_set_exc = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_score_set.id)).one_or_none() - if score_set_exc: - score_set_exc.mapping_state = MappingState.failed - score_set_exc.mapping_errors = "Unable to queue a new mapping job or defer score set mapping." - db.add(score_set_exc) - db.commit() - - return {"success": False, "enqueued_job": new_job_id} - - -#################################################################################################### -# Materialized Views -#################################################################################################### - - -# TODO#405: Refresh materialized views within an executor. 
-async def refresh_materialized_views(ctx: dict): - logging_context = setup_job_state(ctx, None, None, None) - logger.debug(msg="Began refresh materialized views.", extra=logging_context) - refresh_all_mat_views(ctx["db"]) - ctx["db"].commit() - logger.debug(msg="Done refreshing materialized views.", extra=logging_context) - return {"success": True} - - -async def refresh_published_variants_view(ctx: dict, correlation_id: str): - logging_context = setup_job_state(ctx, None, None, correlation_id) - logger.debug(msg="Began refresh of published variants materialized view.", extra=logging_context) - PublishedVariantsMV.refresh(ctx["db"]) - ctx["db"].commit() - logger.debug(msg="Done refreshing published variants materialized view.", extra=logging_context) - return {"success": True} - - -#################################################################################################### -# ClinGen resource creation / linkage -#################################################################################################### - - -async def submit_score_set_mappings_to_car(ctx: dict, correlation_id: str, score_set_id: int): - logging_context = {} - score_set = None - text = "Could not submit mappings to ClinGen Allele Registry for score set %s. Mappings for this score set should be submitted manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started CAR mapped resource submission", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to submit CAR objects for this score set." 
- - logging_context["current_car_submission_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for CAR mapped resource submission.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="CAR mapped resource submission encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_post_mapped_objects = db.execute( - select(MappedVariant.id, MappedVariant.post_mapped) - .join(Variant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() - - if not variant_post_mapped_objects: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", - extra=logging_context, - ) - return {"success": True, "retried": False, "enqueued_job": None} - - variant_post_mapped_hgvs: dict[str, list[int]] = {} - for mapped_variant_id, post_mapped in variant_post_mapped_objects: - hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) - - if not hgvs_for_post_mapped: - logger.warning( - msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant_id}. 
Skipping submission of this variant.", - extra=logging_context, - ) - continue - - if hgvs_for_post_mapped in variant_post_mapped_hgvs: - variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) - else: - variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to construct post mapped HGVS strings. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - if not CAR_SUBMISSION_ENDPOINT: - logger.warning( - msg="ClinGen Allele Registry submission is disabled (no submission endpoint), skipping submission of mapped variants to CAR.", - extra=logging_context, - ) - return {"success": False, "retried": False, "enqueued_job": None} - - car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) - registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. 
This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) - for hgvs_string, caid in linked_alleles.items(): - mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] - mapped_variants = db.scalars(select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids))).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = caid - db.add(mapped_variant) - - db.commit() - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - new_job_id = None - try: - new_job = await redis.enqueue_job( - "submit_score_set_mappings_to_ldh", - correlation_id, - score_set.id, - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["submit_clingen_ldh_variants_job_id"] = new_job_id - logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) - - else: - raise SubmissionEnqueueError() - - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not submit mappings to LDH for score set {score_set.urn}. Mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. 
This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": new_job_id} - - ctx["state"][ctx["job_id"]] = logging_context.copy() - return {"success": True, "retried": False, "enqueued_job": new_job_id} - - -async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score_set_id: int): - logging_context = {} - score_set = None - text = ( - "Could not submit mappings to LDH for score set %s. Mappings for this score set should be submitted manually." - ) - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started LDH mapped resource submission", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to submit LDH objects for this score set." - - logging_context["current_ldh_submission_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for ldh mapped resource submission.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error during setup. 
This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) - ldh_service.authenticate() - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_objects = db.execute( - select(Variant, MappedVariant) - .join(MappedVariant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() - - if not variant_objects: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", - extra=logging_context, - ) - return {"success": True, "retried": False, "enqueued_job": None} - - variant_content = [] - for variant, mapped_variant in variant_objects: - variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) - - if not variation: - logger.warning( - msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant.id}. Skipping submission of this variant.", - extra=logging_context, - ) - continue - - variant_content.append((variation, variant, mapped_variant)) - - submission_content = construct_ldh_submission(variant_content) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to construct submission objects. 
This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - blocking = functools.partial( - ldh_service.dispatch_submissions, submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE - ) - loop = asyncio.get_running_loop() - submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while dispatching submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - assert not submission_failures, f"{len(submission_failures)} submissions failed to be dispatched to the LDH." - logger.info(msg="Dispatched all variant mapping submissions to the LDH.", extra=logging_context) - except AssertionError as e: - send_slack_error(e) - send_slack_message( - text=f"{len(submission_failures)} submissions failed to be dispatched to the LDH for score set {score_set.urn}." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission failed to submit all mapping resources. 
This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - new_job_id = None - try: - new_job = await redis.enqueue_job( - "link_clingen_variants", - correlation_id, - score_set.id, - 1, - _defer_by=timedelta(seconds=LINKING_BACKOFF_IN_SECONDS), - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["link_clingen_variants_job_id"] = new_job_id - logger.info(msg="Queued a new ClinGen linking job.", extra=logging_context) - - else: - raise LinkingEnqueueError() - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to enqueue a linking job. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": new_job_id} - - return {"success": True, "retried": False, "enqueued_job": new_job_id} - - -def do_clingen_fetch(variant_urns): - return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] - - -async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, attempt: int) -> dict: - logging_context = {} - score_set = None - text = "Could not link mappings to LDH for score set %s. Mappings for this score set should be linked manually." 
- try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logging_context["linkage_retry_threshold"] = LINKED_DATA_RETRY_THRESHOLD - logging_context["attempt"] = attempt - logging_context["max_attempts"] = BACKOFF_LIMIT - logger.info(msg="Started LDH mapped resource linkage", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to link LDH objects for this score set." - - logging_context["current_ldh_linking_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for ldh mapped resource linkage.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_urns = db.scalars( - select(Variant.urn) - .join(MappedVariant) - .join(ScoreSet) - .where( - ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None) - ) - ).all() - num_variant_urns = len(variant_urns) - - logging_context["variants_to_link_ldh"] = num_variant_urns - - if not variant_urns: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do). 
A gnomAD linkage job will not be enqueued, as no variants will have a CAID.", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - logger.info( - msg="Found current mapped variants with post mapped metadata for this score set. Attempting to link them to LDH submissions.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=logging_context) - - # TODO#372: Non-nullable variant urns. - blocking = functools.partial( - do_clingen_fetch, - variant_urns, # type: ignore - ) - loop = asyncio.get_running_loop() - linked_data = await loop.run_in_executor(ctx["pool"], blocking) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - linked_allele_ids = [ - (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) - for variant_urn, clingen_variation in linked_data - ] - - linkage_failures = [] - for variant_urn, ldh_variation in linked_allele_ids: - # XXX: Should we unlink variation if it is not found? Does this constitute a failure? 
- if not ldh_variation: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", - extra=logging_context, - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant = db.scalars( - select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) - ).one_or_none() - - if not mapped_variant: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", - extra=logging_context, - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant.clingen_allele_id = ldh_variation - db.add(mapped_variant) - - db.commit() - - except Exception as e: - db.rollback() - - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - num_linkage_failures = len(linkage_failures) - ratio_failed_linking = round(num_linkage_failures / num_variant_urns, 3) - logging_context["linkage_failure_rate"] = ratio_failed_linking - logging_context["linkage_failures"] = num_linkage_failures - logging_context["linkage_successes"] = num_variant_urns - num_linkage_failures - - assert ( - len(linked_allele_ids) == num_variant_urns - ), f"{num_variant_urns - len(linked_allele_ids)} appear to not have been attempted to be linked." 
- - job_succeeded = False - if not linkage_failures: - logger.info( - msg="Successfully linked all mapped variants to LDH submissions.", - extra=logging_context, - ) - - job_succeeded = True - - elif ratio_failed_linking < LINKED_DATA_RETRY_THRESHOLD: - logger.warning( - msg="Linkage failures exist, but did not exceed the retry threshold.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} mapped variants to LDH submissions for score set {score_set.urn}." - f"The retry threshold was not exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." - ) - - job_succeeded = True - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to finalize linkage. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - if job_succeeded: - gnomad_linking_job_id = None - try: - new_job = await redis.enqueue_job( - "link_gnomad_variants", - correlation_id, - score_set.id, - ) - - if new_job: - gnomad_linking_job_id = new_job.job_id - - logging_context["link_gnomad_variants_job_id"] = gnomad_linking_job_id - logger.info(msg="Queued a new gnomAD linking job.", extra=logging_context) - - else: - raise LinkingEnqueueError() - - except Exception as e: - job_succeeded = False - - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to enqueue a gnomAD linking job. GnomAD variants should be linked manually for this score set. 
This job will not be retried.", - extra=logging_context, - ) - finally: - return {"success": job_succeeded, "retried": False, "enqueued_job": gnomad_linking_job_id} - - # If we reach this point, we should consider the job failed (there were failures which exceeded our retry threshold). - new_job_id = None - max_retries_exceeded = None - try: - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - ctx["redis"], "variant_mapper_manager", attempt, LINKING_BACKOFF_IN_SECONDS, correlation_id - ) - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.critical( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to retry a failed linkage job. This job will not be retried.", - extra=logging_context, - ) - else: - if new_job_id and not max_retries_exceeded: - logger.info( - msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was queued.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking * 100}% of total mapped variants for {score_set.urn})." - f"This job was successfully retried. This was attempt {attempt}. Retry will occur in {backoff_time} seconds. URNs failed to link: {', '.join(linkage_failures)}." - ) - elif new_job_id is None and not max_retries_exceeded: - logger.error( - msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was unable to be queued.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." 
- f"This job could not be retried due to an unexpected issue while attempting to enqueue another linkage job. This was attempt {attempt}. URNs failed to link: {', '.join(linkage_failures)}." - ) - else: - logger.error( - msg="After a failure condition while linking mapped variants to LDH submissions, the maximum retries for this job were exceeded. The reamining linkage failures will not be retried.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"The retry threshold was exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." - ) - - finally: - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_job": new_job_id, - } - - -######################################################################################################## -# Mapping between Mapped Metadata and UniProt IDs -######################################################################################################## - - -async def submit_uniprot_mapping_jobs_for_score_set(ctx, score_set_id: int, correlation_id: Optional[str] = None): - logging_context = {} - score_set = None - spawned_mapping_jobs: dict[int, Optional[str]] = {} - text = "Could not submit mapping jobs to UniProt for this score set %s. Mapping jobs for this score set should be submitted manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started UniProt mapping job", extra=logging_context) - - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set_id}. Skipped mapping targets to UniProt." 
- log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) - - return {"success": True, "retried": False, "enqueued_jobs": []} - - except Exception as e: - send_slack_error(e) - if score_set: - msg = text % score_set.urn - else: - msg = text % score_set_id - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - try: - uniprot_api = UniProtIDMappingAPI() - logging_context["total_target_genes_to_map_to_uniprot"] = len(score_set.target_genes) - for target_gene in score_set.target_genes: - spawned_mapping_jobs[target_gene.id] = None # type: ignore - - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - ac_to_map = acs[0] - from_db = infer_db_name_from_sequence_accession(ac_to_map) - - try: - spawned_mapping_jobs[target_gene.id] = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore - except Exception as e: - log_and_send_slack_message( - msg=f"Failed to submit UniProt mapping job for target gene {target_gene.id}: {e}. 
This target will be skipped.", - ctx=logging_context, - level=logging.WARNING, - ) - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg=f"UniProt mapping job encountered an unexpected error while attempting to submit mapping jobs for score set {score_set.urn}. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, - ) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - new_job_id = None - try: - successfully_spawned_mapping_jobs = sum(1 for job in spawned_mapping_jobs.values() if job is not None) - logging_context["successfully_spawned_mapping_jobs"] = successfully_spawned_mapping_jobs - - if not successfully_spawned_mapping_jobs: - msg = f"No UniProt mapping jobs were successfully spawned for score set {score_set.urn}. Skipped enqueuing polling job." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - return {"success": True, "retried": False, "enqueued_jobs": []} - - new_job = await redis.enqueue_job( - "poll_uniprot_mapping_jobs_for_score_set", - spawned_mapping_jobs, - score_set_id, - correlation_id, - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["poll_uniprot_mapping_job_id"] = new_job_id - logger.info(msg="Enqueued polling jobs for UniProt mapping jobs.", extra=logging_context) - - else: - raise UniProtPollingEnqueueError() - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg="UniProt mapping job encountered an unexpected error while attempting to enqueue polling jobs for mapping jobs. 
This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, - ) - - return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} - - return {"success": True, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} - - -async def poll_uniprot_mapping_jobs_for_score_set( - ctx, mapping_jobs: dict[int, Optional[str]], score_set_id: int, correlation_id: Optional[str] = None -): - logging_context = {} - score_set = None - text = "Could not poll mapping jobs from UniProt for this Target %s. Mapping jobs for this score set should be submitted manually." - try: - db: Session = ctx["db"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started UniProt polling job", extra=logging_context) - - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set_id}. Skipped polling targets for UniProt mapping results." - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) - - return {"success": True, "retried": False, "enqueued_jobs": []} - - except Exception as e: - send_slack_error(e) - if score_set: - msg = text % score_set.urn - else: - msg = text % score_set_id - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - try: - uniprot_api = UniProtIDMappingAPI() - for target_gene in score_set.target_genes: - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." 
- log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - mapped_ac = acs[0] - job_id = mapping_jobs.get(target_gene.id) # type: ignore - - if not job_id: - msg = f"No job ID found for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - # This issue has already been sent to Slack in the job submission function, so we just log it here. - logger.debug(msg=msg, extra=logging_context) - continue - - if not uniprot_api.check_id_mapping_results_ready(job_id): - msg = f"Job {job_id} not ready for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target" - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - results = uniprot_api.get_id_mapping_results(job_id) - mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) - - if not mapped_ids: - msg = f"No UniProt ID found for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(mapped_ids) != 1: - msg = f"Found ambiguous Uniprot ID mapping results for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." 
- log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] - target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id - db.add(target_gene) - logger.info( - msg=f"Updated target gene {target_gene.id} with UniProt ID {mapped_uniprot_id}", extra=logging_context - ) - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg="UniProt mapping job encountered an unexpected error while attempting to poll mapping jobs. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, - ) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - db.commit() - return {"success": True, "retried": False, "enqueued_jobs": []} - - -#################################################################################################### -# gnomAD Variant Linkage -#################################################################################################### - - -async def link_gnomad_variants(ctx: dict, correlation_id: str, score_set_id: int) -> dict: - logging_context = {} - score_set = None - text = "Could not link mappings to gnomAD variants for score set %s. Mappings for this score set should be linked manually." - try: - db: Session = ctx["db"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started gnomAD variant linkage", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to link gnomAD objects for this score set." 
- - logging_context["current_gnomad_linking_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for gnomAD mapped resource linkage.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - # We filter out mapped variants that do not have a CAID, so this query is typed # as a Sequence[str]. Ignore MyPy's type checking here. - variant_caids: Sequence[str] = db.scalars( - select(MappedVariant.clingen_allele_id) - .join(Variant) - .join(ScoreSet) - .where( - ScoreSet.urn == score_set.urn, - MappedVariant.current.is_(True), - MappedVariant.clingen_allele_id.is_not(None), - ) - ).all() # type: ignore - num_variant_caids = len(variant_caids) - - logging_context["num_variants_to_link_gnomad"] = num_variant_caids - - if not variant_caids: - logger.warning( - msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - logger.info( - msg="Found current mapped variants with CAIDs for this score set. Attempting to link them to gnomAD variants.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. 
This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - gnomad_variant_data = gnomad_variant_data_for_caids(variant_caids) - num_gnomad_variants_with_caid_match = len(gnomad_variant_data) - logging_context["num_gnomad_variants_with_caid_match"] = num_gnomad_variants_with_caid_match - - if not gnomad_variant_data: - logger.warning( - msg="No gnomAD variants with CAID matches were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to fetch gnomAD variant data from S3 via Athena. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=logging_context) - num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(db, gnomad_variant_data) - db.commit() - logging_context["num_mapped_variants_linked_to_gnomad_variants"] = num_linked_gnomad_variants - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. 
This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=logging_context) - return {"success": True, "retried": False, "enqueued_job": None} diff --git a/src/mavedb/worker/jobs/__init__.py b/src/mavedb/worker/jobs/__init__.py new file mode 100644 index 000000000..e421bbad2 --- /dev/null +++ b/src/mavedb/worker/jobs/__init__.py @@ -0,0 +1,54 @@ +"""MaveDB Worker Job Functions. + +This package contains all worker job functions organized by domain: +- variant_processing: Variant creation and VRS mapping jobs +- external_services: Third-party service integration jobs (ClinGen, UniProt, gnomAD) +- data_management: Database and materialized view management jobs +- utils: Shared utilities for job state, retry logic, and constants + +All job functions are exported at the package level for easy import +by the worker settings and other modules. Additionally, a job registry +is provided for ARQ worker configuration. 
+""" + +from mavedb.worker.jobs.data_management.views import ( + refresh_materialized_views, + refresh_published_variants_view, +) +from mavedb.worker.jobs.external_services.clingen import ( + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, +) +from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants +from mavedb.worker.jobs.external_services.uniprot import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) +from mavedb.worker.jobs.registry import ( + BACKGROUND_CRONJOBS, + BACKGROUND_FUNCTIONS, + STANDALONE_JOB_DEFINITIONS, +) +from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set +from mavedb.worker.jobs.variant_processing.mapping import ( + map_variants_for_score_set, +) + +__all__ = [ + # Variant processing jobs + "create_variants_for_score_set", + "map_variants_for_score_set", + # External service integration jobs + "submit_score_set_mappings_to_car", + "submit_score_set_mappings_to_ldh", + "poll_uniprot_mapping_jobs_for_score_set", + "submit_uniprot_mapping_jobs_for_score_set", + "link_gnomad_variants", + # Data management jobs + "refresh_materialized_views", + "refresh_published_variants_view", + # Job registry and utilities + "BACKGROUND_FUNCTIONS", + "BACKGROUND_CRONJOBS", + "STANDALONE_JOB_DEFINITIONS", +] diff --git a/src/mavedb/worker/jobs/data_management/__init__.py b/src/mavedb/worker/jobs/data_management/__init__.py new file mode 100644 index 000000000..635025813 --- /dev/null +++ b/src/mavedb/worker/jobs/data_management/__init__.py @@ -0,0 +1,16 @@ +"""Data management job functions. 
+ +This module exports jobs for database and view management: +- Materialized view refresh for optimized query performance +- Database maintenance and cleanup operations +""" + +from .views import ( + refresh_materialized_views, + refresh_published_variants_view, +) + +__all__ = [ + "refresh_materialized_views", + "refresh_published_variants_view", +] diff --git a/src/mavedb/worker/jobs/data_management/py.typed b/src/mavedb/worker/jobs/data_management/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py new file mode 100644 index 000000000..e342a0ff4 --- /dev/null +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -0,0 +1,112 @@ +"""Database materialized view refresh jobs. + +This module contains jobs for refreshing materialized views used throughout +the MaveDB application. Materialized views provide optimized, pre-computed +data for complex queries and are refreshed periodically to maintain +data consistency and performance. +""" + +import logging + +from mavedb.db.view import refresh_all_mat_views +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.published_variant import PublishedVariantsMV +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.decorators.job_management import with_job_management +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +# TODO#405: Refresh materialized views within an executor. +@with_guaranteed_job_run_record("cron_job") +@with_job_management +async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Refresh all materialized views in the database. 
+
+    This job refreshes all materialized views to ensure that they are up-to-date
+    with the latest data. It is typically run as a scheduled cron job and meant
+    to be invoked indirectly via a job queue system.
+
+    Args:
+        ctx (dict): The job context dictionary.
+        job_id (int): The ID of the job run.
+        job_manager (JobManager): Manager for job lifecycle and DB operations.
+
+    Side Effects:
+        - Refreshes all materialized views in the database.
+
+    Returns:
+        JobExecutionOutcome: Succeeded outcome including the list of refreshed views.
+    """
+    # Setup initial context and progress
+    job_manager.save_to_context(
+        {
+            "application": "mavedb-worker",
+            "function": "refresh_materialized_views",
+            "resource": "all_materialized_views",
+            "correlation_id": None,
+        }
+    )
+    job_manager.update_progress(0, 100, "Starting refresh of all materialized views.")
+    logger.debug(msg="Began refresh of all materialized views.", extra=job_manager.logging_context())
+
+    # Do refresh
+    refresh_all_mat_views(job_manager.db)
+    job_manager.db.flush()
+
+    # Finalize job state
+    logger.debug(msg="Done refreshing materialized views.", extra=job_manager.logging_context())
+
+    return JobExecutionOutcome.succeeded(data={"views_refreshed": ["all_materialized_views"]})
+
+
+@with_pipeline_management
+async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome:
+    """Refresh the published variants materialized view.
+
+    This job refreshes the PublishedVariantsMV materialized view to ensure that it
+    is up-to-date with the latest data. It is meant to be invoked as part of a job queue system.
+
+    Args:
+        ctx (dict): The job context dictionary.
+        job_id (int): The ID of the job run.
+        job_manager (JobManager): Manager for job lifecycle and DB operations.
+
+    Side Effects:
+        - Refreshes the PublishedVariantsMV materialized view in the database.
+
+    Returns:
+        JobExecutionOutcome: Succeeded outcome for the completed refresh.
+    """
+    # Get the job definition we are working on
+    job = job_manager.get_job()
+
+    _job_required_params = ["correlation_id"]
+    validate_job_params(_job_required_params, job)
+
+    # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above.
+    correlation_id = job.job_params["correlation_id"]  # type: ignore
+
+    # Setup initial context and progress
+    job_manager.save_to_context(
+        {
+            "application": "mavedb-worker",
+            "function": "refresh_published_variants_view",
+            "resource": "published_variants_materialized_view",
+            "correlation_id": correlation_id,
+        }
+    )
+    job_manager.update_progress(0, 100, "Starting refresh of published variants materialized view.")
+    logger.info(msg="Started refresh of published variants materialized view", extra=job_manager.logging_context())
+
+    # Do refresh
+    PublishedVariantsMV.refresh(job_manager.db)
+    job_manager.db.flush()
+
+    # Finalize job state
+    logger.debug(msg="Done refreshing published variants materialized view.", extra=job_manager.logging_context())
+
+    return JobExecutionOutcome.succeeded()
diff --git a/src/mavedb/worker/jobs/external_services/__init__.py b/src/mavedb/worker/jobs/external_services/__init__.py
new file mode 100644
index 000000000..4537c0edd
--- /dev/null
+++ b/src/mavedb/worker/jobs/external_services/__init__.py
@@ -0,0 +1,40 @@
+"""External service integration job functions.
+ +This module exports jobs for integrating with third-party services: +- ClinGen (Clinical Genome Resource) for allele registration and data submission +- ClinGen cache pre-warming to prevent stampede on downstream annotation jobs +- UniProt for protein sequence annotation and ID mapping +- gnomAD for population frequency and genomic context data +- HGVS for standardized variant nomenclature population +- Variant Translation for PA<->CA allele relationship mapping +- VEP for functional consequence annotation +""" + +# External services job functions +from .clingen import ( + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, +) +from .clingen_cache import warm_clingen_cache +from .clinvar import refresh_clinvar_controls +from .gnomad import link_gnomad_variants +from .hgvs import populate_hgvs_for_score_set +from .uniprot import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) +from .variant_translation import populate_variant_translations_for_score_set +from .vep import populate_vep_for_score_set + +__all__ = [ + "submit_score_set_mappings_to_car", + "submit_score_set_mappings_to_ldh", + "warm_clingen_cache", + "refresh_clinvar_controls", + "link_gnomad_variants", + "populate_hgvs_for_score_set", + "populate_variant_translations_for_score_set", + "poll_uniprot_mapping_jobs_for_score_set", + "submit_uniprot_mapping_jobs_for_score_set", + "populate_vep_for_score_set", +] diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py new file mode 100644 index 000000000..501077083 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -0,0 +1,487 @@ +"""ClinGen integration jobs for variant submission and linking. 
+ +This module contains jobs for submitting mapped variants to ClinGen services: +- ClinGen Allele Registry (CAR) for allele registration +- ClinGen Linked Data Hub (LDH) for data submission +- Variant linking and association management + +These jobs enable integration with the ClinGen ecosystem for clinical +variant interpretation and data sharing. +""" + +import asyncio +import functools +import logging + +from sqlalchemy import select + +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.clingen.constants import ( + CAR_SUBMISSION_ENDPOINT, + CLIN_GEN_SUBMISSION_ENABLED, + DEFAULT_LDH_SUBMISSION_BATCH_SIZE, + LDH_SUBMISSION_ENDPOINT, +) +from mavedb.lib.clingen.content_constructors import construct_ldh_submission +from mavedb.lib.clingen.services import ( + ClinGenAlleleRegistryService, + ClinGenLdhService, + get_allele_registry_associations, +) +from mavedb.lib.types.clingen import is_car_submission_error +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus, FailureCategory +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """ + Submit mapped variants for a score set to the ClinGen Allele Registry (CAR). 
+
+    This job registers mapped variants with CAR, assigns ClinGen Allele IDs (CAIDs),
+    and updates the database with the results. Progress is tracked throughout the submission.
+
+    Required job_params in the JobRun:
+        - score_set_id (int): ID of the ScoreSet to process
+        - correlation_id (str): Correlation ID for tracking
+
+    Args:
+        ctx (dict): Worker context containing DB and Redis connections
+        job_manager (JobManager): Manager for job lifecycle and DB operations
+
+    Side Effects:
+        - Updates MappedVariant records with ClinGen Allele IDs
+        - Submits data to ClinGen Allele Registry
+
+    Returns:
+        JobExecutionOutcome: Outcome with submitted/matched/failed counts, or a failure/skip outcome.
+    """
+    # Get the job definition we are working on
+    job = job_manager.get_job()
+
+    _job_required_params = ["score_set_id", "correlation_id"]
+    validate_job_params(_job_required_params, job)
+
+    # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above.
+    score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one()  # type: ignore
+    correlation_id = job.job_params["correlation_id"]  # type: ignore
+
+    # Setup initial context and progress
+    job_manager.save_to_context(
+        {
+            "application": "mavedb-worker",
+            "function": "submit_score_set_mappings_to_car",
+            "resource": score_set.urn,
+            "correlation_id": correlation_id,
+        }
+    )
+    job_manager.update_progress(0, 100, "Starting CAR mapped resource submission.")
+    logger.info(msg="Started CAR mapped resource submission", extra=job_manager.logging_context())
+
+    # Ensure we've enabled ClinGen submission
+    if not CLIN_GEN_SUBMISSION_ENABLED:
+        logger.warning(
+            msg="ClinGen submission is disabled via configuration, skipping submission of mapped variants to CAR.",
+            extra=job_manager.logging_context(),
+        )
+        job_manager.db.flush()
+        return JobExecutionOutcome.skipped(data={"reason": "ClinGen submission disabled"})
+
+    # Check for CAR submission endpoint
+    if not
CAR_SUBMISSION_ENDPOINT: + logger.warning( + msg="ClinGen Allele Registry submission is disabled (no submission endpoint), unable to complete submission of mapped variants to CAR.", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.failed( + reason="ClinGen Allele Registry submission endpoint is not configured.", + failure_category=FailureCategory.CONFIGURATION_ERROR, + ) + + # Fetch mapped variants with post-mapped data for the score set + variant_post_mapped_objects = job_manager.db.execute( + select(MappedVariant.id, MappedVariant.post_mapped) + .join(Variant) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn) + .where(MappedVariant.post_mapped.is_not(None)) + .where(MappedVariant.current.is_(True)) + ).all() + + # Track total variants to submit + job_manager.save_to_context({"total_variants_to_submit_car": len(variant_post_mapped_objects)}) + if not variant_post_mapped_objects: + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "matched_count": 0}) + + job_manager.update_progress( + 10, 100, f"Preparing {len(variant_post_mapped_objects)} mapped variants for CAR submission." + ) + + # Build HGVS strings for submission. Don't do duplicate submissions-- store mapped variant IDs by HGVS. + variant_post_mapped_hgvs: dict[str, list[int]] = {} + for mapped_variant_id, post_mapped in variant_post_mapped_objects: + hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) + + if not hgvs_for_post_mapped: + logger.warning( + msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant_id}. 
Skipping submission of this variant.", + extra=job_manager.logging_context(), + ) + continue + + if hgvs_for_post_mapped in variant_post_mapped_hgvs: + variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) + else: + variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] + + job_manager.save_to_context({"unique_variants_to_submit_car": len(variant_post_mapped_hgvs)}) + job_manager.update_progress(15, 100, "Submitting mapped variants to CAR.") + + # Do submission + car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) + registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) + job_manager.update_progress(60, 100, "Processing registered alleles from CAR.") + + # Build a map of HGVS string -> CAR error details for every rejected submission. + # The CAR response intermixes successes (have "@id") and errors (have "errorType"). + car_errors_by_hgvs: dict[str, dict] = { + err["hgvs"]: { + "error_type": err.get("errorType"), + "message": err.get("message"), + } + for err in registered_alleles + if is_car_submission_error(err) + } + + # Build an inverse map so we can look up the HGVS string for any mapped_variant_id. 
+    mapped_variant_id_to_hgvs: dict[int, str] = {
+        vid: hgvs for hgvs, vids in variant_post_mapped_hgvs.items() for vid in vids
+    }
+
+    # Process registered alleles and update mapped variants
+    linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles)
+    total = len(linked_alleles)
+    processed = 0
+    # Setup annotation manager
+    annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id)
+    registered_mapped_variant_ids = []
+    for hgvs_string, caid in linked_alleles.items():
+        mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string]
+        registered_mapped_variant_ids.extend(mapped_variant_ids)
+        mapped_variants = job_manager.db.scalars(
+            select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids))
+        ).all()
+
+        for mapped_variant in mapped_variants:
+            mapped_variant.clingen_allele_id = caid
+            job_manager.db.add(mapped_variant)
+
+            annotation_manager.add_annotation(
+                variant_id=mapped_variant.variant_id,  # type: ignore
+                annotation_type=AnnotationType.CLINGEN_ALLELE_ID,
+                version=None,
+                status=AnnotationStatus.SUCCESS,
+                annotation_data={
+                    "annotation_metadata": {"clingen_allele_id": caid},
+                },
+                current=True,
+            )
+
+        processed += 1
+
+        # Progress: 50 + (processed/total)*45, snapped to 5% steps. NOTE(review): `total % 20` is loop-invariant — likely intended `processed % 20 == 0`; confirm.
+        if total % 20 == 0 or processed == total:
+            progress = 50 + round((processed / total) * 45 / 5) * 5
+            job_manager.update_progress(progress, 100, f"Processed {processed} of {total} registered alleles.")
+            logger.info(
+                msg=f"Processed {processed}/{total} registered alleles from CAR.",
+                extra=job_manager.logging_context(),
+            )
+
+    # For mapped variants which did not get a CAID, log failure annotation
+    failed_submissions = set(obj[0] for obj in variant_post_mapped_objects) - set(registered_mapped_variant_ids)
+    for mapped_variant_id in failed_submissions:
+        mapped_variant = job_manager.db.scalars(
+            select(MappedVariant).where(MappedVariant.id ==
mapped_variant_id) + ).one() + + failed_variant_hgvs = mapped_variant_id_to_hgvs.get(mapped_variant_id) + car_error = car_errors_by_hgvs.get(failed_variant_hgvs) if failed_variant_hgvs else None + + annotation_metadata: dict = {"submitted_hgvs": failed_variant_hgvs} + if car_error: + annotation_metadata["car_error_type"] = car_error["error_type"] + annotation_metadata["car_error_message"] = car_error["message"] + + # Use EXTERNAL_SERVICE_REJECTED when CAR explicitly rejected the submission with an error + # response (e.g. InvalidHGVS), vs EXTERNAL_API_ERROR for silent failures where CAR returned + # no response at all (network drop, service-side omission, etc.). + failure_category = ( + AnnotationFailureCategory.EXTERNAL_SERVICE_REJECTED + if car_error + else AnnotationFailureCategory.EXTERNAL_API_ERROR + ) + + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version=None, + status=AnnotationStatus.FAILED, + failure_category=failure_category, + annotation_data={ + "error_message": "Failed to register variant with ClinGen Allele Registry.", + "annotation_metadata": annotation_metadata, + }, + current=True, + ) + + annotation_manager.flush() + + # When all registrations fail we will not be able to render any annotations. Fail the job + # to explicitly halt the pipeline. + if failed_submissions and not linked_alleles: + error_message = ( + f"CAR submission failed for all {len(failed_submissions)} variants in score set {score_set.urn}." 
+ ) + logger.error( + msg=error_message, + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.failed( + reason=error_message, + data={ + "submitted_count": len(variant_post_mapped_hgvs), + "matched_count": 0, + "failed_count": len(failed_submissions), + }, + failure_category=FailureCategory.DEPENDENCY_FAILURE, + ) + + if failed_submissions: + # CAR rejections are typically per-variant data quality issues (e.g. invalid HGVS) rather than + # systemic failures. Per-variant AnnotationStatus.FAILED records are already written above for + # traceability. We continue the pipeline so that successfully registered variants still receive + # downstream annotations (warm_clingen_cache, gnomAD, ClinVar, HGVS, translations). + logger.warning( + msg=f"CAR submission failed for {len(failed_submissions)} of {len(variant_post_mapped_hgvs)} variants in score set {score_set.urn}.", + extra=job_manager.logging_context(), + ) + + logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) + job_manager.db.flush() + return JobExecutionOutcome.succeeded( + data={ + "submitted_count": len(variant_post_mapped_hgvs), + "matched_count": len(linked_alleles), + "failed_count": len(failed_submissions), + } + ) + + +@with_pipeline_management +async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """ + Submit mapped variants for a score set to the ClinGen Linked Data Hub (LDH). + + This job submits mapped variant data to LDH for a given score set, handling authentication, + submission batching, and error reporting. Progress and errors are logged and reported to Slack. 
+
+    Required job_params in the JobRun:
+        - score_set_id (int): ID of the ScoreSet to process
+        - correlation_id (str): Correlation ID for tracking
+
+    Args:
+        ctx (dict): Worker context containing DB and Redis connections
+        job_manager (JobManager): Manager for job lifecycle and DB operations
+
+    Side Effects:
+        - Submits data to ClinGen Linked Data Hub
+
+    Returns:
+        JobExecutionOutcome: Outcome with submitted/failed counts, or a failure outcome if all submissions failed.
+    """
+    # Get the job definition we are working on
+    job = job_manager.get_job()
+
+    _job_required_params = ["score_set_id", "correlation_id"]
+    validate_job_params(_job_required_params, job)
+
+    # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above.
+    score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one()  # type: ignore
+    correlation_id = job.job_params["correlation_id"]  # type: ignore
+
+    # Setup initial context and progress
+    job_manager.save_to_context(
+        {
+            "application": "mavedb-worker",
+            "function": "submit_score_set_mappings_to_ldh",
+            "resource": score_set.urn,
+            "correlation_id": correlation_id,
+        }
+    )
+    job_manager.update_progress(0, 100, "Starting LDH mapped resource submission.")
+    logger.info(msg="Started LDH mapped resource submission", extra=job_manager.logging_context())
+
+    # Connect to LDH service
+    ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT)
+    ldh_service.authenticate()
+
+    # Fetch mapped variants with post-mapped data for the score set
+    variant_objects = job_manager.db.execute(
+        select(Variant, MappedVariant)
+        .join(MappedVariant)
+        .join(ScoreSet)
+        .where(ScoreSet.urn == score_set.urn)
+        .where(MappedVariant.post_mapped.is_not(None))
+        .where(MappedVariant.current.is_(True))
+    ).all()
+
+    # Track total variants to submit
+    job_manager.save_to_context({"total_variants_to_submit_ldh": len(variant_objects)})
+    if not variant_objects:
+        logger.warning(
+            msg="No current mapped variants with
post mapped metadata were found for this score set. Skipping LDH submission.", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "failed_count": 0}) + + job_manager.update_progress(10, 100, f"Submitting {len(variant_objects)} mapped variants to LDH.") + + # Build submission content + variant_content = [] + variant_for_urn = {} + for variant, mapped_variant in variant_objects: + variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) + + if not variation: + logger.warning( + msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant.id}. Skipping submission of this variant.", + extra=job_manager.logging_context(), + ) + continue + + variant_content.append((variation, variant, mapped_variant)) + variant_for_urn[variant.urn] = variant + + if not variant_content: + logger.warning( + msg="No valid mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "failed_count": 0}) + + job_manager.save_to_context({"unique_variants_to_submit_ldh": len(variant_content)}) + job_manager.update_progress(30, 100, f"Dispatching submissions for {len(variant_content)} unique variants to LDH.") + submission_content = construct_ldh_submission(variant_content) + + blocking = functools.partial( + ldh_service.dispatch_submissions, submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE + ) + loop = asyncio.get_running_loop() + submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) + job_manager.update_progress(90, 100, "Finalizing LDH mapped resource submission.") + job_manager.save_to_context( + { + "ldh_submission_successes": len(submission_successes), + "ldh_submission_failures": len(submission_failures), + } + ) + + # TODO prior to finalizing: Verify typing of 
ClinGen submission responses. See https://reg.clinicalgenome.org/doc/AlleleRegistry_1.01.xx_api_v1.pdf + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id) + submitted_variant_urns = set() + for success in submission_successes: + logger.debug( + msg=f"Successfully submitted mapped variant to LDH: {success}", + extra=job_manager.logging_context(), + ) + + submitted_urn = success["data"]["entId"] + submitted_variant = variant_for_urn[submitted_urn] + + annotation_manager.add_annotation( + variant_id=submitted_variant.id, + annotation_type=AnnotationType.LDH_SUBMISSION, + version=None, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "annotation_metadata": {"ldh_iri": success["data"]["ldhIri"], "ldh_id": success["data"]["ldhId"]}, + }, + current=True, + ) + submitted_variant_urns.add(submitted_urn) + + # It isn't trivial to map individual failures back to their corresponding variants, + # especially when submission occurred in batch. Save all failures generically here. + # Note that failures may not be present in the submission failures list, but they are + # guaranteed to be absent from the successes list. 
+ for failure_urn in set(variant_for_urn.keys()) - submitted_variant_urns: + logger.error( + msg=f"Failed to submit mapped variant to LDH: {failure_urn}", + extra=job_manager.logging_context(), + ) + + failed_variant = variant_for_urn[failure_urn] + + annotation_manager.add_annotation( + variant_id=failed_variant.id, + annotation_type=AnnotationType.LDH_SUBMISSION, + version=None, + status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, + annotation_data={ + "error_message": "Failed to submit variant to ClinGen Linked Data Hub.", + }, + current=True, + ) + + annotation_manager.flush() + + if submission_failures: + logger.warning( + msg=f"LDH mapped resource submission encountered {len(submission_failures)} failures.", + extra=job_manager.logging_context(), + ) + + if not submission_successes: + error_message = f"All LDH submissions failed for score set {score_set.urn}." + logger.error( + msg=error_message, + extra=job_manager.logging_context(), + ) + + job_manager.db.flush() + return JobExecutionOutcome.failed( + reason=error_message, + data={"submitted_count": 0, "failed_count": len(submission_failures)}, + failure_category=FailureCategory.DEPENDENCY_FAILURE, + ) + + logger.info( + msg="Completed LDH mapped resource submission", + extra=job_manager.logging_context(), + ) + + job_manager.db.flush() + return JobExecutionOutcome.succeeded( + data={"submitted_count": len(submission_successes), "failed_count": len(submission_failures)} + ) diff --git a/src/mavedb/worker/jobs/external_services/clingen_cache.py b/src/mavedb/worker/jobs/external_services/clingen_cache.py new file mode 100644 index 000000000..10890f23f --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/clingen_cache.py @@ -0,0 +1,124 @@ +"""ClinGen cache pre-warming job. + +Pre-fetches ClinGen allele data into the Redis cache before downstream annotation +jobs fan out. 
Without this, 40+ concurrent ClinVar refresh jobs all miss the cache +simultaneously and stampede the ClinGen API, causing large payloads to contend for +Redis write slots and triggering timeouts. + +Fetches are made concurrently up to CLINGEN_CACHE_WARMING_CONCURRENCY (default 5) +to balance speed against ClinGen API and Redis write pool load. +""" + +import asyncio +import logging + +from sqlalchemy import select + +from mavedb.lib.clingen.allele_registry import get_clingen_allele_data +from mavedb.lib.clingen.constants import CLINGEN_CACHE_WARMING_CONCURRENCY +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def warm_clingen_cache(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Pre-warm the ClinGen allele data cache for all mapped variants in a score set. + + Queries all distinct ClinGen allele IDs from mapped variants, then fetches each + one via `get_clingen_allele_data()` (which populates the aiocache Redis cache), + with up to CLINGEN_CACHE_WARMING_CONCURRENCY requests in-flight at a time. + Downstream jobs that depend on this step will see 100% cache hits. 
+ """ + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "warm_clingen_cache", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting ClinGen cache pre-warming.") + logger.info("Starting ClinGen cache pre-warming", extra=job_manager.logging_context()) + + # Get distinct clingen_allele_ids for this score set's current mapped variants + allele_ids = job_manager.db.scalars( + select(MappedVariant.clingen_allele_id) + .join(Variant) + .where( + Variant.score_set_id == score_set.id, + MappedVariant.current.is_(True), + MappedVariant.clingen_allele_id.isnot(None), + # Exclude multi-variant IDs (comma-separated) — they can't be fetched individually + MappedVariant.clingen_allele_id.not_like("%,%"), + ) + .distinct() + ).all() + + total = len(allele_ids) + job_manager.save_to_context({"total_allele_ids_to_warm": total}) + logger.info(f"Found {total} distinct ClinGen allele IDs to pre-warm", extra=job_manager.logging_context()) + + if total == 0: + job_manager.db.flush() + return JobExecutionOutcome.succeeded(data={"warmed": 0, "failed": 0}) + + # Fetch alleles concurrently up to CLINGEN_CACHE_WARMING_CONCURRENCY in-flight at a time. + # get_clingen_allele_data() is decorated with @cached, so each call populates Redis. 
+ semaphore = asyncio.Semaphore(CLINGEN_CACHE_WARMING_CONCURRENCY) + + async def fetch_one(allele_id: str) -> tuple[str, bool, BaseException | None]: + async with semaphore: + try: + await get_clingen_allele_data(allele_id) + return allele_id, True, None + except Exception as exc: + return allele_id, False, exc + + warmed = 0 + failed = 0 + for index, completed_task in enumerate(asyncio.as_completed([fetch_one(a) for a in allele_ids if a])): + allele_id, success, exc = await completed_task + if success: + warmed += 1 + else: + failed += 1 + logger.warning( + f"Failed to warm cache for allele {allele_id}", + extra=job_manager.logging_context(), + exc_info=exc, + ) + + if total > 0 and index % max(total // 20, 1) == 0: + job_manager.save_to_context({"warmed_alleles": warmed, "failed_alleles": failed}) + job_manager.update_progress( + int((index / total) * 100), + 100, + f"Warming ClinGen cache ({index}/{total}).", + ) + logger.info( + f"Warming ClinGen cache: {index}/{total} allele IDs processed. Warmed: {warmed}, failed: {failed}.", + extra=job_manager.logging_context(), + ) + + logger.info( + f"ClinGen cache pre-warming complete. Warmed: {warmed}, failed: {failed}.", + extra=job_manager.logging_context(), + ) + + job_manager.db.flush() + return JobExecutionOutcome.succeeded(data={"warmed": warmed, "failed": failed, "total": total}) diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py new file mode 100644 index 000000000..2d1d040c0 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -0,0 +1,297 @@ +"""ClinVar integration jobs for variant annotation + +This module contains job definitions and utility functions for integrating ClinVar +variant data into MaveDB. It includes functions to fetch and parse ClinVar variant +summary data, and update MaveDB records with the latest ClinVar annotations. 
+ +Both ClinGen API calls and ClinVar TSV data fetches are automatically cached using +aiocache with Redis backend: +- ClinGen API calls: 24-hour TTL +- ClinVar TSV files: 90-day TTL (archival data doesn't change) + +This significantly reduces redundant network requests when refreshing ClinVar +controls across multiple months/years. +""" + +import logging +from datetime import datetime + +import requests +from sqlalchemy import select + +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.clingen.allele_registry import get_associated_clinvar_allele_id +from mavedb.lib.clinvar.utils import fetch_clinvar_variant_data +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.clinical_control import ClinicalControl +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus, FailureCategory +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + +# ClinVar archived data starts from February 2015, then January of each +# subsequent year. This list is used to generate the date range for refreshing. +CLINVAR_START_YEAR = 2015 +CLINVAR_START_MONTH = 2 + + +def generate_clinvar_versions() -> list[tuple[int, int]]: + """Generate all ClinVar version (year, month) pairs from Feb 2015 to current Jan. + + Returns a list of (year, month) tuples representing each ClinVar archival + snapshot that should be processed. 
+ """ + current_year = datetime.now().year + versions = [(CLINVAR_START_YEAR, CLINVAR_START_MONTH)] + for year in range(CLINVAR_START_YEAR + 1, current_year + 1): + versions.append((year, 1)) + return versions + + +@with_pipeline_management +async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Refresh ClinVar clinical control data across all archival versions. + + Iterates over every ClinVar archival snapshot (Feb 2015, then Jan of each + subsequent year through the current year), fetching TSV data and updating + clinical control records for all mapped variants in the score set. Individual + version failures are logged and skipped — the job continues processing + remaining versions. + """ + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + versions = generate_clinvar_versions() + + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "refresh_clinvar_controls", + "resource": score_set.urn, + "correlation_id": correlation_id, + "versions": versions, + "total_versions": len(versions), + } + ) + job_manager.update_progress(0, 100, f"Starting ClinVar refresh across {len(versions)} versions.") + logger.info(f"Starting ClinVar refresh across {len(versions)} versions", extra=job_manager.logging_context()) + + variants_to_refresh = job_manager.db.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == score_set.id, + MappedVariant.current.is_(True), + ) + ).all() + total_variants_to_refresh = len(variants_to_refresh) + job_manager.save_to_context({"total_variants_to_refresh": total_variants_to_refresh}) + + total_refreshed = 0 + total_failed = 0 + versions_completed = 0 + 
+ for version_index, (year, month) in enumerate(versions): + clinvar_version = f"{month:02d}_{year}" + job_manager.save_to_context({"current_version": clinvar_version, "version_index": version_index}) + + version_progress = int((version_index / len(versions)) * 100) + job_manager.update_progress( + version_progress, + 100, + f"Processing ClinVar version {clinvar_version} ({version_index + 1}/{len(versions)}).", + ) + logger.info(f"Processing ClinVar version {clinvar_version}", extra=job_manager.logging_context()) + + try: + tsv_data = await fetch_clinvar_variant_data(month, year) + except Exception: + logger.error( + f"Failed to fetch/parse ClinVar TSV for version {clinvar_version}, skipping.", + extra=job_manager.logging_context(), + exc_info=True, + ) + continue + + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id) + for mapped_variant in variants_to_refresh: + clingen_id = mapped_variant.clingen_allele_id + + if clingen_id is None: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.MISSING_IDENTIFIER, + annotation_data={ + "error_message": "Mapped variant does not have an associated ClinGen allele ID.", + }, + current=True, + replace_all_versions=False, + ) + continue + + if "," in clingen_id: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.UNSUPPORTED_IDENTIFIER, + annotation_data={ + "error_message": "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data.", + }, + current=True, + replace_all_versions=False, + ) + continue + + try: + clinvar_allele_id = await get_associated_clinvar_allele_id(clingen_id) # type: 
ignore + except requests.exceptions.RequestException as exc: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, + annotation_data={ + "error_message": f"Failed to retrieve ClinVar allele ID from ClinGen API: {str(exc)}", + }, + current=True, + replace_all_versions=False, + ) + logger.error( + f"Failed to retrieve ClinVar allele ID from ClinGen API for ClinGen allele ID {clingen_id}.", + extra=job_manager.logging_context(), + exc_info=exc, + ) + total_failed += 1 + continue + + if not clinvar_allele_id: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.NO_LINKED_ALLELE, + annotation_data={ + "error_message": "No ClinVar allele ID found for ClinGen allele ID.", + }, + current=True, + replace_all_versions=False, + ) + continue + + if clinvar_allele_id not in tsv_data: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, + annotation_data={ + "error_message": "No ClinVar data found for ClinVar allele ID.", + }, + current=True, + replace_all_versions=False, + ) + continue + + variant_data = tsv_data[clinvar_allele_id] + identifier = str(clinvar_allele_id) + + clinvar_variant = job_manager.db.scalars( + select(ClinicalControl).where( + ClinicalControl.db_identifier == identifier, + ClinicalControl.db_version == clinvar_version, + ClinicalControl.db_name == "ClinVar", + ) + ).one_or_none() + if clinvar_variant is None: + clinvar_variant = 
ClinicalControl( + db_identifier=identifier, + gene_symbol=variant_data.get("GeneSymbol"), + clinical_significance=variant_data.get("ClinicalSignificance"), + clinical_review_status=variant_data.get("ReviewStatus"), + db_version=clinvar_version, + db_name="ClinVar", + ) + else: + clinvar_variant.gene_symbol = variant_data.get("GeneSymbol") + clinvar_variant.clinical_significance = variant_data.get("ClinicalSignificance") + clinvar_variant.clinical_review_status = variant_data.get("ReviewStatus") + + job_manager.db.add(clinvar_variant) + job_manager.db.flush() + + if clinvar_variant not in mapped_variant.clinical_controls: + mapped_variant.clinical_controls.append(clinvar_variant) + job_manager.db.add(mapped_variant) + + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "annotation_metadata": { + "clinvar_allele_id": clinvar_allele_id, + }, + }, + current=True, + replace_all_versions=False, + ) + + total_refreshed += 1 + + annotation_manager.flush() + versions_completed += 1 + logger.info( + f"Completed ClinVar version {clinvar_version} for {total_variants_to_refresh} variants.", + extra=job_manager.logging_context(), + ) + + logger.info( + f"ClinVar refresh complete: {versions_completed}/{len(versions)} versions, " + f"{total_refreshed} variant-version annotations.", + extra=job_manager.logging_context(), + ) + + if total_failed > 0 and total_refreshed == 0: + error_message = ( + f"All {total_failed} ClinVar lookups failed for score set {score_set.urn}. Possible ClinGen API outage." 
+ ) + logger.error(error_message, extra=job_manager.logging_context()) + job_manager.db.flush() + return JobExecutionOutcome.failed( + reason=error_message, + data={ + "versions_completed": versions_completed, + "versions_total": len(versions), + "variant_annotations": 0, + }, + failure_category=FailureCategory.DEPENDENCY_FAILURE, + ) + + job_manager.db.flush() + return JobExecutionOutcome.succeeded( + data={ + "versions_completed": versions_completed, + "versions_total": len(versions), + "variant_annotations": total_refreshed, + } + ) diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py new file mode 100644 index 000000000..969c3a20e --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -0,0 +1,162 @@ +"""gnomAD variant linking jobs for population frequency annotation. + +This module handles linking of mapped variants to gnomAD (Genome Aggregation Database) +variants to provide population frequency and other genomic context information. +This enrichment helps researchers understand the clinical significance and +rarity of variants in their datasets. 
+""" + +import logging +from typing import Sequence + +from sqlalchemy import select + +from mavedb.db import athena +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.gnomad import ( + GNOMAD_DATA_VERSION, + gnomad_variant_data_for_caids, + link_gnomad_variants_to_mapped_variants, +) +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """ + Link mapped variants to gnomAD variants based on ClinGen Allele IDs (CAIDs). + This job fetches mapped variants associated with a given score set that have CAIDs, + retrieves corresponding gnomAD variant data, and establishes links between them + in the database. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing mapped variants to process. + - correlation_id (str): Correlation ID for tracing requests across services. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job being executed. + job_manager (JobManager): The job manager instance for database and logging operations. + + Side Effects: + - Updates MappedVariant records to link to gnomAD variants. 
+ + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "link_gnomad_variants", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting gnomAD mapped resource linkage.") + logger.info(msg="Started gnomAD mapped resource linkage", extra=job_manager.logging_context()) + + # We filter out mapped variants that do not have a CAID, so this query is typed # as a Sequence[str]. Ignore MyPy's type checking here. + variant_caids: Sequence[str] = job_manager.db.scalars( + select(MappedVariant.clingen_allele_id) + .join(Variant) + .join(ScoreSet) + .where( + ScoreSet.urn == score_set.urn, + MappedVariant.current.is_(True), + MappedVariant.clingen_allele_id.is_not(None), + ) + ).all() # type: ignore + + num_variant_caids = len(variant_caids) + job_manager.save_to_context({"num_variants_to_link_gnomad": num_variant_caids}) + + if not variant_caids: + logger.warning( + msg="No current mapped variants with CAIDs were found for this score set. 
Skipping gnomAD linkage (nothing to do).", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.succeeded(data={"linked_count": 0, "skipped_count": 0}) + + job_manager.update_progress(10, 100, f"Found {num_variant_caids} variants with CAIDs to link to gnomAD variants.") + logger.info( + msg="Found current mapped variants with CAIDs for this score set. Attempting to link them to gnomAD variants.", + extra=job_manager.logging_context(), + ) + + # Fetch gnomAD variant data for the CAIDs + with athena.engine.connect() as athena_session: + logger.debug("Fetching gnomAD variants from Athena.") + gnomad_variant_data = gnomad_variant_data_for_caids(athena_session, variant_caids) + + num_gnomad_variants_with_caid_match = len(gnomad_variant_data) + + # NOTE: Proceed intentionally with linking even if no matches were found, to record skipped annotations. + + job_manager.save_to_context({"num_gnomad_variants_with_caid_match": num_gnomad_variants_with_caid_match}) + job_manager.update_progress(75, 100, f"Found {num_gnomad_variants_with_caid_match} gnomAD variants matching CAIDs.") + + # Link mapped variants to gnomAD variants + logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=job_manager.logging_context()) + num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(job_manager.db, gnomad_variant_data) + job_manager.db.flush() + + # For variants which are not linked, create annotation status records indicating skipped linkage + mapped_variants_with_caids = job_manager.db.scalars( + select(MappedVariant) + .join(Variant) + .join(ScoreSet) + .where( + ScoreSet.urn == score_set.urn, + MappedVariant.current.is_(True), + MappedVariant.clingen_allele_id.is_not(None), + ) + ).all() + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id) + for mapped_variant in mapped_variants_with_caids: + if not mapped_variant.gnomad_variants: + annotation_manager.add_annotation( 
+ variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.GNOMAD_ALLELE_FREQUENCY, + version=GNOMAD_DATA_VERSION, + status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, + annotation_data={ + "error_message": "No gnomAD variant could be linked for this mapped variant.", + }, + current=True, + ) + + annotation_manager.flush() + + # Save final context and progress + job_manager.save_to_context({"num_mapped_variants_linked_to_gnomad_variants": num_linked_gnomad_variants}) + logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=job_manager.logging_context()) + job_manager.db.flush() + return JobExecutionOutcome.succeeded( + data={ + "linked_count": num_linked_gnomad_variants, + "skipped_count": num_variant_caids - num_linked_gnomad_variants, + } + ) diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py new file mode 100644 index 000000000..0b4687398 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -0,0 +1,298 @@ +"""ClinGen allele HGVS population jobs for mapped variant annotation. + +This module populates mapped variants with HGVS representations (genomic, coding, +protein) by querying the ClinGen Allele Registry. It uses ClinGen allele IDs +(CAIDs) already associated with mapped variants to look up standardized HGVS +nomenclature at different levels (hgvs_g, hgvs_c, hgvs_p), plus the assay-level +HGVS derived from post-mapped VRS data. 
+""" + +import logging +from typing import Optional + +import requests +from sqlalchemy import select + +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.clingen.allele_registry import ( + extract_hgvs_from_ca_allele_data, + extract_hgvs_from_pa_allele_data, + get_clingen_allele_data, +) +from mavedb.lib.slack import log_and_send_slack_message +from mavedb.lib.target_genes import get_target_coding_info +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Populate mapped variants with HGVS representations for a score set. + + Queries the ClinGen Allele Registry using existing ClinGen allele IDs to populate + standardized HGVS nomenclature (genomic, coding, protein) on mapped variants. + Also extracts the assay-level HGVS from post-mapped VRS data. + + Required job_params in the JobRun: + - score_set_id (int): ID of the ScoreSet to process + - correlation_id (str): Correlation ID for tracking + + Args: + ctx: Worker context containing DB and Redis connections. + job_id: The ID of the job run. + job_manager: Manager for job lifecycle and DB operations. + + Side Effects: + - Updates MappedVariant records with hgvs_assay_level, hgvs_g, hgvs_c, hgvs_p. + - Creates AnnotationStatus records for each processed variant. 
+ + Returns: + JobExecutionOutcome indicating success, failure, or skip. + """ + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "populate_hgvs_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting mapped HGVS population.") + logger.info(msg="Started mapped HGVS population", extra=job_manager.logging_context()) + + # Determine target info; multi-target score sets are not yet supported + try: + target_is_coding, transcript_accession = get_target_coding_info(score_set) + except NotImplementedError: + logger.warning( + msg="Multi-target score sets not supported for HGVS population. 
Skipping.", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.skipped(data={"reason": "Multi-target score sets not supported"}) + + job_manager.save_to_context({"target_is_coding": target_is_coding, "transcript_accession": transcript_accession}) + logger.info( + msg=f"Target info resolved: coding={target_is_coding}, transcript={transcript_accession}", + extra=job_manager.logging_context(), + ) + + # Fetch current mapped variants for the score set + variant_rows = job_manager.db.execute( + select(Variant.id, MappedVariant) + .join(Variant) + .join(ScoreSet) + .where(ScoreSet.id == score_set.id) + .where(MappedVariant.current.is_(True)) + ).all() + + total_variants = len(variant_rows) + job_manager.save_to_context({"total_variants": total_variants}) + + if not variant_rows: + logger.warning( + msg="No current mapped variants found for this score set. Skipping HGVS population.", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.succeeded(data={"populated_count": 0, "skipped_count": 0, "failed_count": 0}) + + job_manager.update_progress(5, 100, f"Processing {total_variants} mapped variants for HGVS population.") + + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id) + populated_count = 0 + skipped_count = 0 + failed_count = 0 + + for index, (variant_id, mapped_variant) in enumerate(variant_rows): + # Periodic progress updates + if total_variants > 0 and index % max(total_variants // 20, 1) == 0: + progress = 5 + int((index / total_variants) * 90) + job_manager.update_progress(progress, 100, f"Processing HGVS for variant {index + 1}/{total_variants}.") + logger.info( + "Processing variant %s/%s: variant_id=%s", + index + 1, + total_variants, + variant_id, + extra=job_manager.logging_context(), + ) + + hgvs_g: Optional[str] = None + hgvs_c: Optional[str] = None + hgvs_p: Optional[str] = None + + clingen_id = 
mapped_variant.clingen_allele_id + + job_manager.save_to_context( + { + "mapped_variant_id": mapped_variant.id, + "clingen_allele_id": clingen_id, + "progress_index": index, + } + ) + + if not clingen_id: + annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.MAPPED_HGVS, + version=None, + status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.MISSING_IDENTIFIER, + annotation_data={ + "error_message": "No ClinGen allele ID available for ClinGen HGVS lookup.", + }, + current=True, + ) + logger.debug( + "Skipping variant %s: no ClinGen allele ID.", + variant_id, + extra=job_manager.logging_context(), + ) + skipped_count += 1 + continue + + # Skip multi-variant allele IDs (comma-separated) + if "," in clingen_id: + annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.MAPPED_HGVS, + version=None, + status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.UNSUPPORTED_IDENTIFIER, + annotation_data={ + "error_message": "Multi-variant ClinGen allele IDs not supported for HGVS lookup.", + }, + current=True, + ) + logger.debug( + "Skipping variant %s: multi-variant ClinGen allele ID.", + variant_id, + extra=job_manager.logging_context(), + ) + skipped_count += 1 + continue + + # Query ClinGen API for allele data + try: + allele_data = await get_clingen_allele_data(clingen_id) + except requests.exceptions.RequestException as exc: + annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.MAPPED_HGVS, + version=None, + status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, + annotation_data={ + "error_message": f"Failed to fetch ClinGen allele data: {str(exc)}", + }, + current=True, + ) + logger.error( + "ClinGen API request failed for allele %s.", + clingen_id, + extra=job_manager.logging_context(), + exc_info=exc, + ) + failed_count += 1 + continue + + if allele_data is None: 
+ annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.MAPPED_HGVS, + version=None, + status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, + annotation_data={ + "error_message": f"ClinGen allele {clingen_id} not found in the registry.", + }, + current=True, + ) + logger.debug( + "ClinGen allele %s not found in registry. Skipping variant %s.", + clingen_id, + variant_id, + extra=job_manager.logging_context(), + ) + skipped_count += 1 + continue + + # Extract HGVS based on allele type + if clingen_id.startswith("CA"): + hgvs_g, hgvs_c, hgvs_p = extract_hgvs_from_ca_allele_data( + allele_data, target_is_coding, transcript_accession + ) + elif clingen_id.startswith("PA"): + hgvs_g, hgvs_c, hgvs_p = extract_hgvs_from_pa_allele_data(allele_data) + + # Update mapped variant + mapped_variant.hgvs_g = hgvs_g + mapped_variant.hgvs_c = hgvs_c + mapped_variant.hgvs_p = hgvs_p + job_manager.db.add(mapped_variant) + + annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.MAPPED_HGVS, + version=None, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "annotation_metadata": { + "hgvs_g": hgvs_g, + "hgvs_c": hgvs_c, + "hgvs_p": hgvs_p, + }, + }, + current=True, + ) + populated_count += 1 + + annotation_manager.flush() + job_manager.db.flush() + + job_manager.save_to_context( + { + "populated_count": populated_count, + "skipped_count": skipped_count, + "failed_count": failed_count, + } + ) + logger.info( + msg=f"Completed mapped HGVS population: {populated_count} populated, {skipped_count} skipped, {failed_count} failed.", + extra=job_manager.logging_context(), + ) + + if failed_count > 0 and populated_count == 0: + log_and_send_slack_message( + f"All {failed_count} variants failed HGVS population for score set {score_set.urn}. 
Possible ClinGen API outage.", + job_manager.logging_context(), + logging.ERROR, + ) + + job_manager.db.flush() + return JobExecutionOutcome.succeeded( + data={ + "populated_count": populated_count, + "skipped_count": skipped_count, + "failed_count": failed_count, + } + ) diff --git a/src/mavedb/worker/jobs/external_services/py.typed b/src/mavedb/worker/jobs/external_services/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py new file mode 100644 index 000000000..e16984292 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -0,0 +1,343 @@ +"""UniProt ID mapping jobs for protein sequence annotation. + +This module handles the submission and polling of UniProt ID mapping jobs +to enrich target gene metadata with UniProt identifiers. This enables +linking of genomic variants to protein-level functional information. + +The mapping process is asynchronous, requiring both submission and polling +jobs to handle the UniProt API's batch processing workflow. 
+""" + +import logging +from typing import Optional, TypedDict + +from sqlalchemy import select +from sqlalchemy.orm.attributes import flag_modified + +from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI +from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession +from mavedb.models.enums.job_pipeline import FailureCategory +from mavedb.models.job_dependency import JobDependency +from mavedb.models.score_set import ScoreSet +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +class MappingJob(TypedDict): + job_id: Optional[str] + accession: str + + +@with_pipeline_management +async def submit_uniprot_mapping_jobs_for_score_set( + ctx: dict, job_id: int, job_manager: JobManager +) -> JobExecutionOutcome: + """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. + + NOTE: This function assumes that a dependent polling job has already been created + for the same ScoreSet. It is the responsibility of this function to ensure that + the polling job exists and to set the `mapping_jobs` parameter on the polling job. + + Without running the polling job, the results of the submitted UniProt mapping jobs + will never be retrieved or processed, so running this function alone is insufficient + to complete the UniProt mapping workflow. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing target genes to map. + - correlation_id (str): Correlation ID for tracing requests across services. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job being executed. + job_manager (JobManager): Manager for job lifecycle and DB operations. 
+ + Side Effects: + - Submits UniProt ID mapping jobs for each target gene in the ScoreSet. + - Fetches the dependent job for this function, which is the polling job for UniProt results. + Sets the parameter `mapping_jobs` on the polling job with a dictionary of target gene IDs to UniProt job IDs. + TODO#646: Split mapping jobs into one per target gene so that polling can be more granular. + + Raises: + - UniProtPollingEnqueueError: If the dependent polling job cannot be found. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "submit_uniprot_mapping_jobs_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting UniProt mapping job submission.") + logger.info(msg="Started UniProt mapping job submission", extra=job_manager.logging_context()) + + # Preset submitted jobs metadata so it persists even if no jobs are submitted. + job.metadata_["submitted_jobs"] = {} + job_manager.db.flush() + + if not score_set.target_genes: + logger.error( + msg=f"No target genes found for score set {score_set.urn}. 
Skipped UniProt mapping job submission.", + extra=job_manager.logging_context(), + ) + + job_manager.db.flush() + return JobExecutionOutcome.succeeded(data={"jobs_submitted": 0}) + + uniprot_api = UniProtIDMappingAPI() + job_manager.save_to_context({"total_target_genes_to_map_to_uniprot": len(score_set.target_genes)}) + + mapping_jobs: dict[str, MappingJob] = {} + for idx, target_gene in enumerate(score_set.target_genes): + acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore + if not acs: + logger.warning( + msg=f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped mapping this target.", + extra=job_manager.logging_context(), + ) + continue + + if len(acs) != 1: + logger.warning( + msg=f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped mapping this target.", + extra=job_manager.logging_context(), + ) + continue + + ac_to_map = acs[0] + from_db = infer_db_name_from_sequence_accession(ac_to_map) + spawned_job = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore + + # Explicitly cast ints to strs in mapping job keys. These are converted to strings internally + # by SQLAlchemy when storing job_params as JSON, so be explicit here to avoid confusion. 
+ mapping_jobs[str(target_gene.id)] = {"job_id": spawned_job, "accession": ac_to_map} + + job_manager.save_to_context( + { + "submitted_uniprot_mapping_jobs": { + **job_manager.logging_context().get("submitted_uniprot_mapping_jobs", {}), + str(target_gene.id): mapping_jobs[str(target_gene.id)], + } + } + ) + job_manager.update_progress( + int((idx + 1 / len(score_set.target_genes)) * 95), + 100, + f"Submitted UniProt mapping job for target gene {target_gene.name}.", + ) + logger.info( + msg=f"Submitted UniProt ID mapping job for target gene {target_gene.id}.", + extra=job_manager.logging_context(), + ) + + # Save submitted jobs to job metadata for auditing purposes + job.metadata_["submitted_jobs"] = mapping_jobs + flag_modified(job, "metadata_") + + # If no mapping jobs were submitted, log and exit early. + if not mapping_jobs or not any((job_info["job_id"] for job_info in mapping_jobs.values())): + logger.warning(msg="No UniProt mapping jobs were submitted.", extra=job_manager.logging_context()) + job_manager.db.flush() + return JobExecutionOutcome.succeeded(data={"jobs_submitted": 0}) + + # It's an essential responsibility of the submit job (when submissions exist) to ensure that the polling job exists. + dependent_polling_job = job_manager.db.scalars( + select(JobDependency).where(JobDependency.depends_on_job_id == job.id) + ).all() + if not dependent_polling_job or len(dependent_polling_job) != 1: + logger.error( + msg=f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", + extra=job_manager.logging_context(), + ) + + job_manager.db.flush() + return JobExecutionOutcome.failed( + reason=f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", + data={"jobs_submitted": len(mapping_jobs)}, + failure_category=FailureCategory.SYSTEM_ERROR, + ) + + # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. 
@with_pipeline_management
async def poll_uniprot_mapping_jobs_for_score_set(
    ctx: dict, job_id: int, job_manager: JobManager
) -> JobExecutionOutcome:
    """Poll previously submitted UniProt ID mapping jobs for a given ScoreSet.

    Retrieves the results of the UniProt ID mapping jobs recorded in `mapping_jobs`
    (set on this job's params by the submit job) and writes the mapped UniProt IDs
    onto the corresponding target genes.

    Job Parameters:
    - score_set_id (int): The ID of the ScoreSet containing target genes to map.
    - correlation_id (str): Correlation ID for tracing requests across services.
    - mapping_jobs (dict): Dictionary of target gene IDs to UniProt job IDs.

    Args:
        ctx (dict): The job context dictionary.
        job_id (int): The ID of the job being processed.
        job_manager (JobManager): Manager for job lifecycle and DB operations.

    Side Effects:
        - Polls UniProt ID mapping jobs for each target gene in the ScoreSet.
        - Updates target genes with mapped UniProt IDs in the database.

    TODO#646: Split mapping jobs into one per target gene so that polling can be more granular.

    Returns:
        JobExecutionOutcome: succeeded with a `genes_mapped` count; failed with
        SERVICE_UNAVAILABLE (retryable) when any UniProt results are not yet ready;
        failed with DATA_ERROR when one or more genes could not be mapped.
    """
    # Get the job definition we are working on
    job = job_manager.get_job()

    _job_required_params = ["score_set_id", "correlation_id", "mapping_jobs"]
    validate_job_params(_job_required_params, job)

    # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above.
    score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one()  # type: ignore
    correlation_id = job.job_params["correlation_id"]  # type: ignore
    mapping_jobs: dict[str, MappingJob] = job.job_params.get("mapping_jobs", {})  # type: ignore

    # Setup initial context and progress
    job_manager.save_to_context(
        {
            "application": "mavedb-worker",
            "function": "poll_uniprot_mapping_jobs_for_score_set",
            "resource": score_set.urn,
            "correlation_id": correlation_id,
        }
    )
    job_manager.update_progress(0, 100, "Starting UniProt mapping job polling.")
    logger.info(msg="Started UniProt mapping job polling", extra=job_manager.logging_context())

    # NOTE(review): any() over MappingJob dicts is truthy for every populated entry,
    # so this effectively only guards against an empty/absent mapping_jobs dict.
    if not mapping_jobs or not any(mapping_jobs.values()):
        logger.warning(
            msg=f"No mapping jobs found in job parameters for polling UniProt mapping jobs for score set {score_set.urn}.",
            extra=job_manager.logging_context(),
        )
        job_manager.db.flush()
        return JobExecutionOutcome.succeeded(data={"genes_mapped": 0})

    # Poll each mapping job and update target genes with UniProt IDs
    uniprot_api = UniProtIDMappingAPI()
    pending_jobs = []
    failed_genes: dict[str, str] = {}
    for target_gene_id, mapping_job in mapping_jobs.items():
        mapping_job_id = mapping_job["job_id"]

        # A missing job ID means the submit step never created a UniProt job for
        # this gene; there is nothing to poll, so skip it.
        if not mapping_job_id:
            logger.warning(
                msg=f"No UniProt mapping job ID found for target gene ID {target_gene_id}. Skipped polling this job.",
                extra=job_manager.logging_context(),
            )
            continue

        # Check if the mapping job is ready
        if not uniprot_api.check_id_mapping_results_ready(mapping_job_id):
            logger.warning(
                msg=f"Job {mapping_job_id} not ready. Will retry polling.",
                extra=job_manager.logging_context(),
            )
            pending_jobs.append(target_gene_id)
            continue

        # Extract mapped UniProt IDs from results
        results = uniprot_api.get_id_mapping_results(mapping_job_id)
        mapped_ids = uniprot_api.extract_uniprot_id_from_results(results)
        mapped_ac = mapping_job["accession"]

        # Handle cases where no or ambiguous results are found. These are data quality issues
        # for this specific gene — record the failure and continue mapping remaining genes.
        if not mapped_ids:
            msg = f"No UniProt ID found for accession {mapped_ac}."
            logger.error(msg=msg, extra=job_manager.logging_context())
            failed_genes[target_gene_id] = f"no_results:{mapped_ac}"
            continue

        if len(mapped_ids) != 1:
            msg = f"Ambiguous UniProt ID mapping results for accession {mapped_ac}."
            logger.error(msg=msg, extra=job_manager.logging_context())
            failed_genes[target_gene_id] = f"ambiguous_results:{mapped_ac}"
            continue

        # assumes extract_uniprot_id_from_results yields [{accession: {"uniprot_id": ...}}]
        # — TODO(review): confirm against UniProtIDMappingAPI's return shape.
        mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"]

        # Update target gene with mapped UniProt ID. Compare IDs as strings because
        # JSON-stored job_params keys are strings while ORM IDs are ints.
        target_gene = next(
            (tg for tg in score_set.target_genes if str(tg.id) == str(target_gene_id)),
            None,
        )
        if not target_gene:
            msg = f"Target gene ID {target_gene_id} not found in score set {score_set.urn}."
            logger.error(msg=msg, extra=job_manager.logging_context())
            failed_genes[target_gene_id] = f"gene_not_found:{target_gene_id}"
            continue

        target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id
        job_manager.db.add(target_gene)
        logger.info(
            msg=f"Updated target gene {target_gene.id} with UniProt ID {mapped_uniprot_id}",
            extra=job_manager.logging_context(),
        )
        # Progress scales to 95%; the final 5% is reserved for wrap-up below.
        job_manager.update_progress(
            int((list(score_set.target_genes).index(target_gene) + 1) / len(score_set.target_genes) * 95),
            100,
            f"Polled UniProt mapping job for target gene {target_gene.name}.",
        )

    # If any polling jobs are still pending, signal for retry via a retryable failure category.
    # The decorator will evaluate should_retry() and re-enqueue if retries remain. This is a little hacky,
    # but it allows us to avoid raising exceptions for expected cases where UniProt results aren't ready yet.
    # A future version of this workflow could be improved by leveraging the _defer_by functionality in ARQ.
    if pending_jobs:
        logger.info(
            msg=f"UniProt results not ready for target gene(s) {pending_jobs}. Requesting retry.",
            extra=job_manager.logging_context(),
        )
        job_manager.db.flush()
        return JobExecutionOutcome.failed(
            reason=f"UniProt results not ready for {len(pending_jobs)} target gene(s). Will retry.",
            data={"pending_target_genes": pending_jobs},
            failure_category=FailureCategory.SERVICE_UNAVAILABLE,
        )

    if failed_genes:
        logger.warning(
            msg=f"UniProt mapping failed for {len(failed_genes)} target gene(s): {failed_genes}",
            extra=job_manager.logging_context(),
        )
        job_manager.db.flush()
        return JobExecutionOutcome.failed(
            reason=f"UniProt mapping failed for {len(failed_genes)} target gene(s).",
            data={"failed_genes": failed_genes, "genes_mapped": len(mapping_jobs) - len(failed_genes)},
            failure_category=FailureCategory.DATA_ERROR,
        )

    # NOTE(review): genes skipped above for a missing job_id are still counted in
    # genes_mapped here — confirm whether that is the intended success tally.
    job_manager.db.flush()
    return JobExecutionOutcome.succeeded(data={"genes_mapped": len(mapping_jobs)})
import logging

import requests
from sqlalchemy import select

from mavedb.lib.annotation_status_manager import AnnotationStatusManager
from mavedb.lib.clingen.allele_registry import (
    expand_allele_ids,
    get_canonical_pa_ids,
    get_matching_registered_ca_ids,
)
from mavedb.lib.types.workflow import JobExecutionOutcome
from mavedb.lib.variant_translations import upsert_variant_translations
from mavedb.models.enums.annotation_type import AnnotationType
from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus, FailureCategory
from mavedb.models.mapped_variant import MappedVariant
from mavedb.models.score_set import ScoreSet
from mavedb.models.variant import Variant
from mavedb.worker.jobs.utils.setup import validate_job_params
from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management
from mavedb.worker.lib.managers.job_manager import JobManager

logger = logging.getLogger(__name__)


@with_pipeline_management
async def populate_variant_translations_for_score_set(
    ctx: dict, job_id: int, job_manager: JobManager
) -> JobExecutionOutcome:
    """Populate variant translations (PA<->CA relationships) for a score set.

    Queries the ClinGen Allele Registry to discover relationships between protein
    allele (PA) and nucleotide allele (CA) ClinGen IDs, then stores them in the
    variant_translations table. Each unique allele ID is processed once even if
    shared across multiple mapped variants.

    Required job_params in the JobRun:
    - score_set_id (int): ID of the ScoreSet to process
    - correlation_id (str): Correlation ID for tracking

    Args:
        ctx (dict): The job context dictionary.
        job_id (int): The ID of the job being executed.
        job_manager (JobManager): Manager for job lifecycle and DB operations.

    Returns:
        JobExecutionOutcome: succeeded with translations_created/alleles_skipped/
        alleles_failed counts; failed with DEPENDENCY_FAILURE when every lookup
        failed and nothing was created (treated as a likely ClinGen outage).
    """
    job = job_manager.get_job()

    _job_required_params = ["score_set_id", "correlation_id"]
    validate_job_params(_job_required_params, job)

    # Params were validated above; ignore mypy's Optional warnings on job_params.
    score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one()  # type: ignore
    correlation_id = job.job_params["correlation_id"]  # type: ignore

    # Seed structured-logging context and report initial progress.
    job_manager.save_to_context(
        {
            "application": "mavedb-worker",
            "function": "populate_variant_translations_for_score_set",
            "resource": score_set.urn,
            "correlation_id": correlation_id,
        }
    )
    job_manager.update_progress(0, 100, "Starting variant translation population.")
    logger.info(msg="Started variant translation population.", extra=job_manager.logging_context())

    # Fetch all current mapped variants with their ClinGen allele IDs
    variant_rows = job_manager.db.execute(
        select(Variant.id, MappedVariant.clingen_allele_id)
        .join(MappedVariant, MappedVariant.variant_id == Variant.id)
        .join(ScoreSet, Variant.score_set_id == ScoreSet.id)
        .where(ScoreSet.id == score_set.id)
        .where(MappedVariant.current.is_(True))
    ).all()

    if not variant_rows:
        logger.warning(
            msg="No current mapped variants found for this score set.",
            extra=job_manager.logging_context(),
        )
        job_manager.db.flush()
        return JobExecutionOutcome.succeeded(
            data={"translations_created": 0, "alleles_skipped": 0, "alleles_failed": 0}
        )

    # Deduplicate: multiple mapped variants can share the same allele ID, but we only
    # need to query the ClinGen API once per unique ID. Track which variants map to each
    # allele so we can record annotations for all of them after a single lookup.
    allele_to_variants: dict[str, list[int]] = {}
    for variant_id, clingen_allele_id in variant_rows:
        if not clingen_allele_id:
            continue

        # A stored allele ID may encode multiple individual IDs; expand before grouping.
        for individual_id in expand_allele_ids([clingen_allele_id]):
            allele_to_variants.setdefault(individual_id, []).append(variant_id)

    unique_allele_ids = list(allele_to_variants.keys())
    total_alleles = len(unique_allele_ids)
    job_manager.save_to_context({"total_variants": len(variant_rows), "unique_allele_ids": total_alleles})

    if not unique_allele_ids:
        logger.warning(
            msg="No ClinGen allele IDs found on mapped variants.",
            extra=job_manager.logging_context(),
        )
        job_manager.db.flush()
        return JobExecutionOutcome.succeeded(
            data={"translations_created": 0, "alleles_skipped": 0, "alleles_failed": 0}
        )

    job_manager.update_progress(5, 100, f"Processing {total_alleles} unique allele IDs for variant translations.")
    logger.info(
        "Processing %s unique allele IDs for variant translations.",
        total_alleles,
        extra=job_manager.logging_context(),
    )

    total_created = 0
    total_skipped = 0
    total_failed = 0
    annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id)

    for index, allele_id in enumerate(unique_allele_ids):
        # Throttle progress updates/log lines to roughly every 5% of the allele list.
        # (total_alleles > 0 is always true here — we returned above when it was empty.)
        if total_alleles > 0 and index % max(total_alleles // 20, 1) == 0:
            progress = 5 + int((index / total_alleles) * 90)
            job_manager.update_progress(progress, 100, f"Processing allele {index + 1}/{total_alleles}.")
            logger.info(
                "Processing allele %s/%s: %s",
                index + 1,
                total_alleles,
                allele_id,
                extra=job_manager.logging_context(),
            )

        job_manager.save_to_context(
            {
                "current_allele_id": allele_id,
                "progress_index": index,
            }
        )

        variant_ids = allele_to_variants[allele_id]

        if allele_id.startswith("CA"):
            # CA (nucleotide) alleles: look up the MANE canonical protein alleles (PAs) for
            # this CA, then for each PA discover all registered transcript-level CAs. This
            # CA -> PA -> CA expansion builds the full translation graph so we can link
            # nucleotide variants to their protein equivalents and vice versa.
            try:
                canonical_pa_ids = await get_canonical_pa_ids(allele_id)
            except requests.exceptions.RequestException as exc:
                logger.error(
                    "ClinGen API request failed for canonical PA lookup of %s.",
                    allele_id,
                    extra=job_manager.logging_context(),
                    exc_info=exc,
                )
                for vid in variant_ids:
                    annotation_manager.add_annotation(
                        variant_id=vid,
                        annotation_type=AnnotationType.VARIANT_TRANSLATION,
                        version=None,
                        status=AnnotationStatus.FAILED,
                        failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR,
                        annotation_data={
                            "error_message": f"ClinGen API error looking up PA IDs for {allele_id}: {exc}",
                        },
                        current=True,
                    )
                total_failed += len(variant_ids)
                continue

            if not canonical_pa_ids:
                # Noncoding variants won't have protein alleles — this is expected and not an error.
                logger.debug(
                    "No canonical PA IDs found for %s (may be noncoding).",
                    allele_id,
                    extra=job_manager.logging_context(),
                )
                for vid in variant_ids:
                    annotation_manager.add_annotation(
                        variant_id=vid,
                        annotation_type=AnnotationType.VARIANT_TRANSLATION,
                        version=None,
                        status=AnnotationStatus.SKIPPED,
                        failure_category=AnnotationFailureCategory.NO_LINKED_ALLELE,
                        annotation_data={
                            "error_message": f"No canonical PA IDs for {allele_id}.",
                        },
                        current=True,
                    )
                total_skipped += len(variant_ids)
                continue

            created = 0
            failed = 0
            translation_pairs: set[tuple[str, str]] = set()
            for pa_id in canonical_pa_ids:
                # Record the direct PA <-> original CA relationship.
                translation_pairs.add((pa_id, allele_id))

                # Then expand: find all other CAs registered under this PA so we capture
                # alternate transcript-level representations of the same protein change.
                try:
                    ca_ids = await get_matching_registered_ca_ids(pa_id)
                except requests.exceptions.RequestException as exc:
                    logger.error(
                        "ClinGen API request failed for registered CA lookup of %s.",
                        pa_id,
                        extra=job_manager.logging_context(),
                        exc_info=exc,
                    )
                    failed += 1
                    continue

                for ca_id in ca_ids:
                    translation_pairs.add((pa_id, ca_id))

            created, existing = upsert_variant_translations(job_manager.db, list(translation_pairs))
            for vid in variant_ids:
                annotation_manager.add_annotation(
                    variant_id=vid,
                    annotation_type=AnnotationType.VARIANT_TRANSLATION,
                    version=None,
                    # Any PA-level lookup failure marks all variants on this allele FAILED,
                    # even though some pairs may still have been upserted above.
                    status=AnnotationStatus.FAILED if failed > 0 else AnnotationStatus.SUCCESS,
                    annotation_data={
                        "annotation_metadata": {
                            "allele_id": allele_id,
                            "translation_pairs": [[pa, ca] for pa, ca in translation_pairs],
                            "translations_new": created,
                            "translations_existing": existing,
                            "pa_lookups_failed": failed,
                            "pa_lookups_total": len(canonical_pa_ids),
                        },
                    },
                    current=True,
                )

            # NOTE(review): here total_failed accumulates failed PA lookups, while the
            # other branches accumulate failed *variants* — confirm the intended unit.
            total_created += created
            total_failed += failed

        elif allele_id.startswith("PA"):
            # PA (protein) alleles: directly look up all registered transcript-level CAs.
            # This is simpler than the CA path since we already have the protein allele.
            try:
                ca_ids = await get_matching_registered_ca_ids(allele_id)
            except requests.exceptions.RequestException as exc:
                logger.error(
                    "ClinGen API request failed for registered CA lookup of %s.",
                    allele_id,
                    extra=job_manager.logging_context(),
                    exc_info=exc,
                )
                for vid in variant_ids:
                    annotation_manager.add_annotation(
                        variant_id=vid,
                        annotation_type=AnnotationType.VARIANT_TRANSLATION,
                        version=None,
                        status=AnnotationStatus.FAILED,
                        failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR,
                        annotation_data={
                            "error_message": f"ClinGen API error for {allele_id}: {exc}",
                        },
                        current=True,
                    )
                total_failed += len(variant_ids)
                continue

            if not ca_ids:
                logger.warning(
                    "No matching registered transcript CA IDs for PA allele %s. This is unexpected.",
                    allele_id,
                    extra=job_manager.logging_context(),
                )
                for vid in variant_ids:
                    annotation_manager.add_annotation(
                        variant_id=vid,
                        annotation_type=AnnotationType.VARIANT_TRANSLATION,
                        version=None,
                        status=AnnotationStatus.SKIPPED,
                        failure_category=AnnotationFailureCategory.NO_LINKED_ALLELE,
                        annotation_data={
                            "error_message": f"No registered transcript CA IDs for {allele_id}.",
                        },
                        current=True,
                    )
                total_skipped += len(variant_ids)
                continue

            translation_pairs = set([(allele_id, ca_id) for ca_id in ca_ids])
            created, existing = upsert_variant_translations(job_manager.db, list(translation_pairs))
            for vid in variant_ids:
                annotation_manager.add_annotation(
                    variant_id=vid,
                    annotation_type=AnnotationType.VARIANT_TRANSLATION,
                    version=None,
                    status=AnnotationStatus.SUCCESS,
                    annotation_data={
                        "annotation_metadata": {
                            "allele_id": allele_id,
                            "translation_pairs": [[pa, ca] for pa, ca in translation_pairs],
                            "translations_new": created,
                            "translations_existing": existing,
                        },
                    },
                    current=True,
                )

            total_created += created

        else:
            # Neither CA nor PA prefixed — we don't know how to translate this ID.
            logger.warning(
                "Unrecognized ClinGen allele ID format: %s. Skipping.",
                allele_id,
                extra=job_manager.logging_context(),
            )
            for vid in variant_ids:
                annotation_manager.add_annotation(
                    variant_id=vid,
                    annotation_type=AnnotationType.VARIANT_TRANSLATION,
                    version=None,
                    status=AnnotationStatus.SKIPPED,
                    failure_category=AnnotationFailureCategory.UNSUPPORTED_IDENTIFIER,
                    annotation_data={
                        "error_message": f"Unrecognized allele ID format: {allele_id}",
                    },
                    current=True,
                )
            total_skipped += len(variant_ids)

    # Persist all buffered annotations and pending ORM changes before summarizing.
    annotation_manager.flush()
    job_manager.db.flush()

    job_manager.save_to_context(
        {
            "translations_created": total_created,
            "alleles_skipped": total_skipped,
            "alleles_failed": total_failed,
        }
    )
    logger.info(
        "Completed variant translation population: %s created, %s skipped, %s failed.",
        total_created,
        total_skipped,
        total_failed,
        extra=job_manager.logging_context(),
    )

    # Uniform failure with zero successes suggests the external service is down,
    # not a data problem — surface it as a dependency failure.
    if total_failed > 0 and total_created == 0:
        error_message = f"All {total_failed} variant translation lookups failed for score set {score_set.urn}. Possible ClinGen API outage."
        logger.error(error_message, extra=job_manager.logging_context())
        job_manager.db.flush()
        return JobExecutionOutcome.failed(
            reason=error_message,
            data={
                "translations_created": 0,
                "alleles_skipped": total_skipped,
                "alleles_failed": total_failed,
            },
            failure_category=FailureCategory.DEPENDENCY_FAILURE,
        )

    job_manager.db.flush()
    return JobExecutionOutcome.succeeded(
        data={
            "translations_created": total_created,
            "alleles_skipped": total_skipped,
            "alleles_failed": total_failed,
        }
    )
+ +The processing is asynchronous, requiring batch submission of HGVS strings +to the VEP API with fallback to Variant Recoder when necessary. +""" + +import asyncio +import logging +from datetime import date + +from sqlalchemy import select + +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.lib.utils import batched +from mavedb.lib.vep import VEP_CONSEQUENCES, get_functional_consequence, run_variant_recoder +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + +_VEP_BATCH_SIZE = 200 +_RECODER_BATCH_SIZE = 25 +_RECODER_CONCURRENCY = 5 + + +@with_pipeline_management +async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Populate VEP functional consequence predictions for all mapped variants in a ScoreSet. + + This function retrieves all mapped variants with a populated hgvs_assay_level field for a given + ScoreSet and submits them to the Ensembl VEP API in configurable batches. It handles fallback + to the Variant Recoder API for variants that cannot be processed by VEP directly. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing mapped variants. + - correlation_id (str): Correlation ID for tracing requests across services. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job being executed. 
+ job_manager (JobManager): Manager for job lifecycle and DB operations. + + Returns: + JobExecutionOutcome: Outcome with counts of processed, successful, and failed variants. + """ + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + # Safely ignore mypy warnings here, as params were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "populate_vep_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting VEP population.") + logger.info(msg="Started VEP population", extra=job_manager.logging_context()) + + mapped_variants = job_manager.db.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == score_set.id, + MappedVariant.current.is_(True), + MappedVariant.post_mapped.isnot(None), + ) + ).all() + + if not mapped_variants: + logger.warning( + msg=f"No mapped variants found for score set {score_set.urn}. Skipped VEP population.", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.succeeded( + data={ + "variants_processed": 0, + "variants_with_consequences": 0, + "variants_without_consequences": 0, + "variants_recoder_failed": 0, + } + ) + + job_manager.save_to_context({"total_variants_to_process": len(mapped_variants)}) + logger.info( + msg=f"Found {len(mapped_variants)} mapped variants for VEP processing", + extra=job_manager.logging_context(), + ) + + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id) + + mapped_variants_by_id = {mv.id: mv for mv in mapped_variants} + + # Extract HGVS strings; skip and annotate variants that have none. 
+ hgvs_and_mapped_variant_id_pairs: list[tuple[str, int]] = [] + + for mapped_variant in mapped_variants: + if not mapped_variant.hgvs_assay_level: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.MISSING_IDENTIFIER, + annotation_data={"error_message": "Mapped variant does not have an associated HGVS string."}, + ) + logger.debug("Mapped variant does not have an associated HGVS string.", extra=job_manager.logging_context()) + continue + + hgvs_and_mapped_variant_id_pairs.append((mapped_variant.hgvs_assay_level, mapped_variant.id)) # type: ignore + + batches = list(batched(hgvs_and_mapped_variant_id_pairs, _VEP_BATCH_SIZE)) + + job_manager.save_to_context({"vep_batches": len(batches)}) + logger.debug( + msg=f"Prepared {len(batches)} VEP batches ({_VEP_BATCH_SIZE} variants/batch)", + extra=job_manager.logging_context(), + ) + + # --- Phase 1: Initial VEP pass --- + all_consequences: dict[str, str | None] = {} + all_missing_hgvs: set[str] = set() + missing_hgvs_to_variant_ids: dict[str, list[int]] = {} + + for batch_idx, batch in enumerate(batches): + try: + logger.debug( + msg=f"Processing VEP batch {batch_idx + 1}/{len(batches)}", + extra=job_manager.logging_context(), + ) + + hgvs_strings, mapped_variant_ids = map(list, zip(*batch)) # type: ignore + + consequences = await get_functional_consequence(hgvs_strings) + logger.debug( + msg=f"Received consequences for {len(consequences)} variants in VEP batch {batch_idx + 1}", + extra=job_manager.logging_context(), + ) + + all_consequences.update(consequences) + + missing_hgvs = set(hgvs_strings) - set(consequences.keys()) + for hgvs, mapped_variant_id in zip(hgvs_strings, mapped_variant_ids): + if hgvs in missing_hgvs: + all_missing_hgvs.add(hgvs) + mv = mapped_variants_by_id[mapped_variant_id] + missing_hgvs_to_variant_ids.setdefault(hgvs, 
[]).append(mv.variant_id) # type: ignore + + progress_pct = int((batch_idx + 1) / len(batches) * 33) + job_manager.update_progress( + progress_pct, + 100, + f"Processed initial VEP batch {batch_idx + 1}/{len(batches)}", + ) + job_manager.save_to_context( + { + "initial_vep_batches_processed": batch_idx + 1, + "missing_hgvs_count": len(all_missing_hgvs), + } + ) + + except Exception as e: + logger.error( + msg=f"VEP processing error for batch {batch_idx + 1}: {str(e)}", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.errored( + exception=e, + data={ + "initial_vep_batches_processed": batch_idx + 1, + "variant_recoder_batches_processed": 0, + "missing_hgvs_count": len(all_missing_hgvs), + }, + ) + + logger.info( + msg=f"Completed initial VEP processing. {len(all_missing_hgvs)} variants require Variant Recoder fallback.", + extra=job_manager.logging_context(), + ) + + # --- Phase 2: Variant Recoder fallback for HGVS strings VEP could not resolve --- + hgvs_to_genomic: dict[str, list[str]] = {} + recoder_missing_hgvs: set[str] = set() + + if all_missing_hgvs: + logger.info( + msg=f"Running Variant Recoder for {len(all_missing_hgvs)} HGVS strings", + extra=job_manager.logging_context(), + ) + + recoder_batch_list = list(batched(list(all_missing_hgvs), _RECODER_BATCH_SIZE)) + + logger.debug( + msg=f"Running {len(recoder_batch_list)} Variant Recoder batches with concurrency {_RECODER_CONCURRENCY}", + extra=job_manager.logging_context(), + ) + + semaphore = asyncio.Semaphore(_RECODER_CONCURRENCY) + + async def _recoder_with_semaphore(batch: list[str], batch_idx: int, total: int) -> dict[str, list[str]]: + async with semaphore: + logger.debug( + msg=f"Starting Variant Recoder batch {batch_idx + 1}/{total} ({len(batch)} HGVS strings)", + extra=job_manager.logging_context(), + ) + result = await run_variant_recoder(batch) + logger.debug( + msg=f"Completed Variant Recoder batch {batch_idx + 1}/{total} ({len(result)} variants 
recoded)", + extra=job_manager.logging_context(), + ) + return result + + total_recoder_batches = len(recoder_batch_list) + recoder_results = await asyncio.gather( + *[ + _recoder_with_semaphore(list(recoder_batch), idx, total_recoder_batches) + for idx, recoder_batch in enumerate(recoder_batch_list) + ], + return_exceptions=True, + ) + + successful_batches = sum(1 for r in recoder_results if not isinstance(r, Exception)) + + first_exception = next((r for r in recoder_results if isinstance(r, Exception)), None) + if first_exception is not None: + logger.error( + msg=f"Variant Recoder error ({successful_batches}/{total_recoder_batches} batches succeeded): {str(first_exception)}", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.errored( + exception=first_exception, + data={ + "initial_vep_batches_processed": len(batches), + "variant_recoder_batches_processed": successful_batches, + "missing_hgvs_count": len(all_missing_hgvs), + }, + ) + + for result in recoder_results: + hgvs_to_genomic.update(result) # type: ignore[arg-type] + + job_manager.save_to_context( + { + "variant_recoder_batches_processed": len(recoder_batch_list), + "recoded_variants_count": len(hgvs_to_genomic), + } + ) + job_manager.update_progress( + 66, + 100, + f"Completed Variant Recoder for {len(recoder_batch_list)} batches ({len(hgvs_to_genomic)} variants recoded)", + ) + logger.info( + msg=f"Completed Variant Recoder processing. 
{len(hgvs_to_genomic)} variants successfully recoded.", + extra=job_manager.logging_context(), + ) + + # --- Phase 3: VEP pass on the recoded genomic HGVS strings --- + recoded_vep_batch_list = list(batched(list(hgvs_to_genomic.values()), _VEP_BATCH_SIZE)) + all_recoded_consequences: dict[str, str | None] = {} + + for recoded_vep_batch_idx, recoded_vep_batch in enumerate(recoded_vep_batch_list): + try: + logger.debug( + msg=f"Processing recoded HGVS VEP batch {recoded_vep_batch_idx + 1}/{len(recoded_vep_batch_list)}", + extra=job_manager.logging_context(), + ) + + recoded_vep_consequences = await get_functional_consequence(recoded_vep_batch) + all_recoded_consequences.update(recoded_vep_consequences) + + progress_pct = 66 + int((recoded_vep_batch_idx + 1) / len(recoded_vep_batch_list) * 33) + job_manager.update_progress( + progress_pct, + 100, + f"Processed recoded VEP batch {recoded_vep_batch_idx + 1}/{len(recoded_vep_batch_list)}", + ) + job_manager.save_to_context( + { + "recoded_vep_batches_processed": recoded_vep_batch_idx + 1, + "recoded_consequences_count": len(all_recoded_consequences), + } + ) + + except Exception as e: + logger.error( + msg=f"VEP processing error for recoded batch {recoded_vep_batch_idx + 1}: {str(e)}", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.errored( + exception=e, + data={ + "initial_vep_batches_processed": len(batches), + "variant_recoder_batches_processed": len(recoder_batch_list), + "recoded_vep_batches_processed": recoded_vep_batch_idx + 1, + "missing_hgvs_count": len(all_missing_hgvs), + }, + ) + + logger.info( + msg=f"Completed recoded VEP processing. {len(all_recoded_consequences)} recoded consequences retrieved.", + extra=job_manager.logging_context(), + ) + + # Map most-severe consequence from recoded genomic HGVS back to the original HGVS. 
+ for original_hgvs, recoded_hgvs_list in hgvs_to_genomic.items(): + recoded_consequences_for_variant = [ + c for recoded_hgvs in recoded_hgvs_list if (c := all_recoded_consequences.get(recoded_hgvs)) + ] + + if recoded_consequences_for_variant: + most_severe = next( + (c for c in VEP_CONSEQUENCES if c in recoded_consequences_for_variant), + None, + ) + if most_severe: + all_consequences[original_hgvs] = most_severe + logger.debug( + msg=f"Selected most severe consequence '{most_severe}' for {original_hgvs}", + extra=job_manager.logging_context(), + ) + else: + logger.debug( + msg=f"Could not retrieve functional consequences for any recoded variants of {original_hgvs}", + extra=job_manager.logging_context(), + ) + + recoder_missing_hgvs = all_missing_hgvs - set(hgvs_to_genomic.keys()) + + # --- Phase 4: Annotate outcomes and update mapped variants in a single pass --- + + # HGVS strings that went through both VEP passes but still have no consequence. + all_processed_hgvs = {h for h, _ in hgvs_and_mapped_variant_id_pairs} + vep_failed_hgvs = all_processed_hgvs - set(all_consequences.keys()) - recoder_missing_hgvs + + variants_processed = 0 + variants_with_consequences = 0 + variants_without_consequences = 0 + variants_recoder_failed = 0 + + for hgvs_string, mapped_variant_id in hgvs_and_mapped_variant_id_pairs: + mapped_variant = mapped_variants_by_id.get(mapped_variant_id) # type: ignore + if mapped_variant is None: + continue + + consequence = all_consequences.get(hgvs_string) + if consequence: + mapped_variant.vep_functional_consequence = consequence + mapped_variant.vep_access_date = date.today() + job_manager.db.add(mapped_variant) + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + status=AnnotationStatus.SUCCESS, + annotation_data={"annotation_metadata": {"functional_consequence": consequence}}, + ) + variants_with_consequences += 1 + logger.debug( + 
msg=f"Set consequence '{consequence}' for mapped variant {mapped_variant_id} (HGVS: {hgvs_string})", + extra=job_manager.logging_context(), + ) + elif hgvs_string in vep_failed_hgvs: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, + annotation_data={ + "error_message": "VEP could not determine a functional consequence for this variant, even after Variant Recoder fallback.", + }, + ) + variants_without_consequences += 1 + logger.debug( + msg=f"Recorded VEP failure for mapped_variant_id {mapped_variant_id} (HGVS: {hgvs_string})", + extra=job_manager.logging_context(), + ) + elif hgvs_string in recoder_missing_hgvs: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, + annotation_data={ + "error_message": "Variant Recoder could not recode this HGVS string to a genomic equivalent.", + }, + ) + variants_recoder_failed += 1 + logger.debug( + msg=f"Recorded Variant Recoder failure for mapped_variant_id {mapped_variant_id} (HGVS: {hgvs_string})", + extra=job_manager.logging_context(), + ) + else: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.UNKNOWN, + annotation_data={ + "error_message": "Variant was not classified by any VEP outcome branch. 
This is a bug.", + }, + ) + variants_without_consequences += 1 + logger.warning( + msg=f"Unexpected state: mapped_variant_id {mapped_variant_id} (HGVS: {hgvs_string}) was not classified by any outcome branch.", + extra=job_manager.logging_context(), + ) + + variants_processed += 1 + + annotation_manager.flush() + job_manager.db.flush() + + job_manager.update_progress( + 100, + 100, + f"Completed VEP functional consequence prediction for {variants_with_consequences}/{variants_processed} variants.", + ) + logger.info( + msg=f"Completed VEP prediction: {variants_with_consequences} with consequences, {variants_without_consequences} without, {variants_recoder_failed} recoder failed", + extra=job_manager.logging_context(), + ) + + job_manager.db.flush() + return JobExecutionOutcome.succeeded( + data={ + "variants_processed": variants_processed, + "variants_with_consequences": variants_with_consequences, + "variants_without_consequences": variants_without_consequences, + "variants_recoder_failed": variants_recoder_failed, + } + ) diff --git a/src/mavedb/worker/jobs/pipeline_management/__init__.py b/src/mavedb/worker/jobs/pipeline_management/__init__.py new file mode 100644 index 000000000..95470f75e --- /dev/null +++ b/src/mavedb/worker/jobs/pipeline_management/__init__.py @@ -0,0 +1,12 @@ +""" +Pipeline management job entrypoints. + +This module exposes job functions for pipeline management, such as starting a pipeline. +Import job functions here and add them to __all__ for job discovery and import convenience. 
+""" + +from .start_pipeline import start_pipeline + +__all__ = [ + "start_pipeline", +] diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py new file mode 100644 index 000000000..a5b0bd40d --- /dev/null +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -0,0 +1,62 @@ +import logging + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import FailureCategory +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Start the pipeline associated with the given job. + + This job initializes and starts the pipeline execution process. + It sets up the necessary pipeline management context and triggers + the pipeline coordination. + + NOTE: This function requires a dedicated 'start_pipeline' job run record + in the database. This job run must be created prior to invoking this function + and should be associated with the pipeline to be started. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job run. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Initializes and starts the pipeline execution. 
+    Returns:
+        JobExecutionOutcome: Outcome indicating success and any exception details
+""" + +from datetime import timedelta +from typing import Callable, List + +from arq.cron import CronJob, cron + +from mavedb.lib.types.workflow import JobDefinition +from mavedb.models.enums.job_pipeline import JobType +from mavedb.worker.jobs.data_management import ( + refresh_materialized_views, + refresh_published_variants_view, +) +from mavedb.worker.jobs.external_services import ( + link_gnomad_variants, + poll_uniprot_mapping_jobs_for_score_set, + populate_hgvs_for_score_set, + populate_variant_translations_for_score_set, + populate_vep_for_score_set, + refresh_clinvar_controls, + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, + submit_uniprot_mapping_jobs_for_score_set, + warm_clingen_cache, +) +from mavedb.worker.jobs.pipeline_management import start_pipeline +from mavedb.worker.jobs.system import cleanup_stalled_jobs +from mavedb.worker.jobs.variant_processing import ( + create_variants_for_score_set, + map_variants_for_score_set, +) + +# All job functions for ARQ worker +BACKGROUND_FUNCTIONS: List[Callable] = [ + # Variant processing jobs + create_variants_for_score_set, + map_variants_for_score_set, + # External service jobs + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, + warm_clingen_cache, + refresh_clinvar_controls, + submit_uniprot_mapping_jobs_for_score_set, + poll_uniprot_mapping_jobs_for_score_set, + link_gnomad_variants, + populate_hgvs_for_score_set, + populate_variant_translations_for_score_set, + populate_vep_for_score_set, + # Data management jobs + refresh_materialized_views, + refresh_published_variants_view, + # Pipeline management jobs + start_pipeline, + # System maintenance jobs + cleanup_stalled_jobs, +] + +# Cron job definitions for ARQ worker +BACKGROUND_CRONJOBS: List[CronJob] = [ + cron( + refresh_materialized_views, + name="refresh_all_materialized_views", + hour=20, + minute=0, + keep_result=timedelta(minutes=2).total_seconds(), + ), + cron( + cleanup_stalled_jobs, + 
name="cleanup_stalled_jobs_cron", + minute={15, 45}, # Run at :15 and :45 past each hour (every 30 minutes) + keep_result=timedelta(minutes=25).total_seconds(), + ), +] + + +STANDALONE_JOB_DEFINITIONS: dict[Callable, JobDefinition] = { + create_variants_for_score_set: { + "dependencies": [], + "params": { + "score_set_id": None, + "updater_id": None, + "correlation_id": None, + "scores_file_key": None, + "counts_file_key": None, + "score_columns_metadata": None, + "count_columns_metadata": None, + }, + "function": "create_variants_for_score_set", + "key": "create_variants_for_score_set", + "type": JobType.VARIANT_CREATION, + }, + map_variants_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "updater_id": None, "correlation_id": None}, + "function": "map_variants_for_score_set", + "key": "map_variants_for_score_set", + "type": JobType.VARIANT_MAPPING, + }, + submit_score_set_mappings_to_car: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "submit_score_set_mappings_to_car", + "key": "submit_score_set_mappings_to_car", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + submit_score_set_mappings_to_ldh: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "submit_score_set_mappings_to_ldh", + "key": "submit_score_set_mappings_to_ldh", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + warm_clingen_cache: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "warm_clingen_cache", + "key": "warm_clingen_cache", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + refresh_clinvar_controls: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "refresh_clinvar_controls", + "key": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + submit_uniprot_mapping_jobs_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, 
"correlation_id": None}, + "function": "submit_uniprot_mapping_jobs_for_score_set", + "key": "submit_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + poll_uniprot_mapping_jobs_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "poll_uniprot_mapping_jobs_for_score_set", + "key": "poll_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + link_gnomad_variants: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "link_gnomad_variants", + "key": "link_gnomad_variants", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + populate_hgvs_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "populate_hgvs_for_score_set", + "key": "populate_hgvs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + populate_variant_translations_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "populate_variant_translations_for_score_set", + "key": "populate_variant_translations_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + populate_vep_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "populate_vep_for_score_set", + "key": "populate_vep_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + refresh_materialized_views: { + "dependencies": [], + "params": {"correlation_id": None}, + "function": "refresh_materialized_views", + "key": "refresh_materialized_views", + "type": JobType.DATA_MANAGEMENT, + }, + refresh_published_variants_view: { + "dependencies": [], + "params": {"correlation_id": None}, + "function": "refresh_published_variants_view", + "key": "refresh_published_variants_view", + "type": JobType.DATA_MANAGEMENT, + }, + cleanup_stalled_jobs: { + "dependencies": [], + 
"params": {"correlation_id": None}, + "function": "cleanup_stalled_jobs", + "key": "cleanup_stalled_jobs", + "type": JobType.SYSTEM_MAINTENANCE, + }, +} +""" +Standalone job definitions for direct job submission outside of pipelines. +All job definitions in this dict must correspond to a job function in BACKGROUND_FUNCTIONS +and must not have any dependencies on other jobs. +""" + + +__all__ = [ + "BACKGROUND_FUNCTIONS", + "BACKGROUND_CRONJOBS", + "STANDALONE_JOB_DEFINITIONS", +] diff --git a/src/mavedb/worker/jobs/system/__init__.py b/src/mavedb/worker/jobs/system/__init__.py new file mode 100644 index 000000000..dff693db1 --- /dev/null +++ b/src/mavedb/worker/jobs/system/__init__.py @@ -0,0 +1,9 @@ +"""System maintenance jobs for worker health and job lifecycle management. + +This package contains jobs that maintain the worker system itself, including: +- cleanup_stalled_jobs: Periodic cleanup of zombie/stalled jobs +""" + +from mavedb.worker.jobs.system.cleanup import cleanup_stalled_jobs + +__all__ = ["cleanup_stalled_jobs"] diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py new file mode 100644 index 000000000..653404318 --- /dev/null +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -0,0 +1,471 @@ +"""Periodic cleanup job for detecting and handling stalled/zombie jobs. + +This module provides a janitor job that runs periodically to find jobs that have +been stuck in intermediate states (QUEUED, RUNNING, PENDING) beyond reasonable +timeouts and handles them appropriately. + +Jobs can get stuck due to: +- Worker crashes during execution +- Race conditions during enqueue (process crash between state change and ARQ enqueue) +- Network issues preventing state updates +- Database deadlocks or transaction failures + +The cleanup job acts as a safety net to ensure jobs don't remain in limbo forever. 
+""" + +import logging +from datetime import datetime, timedelta, timezone + +from arq import ArqRedis +from arq.jobs import Job as ArqJob +from arq.jobs import JobStatus as ArqJobStatus +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.lib.slack import send_slack_error, send_slack_job_failure +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.decorators.job_management import with_job_management +from mavedb.worker.lib.managers.constants import ACTIVE_JOB_STATUSES, TERMINAL_PIPELINE_STATUSES +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.utils import arq_job_id + +logger = logging.getLogger(__name__) + +# Timeout thresholds for detecting stalled jobs (in minutes). +# RUNNING_TIMEOUT_MINUTES must stay below ArqWorkerSettings.job_timeout (currently 2 hours) +# to avoid marking legitimately running jobs as stalled. +RUNNING_TIMEOUT_MINUTES = 90 # RUNNING jobs should complete within 90 min (30 min buffer under ARQ timeout) +PENDING_TIMEOUT_MINUTES = 5 # PENDING jobs which are actionable within pipelines should be enqueued within 5 minutes +PIPELINE_STUCK_TIMEOUT_MINUTES = ( + 5 # Pipelines in non-terminal states with no active jobs should resolve within 5 minutes +) + + +async def _handle_stalled_job_retry( + job: JobRun, + manager: JobManager, + redis: ArqRedis, + stall_reason: str, + db: Session, +) -> bool: + """Handle retry and enqueue for a stalled job. 
+ + For pipeline jobs, the dependency state determines the recovery path before + any retry bookkeeping occurs: + + - Unfulfillable dependency (terminal failure/cancel): skip directly without + consuming retry budget — the job can never run regardless of retries. + - Dependency not yet met (still running/pending): fail+retry back to PENDING + so the pipeline manager will enqueue it once the dependency completes. + - Dependency satisfied (or standalone job): fail+retry+enqueue via ARQ. + + Args: + job: The stalled job to handle + manager: JobManager for this job + redis: ARQ Redis connection + stall_reason: Human-readable reason for stalling + db: Database session + + Returns: + True if job was successfully handled, False if permanently failed + """ + # For pipeline jobs, decide the recovery path upfront based on dependency state. + # This keeps the three outcomes — skip, wait, enqueue — distinct and avoids + # consuming the retry budget for jobs that can never run. + if job.pipeline_id is not None: + pipeline_manager = PipelineManager(db, redis, job.pipeline_id) + + should_skip, skip_reason = pipeline_manager.should_skip_job_due_to_dependencies(job) + if should_skip: + # Dependency is permanently unsatisfiable — skip directly without fail/retry. + logger.info( + f"Skipping stalled pipeline job {job.urn} due to unsatisfiable dependencies: {skip_reason}", + extra=manager.logging_context(), + ) + manager.skip_job( + result=JobExecutionOutcome.skipped( + data={"reason": skip_reason, "timestamp": datetime.now().isoformat()} + ) + ) + return True + + if not pipeline_manager.can_enqueue_job(job): + # Dependencies exist but aren't terminal yet — retry back to PENDING and let + # the pipeline manager enqueue the job when the dependency completes. 
+ logger.info( + f"Stalled pipeline job {job.urn} dependencies not yet met - leaving in PENDING for pipeline manager", + extra=manager.logging_context(), + ) + manager.fail_job( + result=JobExecutionOutcome.failed( + reason=stall_reason, data={"reason": stall_reason}, failure_category=FailureCategory.TIMEOUT + ), + ) + job.failure_category = FailureCategory.TIMEOUT + db.flush() + + if not manager.should_retry(): + job.failure_category = FailureCategory.SYSTEM_ERROR + db.flush() + logger.warning( + f"Stalled job {job.urn} cannot be retried (max retries reached)", extra=manager.logging_context() + ) + send_slack_job_failure( + job_urn=job.urn, + job_function=job.job_function, + reason=stall_reason, + failure_category=str(FailureCategory.SYSTEM_ERROR), + retry_count=job.retry_count, + max_retries=job.max_retries, + will_retry=False, + ) + return False + + await manager.prepare_retry(reason=stall_reason) + db.flush() + return True + + # Standalone job or pipeline job whose dependencies are satisfied — fail, retry, and enqueue. 
+ manager.fail_job( + result=JobExecutionOutcome.failed( + reason=stall_reason, data={"reason": stall_reason}, failure_category=FailureCategory.TIMEOUT + ), + ) + job.failure_category = FailureCategory.TIMEOUT + db.flush() + + if not manager.should_retry(): + job.failure_category = FailureCategory.SYSTEM_ERROR + db.flush() + logger.warning( + f"Stalled job {job.urn} cannot be retried (max retries reached)", extra=manager.logging_context() + ) + send_slack_job_failure( + job_urn=job.urn, + job_function=job.job_function, + reason=stall_reason, + failure_category=str(FailureCategory.SYSTEM_ERROR), + retry_count=job.retry_count, + max_retries=job.max_retries, + will_retry=False, + ) + return False + + await manager.prepare_retry(reason=stall_reason) + db.flush() + + try: + manager.prepare_queue() # Transition to QUEUED + db.flush() + result = await redis.enqueue_job(job.job_function, job.id, _job_id=arq_job_id(job)) + + if result is None: + raise RuntimeError( + f"Failed to enqueue job {job.urn} when retrying stalled job - Redis did not return a job ID" + ) + + logger.info(f"Successfully retried and enqueued stalled job {job.urn}", extra=manager.logging_context()) + return True + + except Exception as e: + logger.error(f"Failed to enqueue stalled job {job.urn}: {e}", extra=manager.logging_context()) + error_msg = f"Failed to enqueue after stall recovery: {e}" + manager.fail_job( + result=JobExecutionOutcome.failed( + reason=error_msg, data={"reason": error_msg}, failure_category=FailureCategory.SYSTEM_ERROR + ), + ) + job.failure_category = FailureCategory.SYSTEM_ERROR # Enqueue failures during cleanup are not retryable + send_slack_job_failure( + job_urn=job.urn, + job_function=job.job_function, + reason=error_msg, + failure_category=str(FailureCategory.SYSTEM_ERROR), + retry_count=job.retry_count, + max_retries=job.max_retries, + will_retry=False, + ) + return False + + +@with_guaranteed_job_run_record("cron_job") +@with_job_management +async def 
cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Detect and handle jobs that have stalled in intermediate states. + + This job runs periodically (every 15 minutes) to find jobs that have been + stuck in QUEUED, RUNNING, or PENDING states beyond reasonable timeouts + and handles them appropriately. + + Stalled job detection criteria: + - QUEUED: Present in DB as QUEUED but absent from ARQ's Redis queue + (process crashed between prepare_queue and redis.enqueue_job) + - RUNNING: Started > 60 minutes ago but not finished (worker likely crashed) + - PENDING: Created > 5 minutes ago in a pipeline and currently runnable + (coordination failure) + - Pipeline stuck: Non-terminal pipeline with no active jobs older than 5 minutes + (coordinate_pipeline() crashed before writing final status) + + Actions taken: + - If job has retries remaining: Mark PENDING for retry (will be re-enqueued by pipeline) + - If max retries reached: Mark FAILED with SYSTEM_ERROR category + + Args: + ctx: ARQ worker context containing database session and redis connection + job_id: ID of the current job run + job_manager: JobManager instance for managing the current job run + + Returns: + JobExecutionOutcome with counts of cleaned up jobs by state + + Example: + Job stalled in QUEUED (crash during enqueue): + - Job marked QUEUED but process crashed before ARQ enqueue + - After 10 minutes, janitor detects and retries (or fails if max retries reached) + + Job stalled in RUNNING (worker crash): + - Worker started job, marked it RUNNING, then crashed + - After 60 minutes (longer than ARQ timeout), janitor detects and retries + """ + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "cleanup_stalled_jobs", + "resource": "stalled_jobs", + "correlation_id": None, + "thresholds": { + "running_timeout_minutes": RUNNING_TIMEOUT_MINUTES, + "pending_timeout_minutes": PENDING_TIMEOUT_MINUTES, + }, + } + ) + 
job_manager.update_progress(0, 100, "Starting cleanup of stalled jobs.") + logger.debug(msg="Began cleanup of stalled jobs.", extra=job_manager.logging_context()) + + # To properly handle retries and state transitions, we need the Redis connection to enqueue retry jobs + assert job_manager.redis is not None, "Redis connection is required for cleanup_stalled_jobs" + + now = datetime.now(timezone.utc) + cleaned_jobs: dict[str, list[str]] = { + "queued": [], + "running": [], + "pending": [], + } + + # Find all QUEUED jobs that have never started. The Redis presence check below + # is the definitive stall gate: a job is only acted on if it is absent from + # ARQ's queue, meaning the process crashed after writing QUEUED to the DB but + # before calling redis.enqueue_job(). No time threshold is needed here. + queued_jobs = job_manager.db.scalars( + select(JobRun).where( + JobRun.status == JobStatus.QUEUED, + JobRun.started_at.is_(None), # Never started + ) + ).all() + + job_manager.save_to_context({"stalled_queued_jobs_count": len(queued_jobs)}) + job_manager.update_progress(10, 100, f"Found {len(queued_jobs)} stalled QUEUED jobs to evaluate.") + logger.debug("Cleaning stalled QUEUED jobs.", extra=job_manager.logging_context()) + + for job in queued_jobs: + manager = JobManager(job_manager.db, job_manager.redis, job.id) + elapsed_minutes = (now - job.created_at).total_seconds() / 60 + + # Confirm the job is genuinely missing from ARQ's Redis queue before acting. + # A healthy job waiting for a worker slot appears QUEUED in the DB and is also + # present in Redis; only a crashed-enqueue job has the DB state without the + # corresponding Redis entry. 
+ arq_status = await ArqJob(arq_job_id(job), job_manager.redis).status() + if arq_status in (ArqJobStatus.queued, ArqJobStatus.in_progress, ArqJobStatus.deferred): + logger.debug( + f"QUEUED job {job.urn} is present in ARQ Redis (status={arq_status.value}); skipping cleanup", + extra=manager.logging_context(), + ) + continue + + logger.warning( + f"Detected stalled QUEUED job {job.urn} " + f"(created {job.created_at}, queued for {elapsed_minutes:.1f} minutes, " + f"absent from ARQ Redis)", + extra=manager.logging_context(), + ) + + # Use unified retry handler + stall_reason = f"Job stalled in QUEUED state for {elapsed_minutes:.1f} minutes" + await _handle_stalled_job_retry(job, manager, job_manager.redis, stall_reason, job_manager.db) + + manager.db.commit() + cleaned_jobs["queued"].append(job.urn) + + job_manager.save_to_context({"cleaned_queued_jobs": queued_jobs}) + logger.debug("Completed cleaning stalled QUEUED jobs.", extra=job_manager.logging_context()) + + # Find RUNNING jobs that have been running too long OR have missing started_at + # These likely indicate worker crashes (worker died mid-execution) or data inconsistencies + running_threshold = now - timedelta(minutes=RUNNING_TIMEOUT_MINUTES) + running_jobs = job_manager.db.scalars( + select(JobRun).where( + JobRun.status == JobStatus.RUNNING, + (JobRun.started_at < running_threshold) | (JobRun.started_at.is_(None)), + JobRun.finished_at.is_(None), + ) + ).all() + + job_manager.save_to_context({"stalled_running_jobs_count": len(running_jobs)}) + job_manager.update_progress(50, 100, f"Found {len(running_jobs)} stalled RUNNING jobs to evaluate.") + logger.debug("Cleaning stalled RUNNING jobs.", extra=job_manager.logging_context()) + + for job in running_jobs: + manager = JobManager(job_manager.db, job_manager.redis, job.id) + if not job.started_at: + logger.error( + f"RUNNING job {job.urn} has no started_at timestamp, cannot evaluate for stalling", + extra=manager.logging_context(), + ) + send_slack_error( 
+ f"Error in cleanup_stalled_jobs: RUNNING job {job.urn} has no started_at timestamp, cannot evaluate for stalling" + ) + continue + + elapsed_minutes = (now - job.started_at).total_seconds() / 60 + + logger.warning( + f"Detected stalled RUNNING job {job.urn} " + f"(started {job.started_at}, running for {elapsed_minutes:.1f} minutes)", + extra=manager.logging_context(), + ) + + stall_reason = f"Job stalled in RUNNING state for {elapsed_minutes:.1f} minutes (likely worker crash)" + await _handle_stalled_job_retry(job, manager, job_manager.redis, stall_reason, job_manager.db) + + manager.db.commit() + cleaned_jobs["running"].append(job.urn) + + job_manager.save_to_context({"cleaned_running_jobs": running_jobs}) + logger.debug("Completed cleaning stalled RUNNING jobs.", extra=job_manager.logging_context()) + + # Find PENDING jobs that have been pending too long and should have moved on. + # For pipeline jobs, treat them as stalled when they are either ready to run + # now or permanently blocked by terminal dependency outcomes. Jobs waiting on + # non-terminal dependencies are still in a legitimate waiting state. 
+ pending_threshold = now - timedelta(minutes=PENDING_TIMEOUT_MINUTES) + pending_jobs = job_manager.db.scalars( + select(JobRun).where( + JobRun.status == JobStatus.PENDING, + JobRun.created_at < pending_threshold, + ) + ).all() + + stalled_pending_jobs: list[JobRun] = [] + for job in pending_jobs: + if job.pipeline_id is None: + stalled_pending_jobs.append(job) + continue + + pipeline_manager = PipelineManager(job_manager.db, job_manager.redis, job.pipeline_id) + should_skip, _ = pipeline_manager.should_skip_job_due_to_dependencies(job) + if pipeline_manager.can_enqueue_job(job) or should_skip: + stalled_pending_jobs.append(job) + + job_manager.save_to_context({"stalled_pending_jobs_count": len(stalled_pending_jobs)}) + job_manager.update_progress(80, 100, f"Found {len(stalled_pending_jobs)} stalled PENDING jobs to evaluate.") + logger.debug("Cleaning stalled PENDING jobs.", extra=job_manager.logging_context()) + + for job in stalled_pending_jobs: + manager = JobManager(job_manager.db, job_manager.redis, job.id) + elapsed_minutes = (now - job.created_at).total_seconds() / 60 + + logger.warning( + f"Detected stalled PENDING job {job.urn} " + f"(created {job.created_at}, pending for {elapsed_minutes:.1f} minutes)", + extra=manager.logging_context(), + ) + + stall_reason = f"Job stalled in PENDING state for {elapsed_minutes:.1f} minutes" + await _handle_stalled_job_retry(job, manager, job_manager.redis, stall_reason, job_manager.db) + + manager.db.commit() + cleaned_jobs["pending"].append(job.urn) + + job_manager.save_to_context({"cleaned_pending_jobs": stalled_pending_jobs}) + logger.debug("Completed cleaning stalled PENDING jobs.", extra=job_manager.logging_context()) + + # Find pipelines that are stuck in a non-terminal state but have no active jobs remaining. + # This happens when coordinate_pipeline() crashed or was never reached after all jobs + # finished, leaving the pipeline perpetually RUNNING or CREATED. 
+ pipeline_stuck_threshold = now - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES) + stuck_pipelines = job_manager.db.scalars( + select(Pipeline).where( + Pipeline.status.notin_([s.value for s in TERMINAL_PIPELINE_STATUSES]), + Pipeline.created_at < pipeline_stuck_threshold, + ~Pipeline.job_runs.any(JobRun.status.in_([s.value for s in ACTIVE_JOB_STATUSES])), + ) + ).all() + + fixed_pipelines: list[str] = [] + job_manager.save_to_context({"stuck_pipelines_count": len(stuck_pipelines)}) + job_manager.update_progress(90, 100, f"Found {len(stuck_pipelines)} stuck pipelines to resolve.") + logger.debug("Resolving stuck pipelines.", extra=job_manager.logging_context()) + + for pipeline in stuck_pipelines: + elapsed_minutes = (now - pipeline.created_at).total_seconds() / 60 + logger.warning( + f"Detected stuck pipeline {pipeline.urn} in status {pipeline.status} " + f"(created {pipeline.created_at}, {elapsed_minutes:.1f} minutes ago, no active jobs)", + extra=job_manager.logging_context(), + ) + try: + pipeline_manager = PipelineManager(job_manager.db, job_manager.redis, pipeline.id) + await pipeline_manager.coordinate_pipeline() + job_manager.db.commit() + fixed_pipelines.append(pipeline.urn) + logger.info( + f"Resolved stuck pipeline {pipeline.urn}: status now {pipeline.status}", + extra=job_manager.logging_context(), + ) + except Exception as e: + job_manager.db.rollback() + logger.error( + f"Failed to resolve stuck pipeline {pipeline.urn}: {e}", + extra=job_manager.logging_context(), + ) + send_slack_error(e) + + job_manager.save_to_context({"fixed_pipelines": fixed_pipelines}) + logger.debug("Completed resolving stuck pipelines.", extra=job_manager.logging_context()) + + total_cleaned = sum(len(jobs) for jobs in cleaned_jobs.values()) + + if total_cleaned > 0: + logger.info( + f"Cleanup complete: {total_cleaned} stalled jobs handled - " + f"{len(cleaned_jobs['queued'])} queued, " + f"{len(cleaned_jobs['running'])} running, " + f"{len(cleaned_jobs['pending'])} 
pending; " + f"{len(fixed_pipelines)} stuck pipelines resolved", + extra=job_manager.logging_context(), + ) + else: + logger.debug("Cleanup complete: No stalled jobs found", extra=job_manager.logging_context()) + + job_manager.db.flush() + return JobExecutionOutcome.succeeded( + data={ + "total_cleaned": total_cleaned, + "queued_jobs": cleaned_jobs["queued"], + "running_jobs": cleaned_jobs["running"], + "pending_jobs": cleaned_jobs["pending"], + "fixed_pipelines": fixed_pipelines, + "timestamp": now.isoformat(), + "thresholds": { + "running_timeout_minutes": RUNNING_TIMEOUT_MINUTES, + "pending_timeout_minutes": PENDING_TIMEOUT_MINUTES, + "pipeline_stuck_timeout_minutes": PIPELINE_STUCK_TIMEOUT_MINUTES, + }, + } + ) diff --git a/src/mavedb/worker/jobs/system/py.typed b/src/mavedb/worker/jobs/system/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/utils/__init__.py b/src/mavedb/worker/jobs/utils/__init__.py new file mode 100644 index 000000000..4bdb3409e --- /dev/null +++ b/src/mavedb/worker/jobs/utils/__init__.py @@ -0,0 +1,28 @@ +"""Worker job utility functions and constants. + +This module provides shared utilities used across worker jobs: +- Job state management and context setup +- Retry logic with exponential backoff +- Configuration constants for queues and timeouts + +These utilities help ensure consistent behavior and error handling +across all worker job implementations. 
+""" + +from .constants import ( + ENQUEUE_BACKOFF_ATTEMPT_LIMIT, + LINKING_BACKOFF_IN_SECONDS, + MAPPING_BACKOFF_IN_SECONDS, + MAPPING_CURRENT_ID_NAME, + MAPPING_QUEUE_NAME, +) +from .setup import validate_job_params + +__all__ = [ + "validate_job_params", + "MAPPING_QUEUE_NAME", + "MAPPING_CURRENT_ID_NAME", + "MAPPING_BACKOFF_IN_SECONDS", + "LINKING_BACKOFF_IN_SECONDS", + "ENQUEUE_BACKOFF_ATTEMPT_LIMIT", +] diff --git a/src/mavedb/worker/jobs/utils/constants.py b/src/mavedb/worker/jobs/utils/constants.py new file mode 100644 index 000000000..cca5a02cc --- /dev/null +++ b/src/mavedb/worker/jobs/utils/constants.py @@ -0,0 +1,17 @@ +"""Constants used across worker jobs. + +This module centralizes configuration constants used by various worker jobs +including queue names, timeouts, and retry limits. This provides a single +source of truth for job configuration values. +""" + +### Mapping job constants +MAPPING_QUEUE_NAME = "vrs_mapping_queue" +MAPPING_CURRENT_ID_NAME = "vrs_mapping_current_job_id" +MAPPING_BACKOFF_IN_SECONDS = 15 + +### Linking job constants +LINKING_BACKOFF_IN_SECONDS = 15 * 60 + +### Backoff constants +ENQUEUE_BACKOFF_ATTEMPT_LIMIT = 5 diff --git a/src/mavedb/worker/jobs/utils/py.typed b/src/mavedb/worker/jobs/utils/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/utils/setup.py b/src/mavedb/worker/jobs/utils/setup.py new file mode 100644 index 000000000..b569bb0e9 --- /dev/null +++ b/src/mavedb/worker/jobs/utils/setup.py @@ -0,0 +1,24 @@ +"""Job state management utilities. + +This module provides utilities for managing job state and context across +the worker job lifecycle. It handles setup of logging context, correlation +IDs, and other state information needed for job traceability and monitoring. 
+""" + +import logging + +from mavedb.models.job_run import JobRun + +logger = logging.getLogger(__name__) + + +def validate_job_params(required_params: list[str], job: JobRun) -> None: + """ + Validate that the given job has all required parameters present in its job_params. + """ + if not job.job_params: + raise ValueError("Job has no job_params defined.") + + for param in required_params: + if param not in job.job_params: + raise ValueError(f"Missing required job param: {param}") diff --git a/src/mavedb/worker/jobs/variant_processing/__init__.py b/src/mavedb/worker/jobs/variant_processing/__init__.py new file mode 100644 index 000000000..a6df09753 --- /dev/null +++ b/src/mavedb/worker/jobs/variant_processing/__init__.py @@ -0,0 +1,17 @@ +"""Variant processing job functions. + +This module exports jobs responsible for variant creation and mapping: +- Variant creation from uploaded score/count data +- VRS mapping to standardized genomic coordinates +- Queue management for mapping workflows +""" + +from .creation import create_variants_for_score_set +from .mapping import ( + map_variants_for_score_set, +) + +__all__ = [ + "create_variants_for_score_set", + "map_variants_for_score_set", +] diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py new file mode 100644 index 000000000..a519bda07 --- /dev/null +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -0,0 +1,280 @@ +"""Variant creation jobs for score sets. + +This module contains jobs responsible for creating and validating variants +from uploaded score and count data. It handles the full variant creation +pipeline including data validation, standardization, and database persistence. 
+""" + +import io +import logging + +import pandas as pd +from sqlalchemy import delete, null, select + +from mavedb.data_providers.services import CSV_UPLOAD_S3_BUCKET_NAME, RESTDataProvider, s3_client +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.score_sets import columns_for_dataset, create_variants, create_variants_data +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.models.enums.job_pipeline import FailureCategory +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.user import User +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """ + Create variants for a given ScoreSet based on uploaded score and count data. + + Args: + ctx: The job context dictionary. + job_id: The ID of the job being executed. + job_manager: Manager for job lifecycle and DB operations. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet to create variants for. + - correlation_id (str): Correlation ID for tracing requests across services. + - updater_id (int): The ID of the user performing the update. + - scores_file_key (str): S3 key for the uploaded scores CSV file. + - counts_file_key (str): S3 key for the uploaded counts CSV file. 
+        - score_columns_metadata (dict): Metadata for score columns.
+        - count_columns_metadata (dict): Metadata for count columns.
+
+    Side Effects:
+        - Creates Variant and MappedVariant records in the database.
+
+    Returns:
+        JobExecutionOutcome: Result indicating success and any exception details
+    """
+    # Handle everything prior to score set fetch in an outer layer. Any issues prior to
+    # fetching the score set should fail the job outright and we will be unable to set
+    # a processing state on the score set itself.
+    logger.info(msg="Starting create_variants_for_score_set job", extra=job_manager.logging_context())
+    hdp: RESTDataProvider = ctx["hdp"]
+
+    # Get the job definition we are working on
+    job = job_manager.get_job()
+
+    _job_required_params = [
+        "score_set_id",
+        "correlation_id",
+        "updater_id",
+        "scores_file_key",
+        "counts_file_key",
+        "score_columns_metadata",
+        "count_columns_metadata",
+    ]
+    validate_job_params(_job_required_params, job)
+
+    # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above.
+    score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one()  # type: ignore
+
+    # Main processing block. Handled in a try/except to ensure we can set score set state appropriately,
+    # which is handled independently of the job state.
+    # TODO:647 In a future iteration, we should rely on the job manager itself for maintaining processing
+    # state for better cohesion. This try/except is redundant in its duties with the job manager.
+ try: + correlation_id = job.job_params["correlation_id"] # type: ignore + updater_id = job.job_params["updater_id"] # type: ignore + score_file_key = job.job_params["scores_file_key"] # type: ignore + count_file_key = job.job_params["counts_file_key"] # type: ignore + score_columns_metadata = job.job_params["score_columns_metadata"] # type: ignore + count_columns_metadata = job.job_params["count_columns_metadata"] # type: ignore + + job_manager.save_to_context( + { + "score_set_id": score_set.id, + "updater_id": updater_id, + "correlation_id": correlation_id, + "score_file_key": score_file_key, + "count_file_key": count_file_key, + "bucket_name": CSV_UPLOAD_S3_BUCKET_NAME, + } + ) + logger.debug(msg="Fetching file resources from S3 for variant creation", extra=job_manager.logging_context()) + + s3 = s3_client() + scores = io.BytesIO() + s3.download_fileobj(Bucket=CSV_UPLOAD_S3_BUCKET_NAME, Key=score_file_key, Fileobj=scores) + scores.seek(0) + scores_df = pd.read_csv(scores) + + # Counts file is optional + counts_df = None + if count_file_key: + counts = io.BytesIO() + s3.download_fileobj(Bucket=CSV_UPLOAD_S3_BUCKET_NAME, Key=count_file_key, Fileobj=counts) + counts.seek(0) + counts_df = pd.read_csv(counts) + + logger.debug(msg="Successfully fetched file resources from S3", extra=job_manager.logging_context()) + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "create_variants_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant creation job.") + logger.info(msg="Started variant creation job", extra=job_manager.logging_context()) + + updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() + + score_set.modified_by = updated_by + score_set.processing_state = ProcessingState.processing + score_set.mapping_state = MappingState.pending_variant_processing + + 
job_manager.save_to_context( + {"processing_state": score_set.processing_state.name, "mapping_state": score_set.mapping_state.name} + ) + + # Flush initial score set state + job_manager.db.add(score_set) + job_manager.db.flush() + job_manager.db.refresh(score_set) + + job_manager.update_progress(10, 100, "Validated score set metadata and beginning data validation.") + + if not score_set.target_genes: + logger.warning( + msg="No targets are associated with this score set; could not create variants.", + extra=job_manager.logging_context(), + ) + raise ValueError("Can't create variants when score set has no targets.") + + validated_scores, validated_counts, validated_score_columns_metadata, validated_count_columns_metadata = ( + validate_and_standardize_dataframe_pair( + scores_df=scores_df, + counts_df=counts_df, + score_columns_metadata=score_columns_metadata, + count_columns_metadata=count_columns_metadata, + targets=score_set.target_genes, + hdp=hdp, + ) + ) + + job_manager.update_progress(80, 100, "Data validation complete; creating variants in database.") + + score_set.dataset_columns = { + "score_columns": columns_for_dataset(validated_scores), + "count_columns": columns_for_dataset(validated_counts), + "score_columns_metadata": validated_score_columns_metadata + if validated_score_columns_metadata is not None + else {}, + "count_columns_metadata": validated_count_columns_metadata + if validated_count_columns_metadata is not None + else {}, + } + + # Delete variants after validation occurs so we don't overwrite them in the case of a bad update. 
+ if score_set.variants: + existing_variants = job_manager.db.scalars( + select(Variant.id).where(Variant.score_set_id == score_set.id) + ).all() + job_manager.db.execute(delete(MappedVariant).where(MappedVariant.variant_id.in_(existing_variants))) + job_manager.db.execute(delete(Variant).where(Variant.id.in_(existing_variants))) + + job_manager.save_to_context({"deleted_variants": len(existing_variants)}) + score_set.num_variants = 0 + + logger.info(msg="Deleted existing variants from score set.", extra=job_manager.logging_context()) + + job_manager.db.flush() + job_manager.db.refresh(score_set) + + variants_data = create_variants_data(validated_scores, validated_counts, None) + create_variants(job_manager.db, score_set, variants_data) + + except ValidationError as e: + job_manager.db.rollback() + + score_set.processing_state = ProcessingState.failed + score_set.mapping_state = MappingState.not_attempted + score_set.processing_errors = {"exception": str(e), "detail": e.triggering_exceptions} + if score_set.num_variants: + score_set.processing_errors["exception"] = ( + f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" + ) + + job_manager.save_to_context( + { + "processing_state": score_set.processing_state.name, + "mapping_state": score_set.mapping_state.name, + **format_raised_exception_info_as_dict(e), + "created_variants": 0, + } + ) + + # Persist score set state to survive any decorator rollback. 
+ job_manager.db.add(score_set) + job_manager.db.commit() + + logger.error( + msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() + ) + + return JobExecutionOutcome.failed( + reason=str(e), data={"score_set_id": score_set.id}, failure_category=FailureCategory.VALIDATION_ERROR + ) + + except Exception as e: + job_manager.db.rollback() + + score_set.processing_state = ProcessingState.failed + score_set.mapping_state = MappingState.not_attempted + score_set.processing_errors = {"exception": str(e), "detail": []} + if score_set.num_variants: + score_set.processing_errors["exception"] = ( + f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" + ) + + job_manager.save_to_context( + { + "processing_state": score_set.processing_state.name, + "mapping_state": score_set.mapping_state.name, + **format_raised_exception_info_as_dict(e), + "created_variants": 0, + } + ) + + # Persist score set state to survive any decorator rollback. 
+ job_manager.db.add(score_set) + job_manager.db.commit() + + logger.error( + msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() + ) + raise + + # Success path + score_set.processing_state = ProcessingState.success + score_set.mapping_state = MappingState.queued + score_set.processing_errors = null() + + job_manager.save_to_context( + { + "processing_state": score_set.processing_state.name, + "mapping_state": score_set.mapping_state.name, + "created_variants": score_set.num_variants, + } + ) + + job_manager.db.add(score_set) + job_manager.db.flush() + job_manager.db.refresh(score_set) + + logger.info(msg="Added new variants to score set.", extra=job_manager.logging_context()) + return JobExecutionOutcome.succeeded(data={"score_set_id": score_set.id, "variant_count": score_set.num_variants}) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py new file mode 100644 index 000000000..ebeebe559 --- /dev/null +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -0,0 +1,338 @@ +"""Variant mapping jobs using VRS (Variant Representation Specification). + +This module handles the mapping of variants to standardized genomic coordinates +using the VRS mapping service. It includes queue management, retry logic, +and coordination with downstream services like ClinGen and UniProt. 
+""" + +import asyncio +import functools +import logging +from datetime import date +from typing import Any + +from sqlalchemy import cast, null, select +from sqlalchemy.dialects.postgresql import JSONB + +from mavedb.data_providers.services import vrs_mapper +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.exceptions import ( + NonexistentMappingReferenceError, + NonexistentMappingResultsError, + NonexistentMappingScoresError, +) +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.mapping import ANNOTATION_LAYERS, EXCLUDED_PREMAPPED_ANNOTATION_KEYS +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus, FailureCategory +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.user import User +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Map variants for a given score set using VRS.""" + # Handle everything prior to score set fetch in an outer layer. Any issues prior to + # fetching the score set should fail the job outright and we will be unable to set + # a processing state on the score set itself. 
+ + job = job_manager.get_job() + + _job_required_params = [ + "score_set_id", + "correlation_id", + "updater_id", + ] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + + # Handle everything within try/except to persist appropriate mapping state + try: + correlation_id = job.job_params["correlation_id"] # type: ignore + updater_id = job.job_params["updater_id"] # type: ignore + updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "map_variants_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant mapping job.") + logger.info(msg="Started variant mapping job", extra=job_manager.logging_context()) + + # TODO#372: non-nullable URNs + if not score_set.urn: # pragma: no cover + raise ValueError("Score set URN is required for variant mapping.") + + # Setup score set state for mapping + score_set.mapping_state = MappingState.processing + score_set.mapping_errors = null() + score_set.modified_by = updated_by + score_set.modification_date = date.today() + + job_manager.db.add(score_set) + job_manager.db.flush() + + job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) + job_manager.update_progress(10, 100, "Score set prepared for variant mapping.") + logger.debug(msg="Score set prepared for variant mapping.", extra=job_manager.logging_context()) + + # Do not block Worker event loop during mapping, see: https://arq-docs.helpmanual.io/#synchronous-jobs. 
+ vrs = vrs_mapper() + blocking = functools.partial(vrs.map_score_set, score_set.urn) + loop = asyncio.get_running_loop() + + mapping_results = None + + logger.debug(msg="Mapping variants using VRS mapping service.", extra=job_manager.logging_context()) + job_manager.update_progress(30, 100, "Mapping variants using VRS mapping service.") + mapping_results = await loop.run_in_executor(ctx["pool"], blocking) + + logger.debug(msg="Done mapping variants.", extra=job_manager.logging_context()) + job_manager.update_progress(80, 100, "Processing mapped variants.") + + ## Check our assumptions about mapping results and handle errors appropriately. + + # Ensure we have mapping results + if not mapping_results: + raise NonexistentMappingResultsError("Mapping results were not returned from VRS mapping service.") + + # Ensure we have mapped scores + mapped_scores = mapping_results.get("mapped_scores") + if not mapped_scores: + internal_err = mapping_results.get( + "error_message", "No variants were mapped and no error message was provided." + ) + raise NonexistentMappingScoresError(internal_err) + + # Ensure we have reference metadata + reference_metadata = mapping_results.get("reference_sequences") + if not reference_metadata: + raise NonexistentMappingReferenceError("Reference metadata missing from mapping results.") + + # Process and store mapped variants + for target_gene_identifier in reference_metadata: + target_gene = next( + (target_gene for target_gene in score_set.target_genes if target_gene.name == target_gene_identifier), + None, + ) + + if not target_gene: + raise ValueError( + f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." 
+ ) + + job_manager.save_to_context({"processing_target_gene": target_gene.id}) + logger.debug(f"Processing target gene {target_gene.name}.", extra=job_manager.logging_context()) + + # allow for multiple annotation layers + pre_mapped_metadata: dict[str, Any] = {} + post_mapped_metadata: dict[str, Any] = {} + + # add gene-level info + gene_info = reference_metadata[target_gene_identifier].get("gene_info") + if gene_info: + target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") + post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") + + job_manager.save_to_context({"mapped_hgnc_name": target_gene.mapped_hgnc_name}) + logger.debug("Added mapped HGNC name to target gene.", extra=job_manager.logging_context()) + + # add annotation layer info + for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: + layer_premapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( + "computed_reference_sequence" + ) + if layer_premapped: + pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { + k: layer_premapped[k] + for k in set(list(layer_premapped.keys())) - EXCLUDED_PREMAPPED_ANNOTATION_KEYS + } + job_manager.save_to_context({"pre_mapped_layer_exists": True}) + + layer_postmapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( + "mapped_reference_sequence" + ) + if layer_postmapped: + post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped + job_manager.save_to_context({"post_mapped_layer_exists": True}) + + logger.debug( + f"Added annotation layer mapping metadata for {annotation_layer}.", + extra=job_manager.logging_context(), + ) + + target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) + target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) + job_manager.db.add(target_gene) + logger.debug("Added mapping metadata to target gene.", extra=job_manager.logging_context()) + + total_variants = len(mapped_scores) 
+ job_manager.save_to_context({"total_variants_to_process": total_variants}) + job_manager.update_progress(90, 100, "Saving mapped variants.") + + successful_mapped_variants = 0 + logger.info( + f"Processing {total_variants} mapped variants for score set {score_set.urn}.", + extra=job_manager.logging_context(), + ) + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job.id) + for mapped_score in mapped_scores: + variant_urn = mapped_score.get("mavedb_id") + variant = job_manager.db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() + + job_manager.save_to_context({"processing_variant": variant.id}) + logger.debug(f"Processing variant {variant.id}.", extra=job_manager.logging_context()) + + # there should only be one current mapped variant per variant id, so update old mapped variant to current = false + existing_mapped_variant = ( + job_manager.db.query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) + .one_or_none() + ) + + if existing_mapped_variant: + job_manager.save_to_context({"existing_mapped_variant": existing_mapped_variant.id}) + existing_mapped_variant.current = False + job_manager.db.add(existing_mapped_variant) + logger.debug(msg="Set existing mapped variant to current = false.", extra=job_manager.logging_context()) + + annotation_was_successful = mapped_score.get("pre_mapped") and mapped_score.get("post_mapped") + if annotation_was_successful: + successful_mapped_variants += 1 + job_manager.save_to_context({"successful_mapped_variants": successful_mapped_variants}) + + mapped_variant = MappedVariant( + pre_mapped=mapped_score.get("pre_mapped", null()), + post_mapped=mapped_score.get("post_mapped", null()), + hgvs_assay_level=get_hgvs_from_post_mapped(mapped_score.get("post_mapped", {})), + variant_id=variant.id, + modification_date=date.today(), + mapped_date=mapping_results["mapped_date_utc"], + vrs_version=mapped_score.get("vrs_version", null()), + 
mapping_api_version=mapping_results["dcd_mapping_version"], + error_message=mapped_score.get("error_message", null()), + current=True, + ) + + annotation_manager.add_annotation( + variant_id=variant.id, # type: ignore + annotation_type=AnnotationType.VRS_MAPPING, + version=mapping_results.get("dcd_mapping_version"), + status=AnnotationStatus.SUCCESS if annotation_was_successful else AnnotationStatus.FAILED, + failure_category=None + if annotation_was_successful + else AnnotationFailureCategory.EXTERNAL_SERVICE_REJECTED, + annotation_data={ + "error_message": mapped_score.get("error_message", null()), + "annotation_metadata": { + "mapped_assay_level_hgvs": get_hgvs_from_post_mapped(mapped_score.get("post_mapped", {})), + }, + }, + current=True, + ) + + job_manager.db.add(mapped_variant) + logger.debug(msg="Added new mapped variant to session.", extra=job_manager.logging_context()) + + annotation_manager.flush() + + if successful_mapped_variants == 0: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "All variants failed to map."} + elif successful_mapped_variants < total_variants: + score_set.mapping_state = MappingState.incomplete + else: + score_set.mapping_state = MappingState.complete + + job_manager.save_to_context( + { + "successful_mapped_variants": successful_mapped_variants, + "mapping_state": score_set.mapping_state.name, + "mapping_errors": score_set.mapping_errors, + "inserted_mapped_variants": len(mapped_scores), + } + ) + + # Flush score set state; the decorator will commit on return via the success/return paths below. 
+ job_manager.db.add(score_set) + job_manager.db.flush() + + except (NonexistentMappingResultsError, NonexistentMappingScoresError, NonexistentMappingReferenceError) as e: + logging_context = {**job_manager.logging_context(), **format_raised_exception_info_as_dict(e)} + logger.error(msg="Known error during variant mapping.", extra=logging_context) + + job_manager.db.rollback() + + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": str(e)} + + # Persist score set state to survive any decorator rollback. + job_manager.db.add(score_set) + job_manager.db.commit() + return JobExecutionOutcome.failed( + reason=str(e), + data={"score_set_id": score_set.id, "mapped_count": 0, "total_count": 0}, + failure_category=FailureCategory.DATA_ERROR, + ) + + except Exception as e: + logging_context = {**job_manager.logging_context(), **format_raised_exception_info_as_dict(e)} + logger.error(msg="Encountered an unexpected error while parsing mapped variants.", extra=logging_context) + + job_manager.db.rollback() + + score_set.mapping_state = MappingState.failed + if not score_set.mapping_errors: + score_set.mapping_errors = { + "error_message": f"Encountered an unexpected error while parsing mapped variants. This job will be retried up to {job.max_retries} times (this was attempt {job.retry_count})." + } + + # Persist score set state to survive any decorator rollback. 
+ job_manager.db.add(score_set) + job_manager.db.commit() + + raise + + logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) + + if successful_mapped_variants == 0: + logger.error(msg="No variants were successfully mapped.", extra=job_manager.logging_context()) + job_manager.db.flush() + return JobExecutionOutcome.failed( + reason="No variants were successfully mapped.", + data={ + "score_set_id": score_set.id, + "mapped_count": 0, + "unmapped_count": total_variants, + "total_count": total_variants, + }, + failure_category=FailureCategory.VRS_MAPPING_FAILED, + ) + + logger.info(msg="Variant mapping job completed successfully.", extra=job_manager.logging_context()) + job_manager.db.flush() + return JobExecutionOutcome.succeeded( + data={ + "score_set_id": score_set.id, + "mapped_count": successful_mapped_variants, + "unmapped_count": total_variants - successful_mapped_variants, + "total_count": total_variants, + } + ) diff --git a/src/mavedb/worker/jobs/variant_processing/py.typed b/src/mavedb/worker/jobs/variant_processing/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs_overview.md b/src/mavedb/worker/jobs_overview.md new file mode 100644 index 000000000..bfca70c6a --- /dev/null +++ b/src/mavedb/worker/jobs_overview.md @@ -0,0 +1,213 @@ +# Job System Overview + +## Core Concepts + +| Concept | What It Is | Where It Lives | +|---------|-----------|----------------| +| **Job** | An async function that performs a unit of work (e.g., create variants, submit to ClinGen) | `jobs//.py` | +| **Pipeline** | A collection of jobs with dependency ordering, executed as a workflow | `Pipeline` model + `PIPELINE_DEFINITIONS` | +| **JobRun** | A database record tracking a single job execution: status, params, progress, errors, retries | `models/job_run.py` | +| **JobDependency** | A record expressing that one job depends on another (with a dependency type) | `models/job_dependency.py` | +| 
**JobManager** | Manages individual job state transitions (start, progress, complete, retry) | `lib/managers/job_manager.py` | +| **PipelineManager** | Coordinates pipeline execution: dependency resolution, job enqueueing, status transitions | `lib/managers/pipeline_manager.py` | +| **Decorator** | Wraps job functions to add lifecycle management, error handling, and pipeline coordination | `lib/decorators/` | +| **JobExecutionOutcome** | Dataclass returned by every job function to indicate success, failure, skip, or error | `lib/types/workflow.py` | + +## Two Execution Flows + +### Flow 1: Pipeline Jobs (Most Common) + +This is how variant processing works end-to-end: + +``` +Router (score_sets.py) + │ + ├─ 1. PipelineFactory.create_pipeline("validate_map_annotate_score_set", ...) + │ └─ Creates in DB: + │ • Pipeline record (status=CREATED) + │ • start_pipeline JobRun (entrypoint) + │ • create_variants_for_score_set JobRun + │ • map_variants_for_score_set JobRun (depends on create_variants) + │ • submit_to_car JobRun (depends on map_variants) + │ • link_gnomad JobRun (depends on submit_to_car) + │ • ... more annotation jobs with dependencies + │ • JobDependency records linking them + │ + ├─ 2. worker.enqueue_job("start_pipeline", entrypoint.id) + │ └─ Enqueues the start_pipeline job in ARQ/Redis + │ + └─ 3. Returns HTTP response immediately (fire-and-forget) + +ARQ Worker picks up start_pipeline + │ + ├─ 4. 
@with_pipeline_management decorator: + │ ├─ Creates task-local DB session (ensure_session_ctx) + │ ├─ Starts pipeline (status → RUNNING) + │ ├─ Wraps function with @with_job_management + │ │ ├─ Marks start_pipeline job as RUNNING + │ │ ├─ Runs start_pipeline function body + │ │ │ └─ Calls PipelineManager.coordinate_pipeline() + │ │ └─ Marks start_pipeline job as SUCCEEDED + │ └─ After job completion, calls coordinate_pipeline() again + │ ├─ Finds create_variants (PENDING, no dependencies) → QUEUED → enqueue in ARQ + │ └─ Other jobs still have unmet dependencies → stay PENDING + │ + ├─ 5. ARQ picks up create_variants_for_score_set + │ ├─ @with_pipeline_management runs job, marks SUCCEEDED + │ └─ coordinate_pipeline() finds map_variants (dependency met) → enqueue + │ + ├─ 6. ARQ picks up map_variants_for_score_set + │ ├─ @with_pipeline_management runs job, marks SUCCEEDED + │ └─ coordinate_pipeline() finds submit_to_car, submit_uniprot, etc. → enqueue + │ + ├─ 7... Continues until all jobs complete + │ + └─ 8. Final coordinate_pipeline() → all jobs SUCCEEDED → pipeline status → SUCCEEDED +``` + +### Flow 2: Standalone/Cron Jobs + +Used for system maintenance tasks that don't belong to a pipeline: + +``` +ARQ Cron Scheduler (or manual enqueue) + │ + ├─ 1. @with_guaranteed_job_run_record("cron_job") + │ └─ Creates a JobRun record in DB (since no PipelineFactory did it) + │ + ├─ 2. @with_job_management + │ ├─ Marks job RUNNING + │ ├─ Injects JobManager into function kwargs + │ ├─ Runs the job function + │ └─ Marks job SUCCEEDED/FAILED/ERRORED based on return value + │ + └─ 3. No pipeline coordination (job has no pipeline_id) +``` + +Example: `cleanup_stalled_jobs` runs every 30 minutes via ARQ cron to find and handle stuck jobs. 
+ +## Key Models + +### JobRun (`models/job_run.py`) + +The central record for every job execution: + +| Field | Purpose | +|-------|---------| +| `id` | Primary key, passed as `job_id` to job functions | +| `urn` | Human-readable identifier (e.g., `mavedb:job_run:abc123`), used as ARQ `_job_id` | +| `job_type` | Category string (e.g., `"variant_creation"`, `"cron_job"`) | +| `job_function` | Function name (e.g., `"create_variants_for_score_set"`) | +| `job_params` | JSONB dict of runtime parameters (score_set_id, correlation_id, etc.) | +| `status` | Current `JobStatus` enum value | +| `pipeline_id` | FK to `Pipeline` (null for standalone jobs) | +| `max_retries` | Maximum retry attempts (default: 3) | +| `retry_count` | Current retry attempt count | +| `progress_current/total/message` | Progress tracking fields | +| `error_message/error_traceback` | Error details on failure | +| `failure_category` | `FailureCategory` enum for retry classification | +| `metadata_` | JSONB for retry history, result snapshots, etc. 
| +| `correlation_id` | End-to-end request tracing ID | + +### Pipeline (`models/pipeline.py`) + +Groups related jobs into a workflow: + +| Field | Purpose | +|-------|---------| +| `id` | Primary key | +| `name` | Pipeline definition name (e.g., `"validate_map_annotate_score_set"`) | +| `status` | Current `PipelineStatus` enum value | +| `correlation_id` | Shared tracing ID for all jobs in pipeline | +| `job_runs` | Relationship to all `JobRun` records in this pipeline | + +### JobDependency (`models/job_dependency.py`) + +Expresses execution ordering between jobs: + +| Field | Purpose | +|-------|---------| +| `id` | FK to the dependent job (the one that waits) | +| `depends_on_job_id` | FK to the prerequisite job | +| `dependency_type` | `SUCCESS_REQUIRED` or `COMPLETION_REQUIRED` | + +## Status Enums + +### JobStatus + +``` +PENDING ──► QUEUED ──► RUNNING ──► SUCCEEDED + │ + ├──► FAILED (business logic failure) + ├──► ERRORED (unhandled exception) + ├──► CANCELLED (pipeline cancelled remaining jobs) + └──► SKIPPED (dependency unfulfillable or feature disabled) + +FAILED/ERRORED ──► PENDING (via prepare_retry, if retryable) +``` + +### PipelineStatus + +``` +CREATED ──► RUNNING ──► SUCCEEDED (all jobs succeeded) + │ + ├──► FAILED (any job failed/errored) + ├──► PARTIAL (mix of succeeded + skipped/cancelled, no failures) + ├──► CANCELLED (manually cancelled) + └──► PAUSED ──► RUNNING (via unpause) +``` + +### DependencyType + +| Type | Meaning | +|------|---------| +| `SUCCESS_REQUIRED` | Dependent job runs only if prerequisite **succeeded** | +| `COMPLETION_REQUIRED` | Dependent job runs if prerequisite reached any **completed** state (succeeded, failed, or errored) | + +### FailureCategory + +Classifies why a job failed, used to determine retry eligibility: + +- **Retryable**: `NETWORK_ERROR`, `TIMEOUT`, `SERVICE_UNAVAILABLE` +- **Non-retryable**: `VALIDATION_ERROR`, `DATA_ERROR`, `SYSTEM_ERROR`, etc. + +See `models/enums/job_pipeline.py` for the full list. 
+ +## How Job Parameters Flow + +Parameters originate from the router/script and flow through the pipeline to individual jobs: + +``` +Router (score_sets.py) + │ + │ pipeline_params = { + │ "score_set_id": 42, + │ "correlation_id": "abc-123", + │ "updater_id": 7, + │ "scores_file_key": "42/7/1234-scores.csv", + │ ... + │ } + │ + ├─► PipelineFactory.create_pipeline(pipeline_params=pipeline_params) + │ │ + │ ├─► Reads PIPELINE_DEFINITIONS["validate_map_annotate_score_set"] + │ │ Each job_definition has a "params" dict with None placeholders: + │ │ {"score_set_id": None, "correlation_id": None, ...} + │ │ + │ ├─► JobFactory.create_job_run() merges pipeline_params into each job's params: + │ │ JobRun.job_params = {"score_set_id": 42, "correlation_id": "abc-123", ...} + │ │ + │ └─► Each JobRun record now has its own copy of the params it needs + │ + └─► In the job function: + job = job_manager.get_job() + score_set_id = job.job_params["score_set_id"] # → 42 +``` + +## See Also + +- [Job Decorators](job_decorators.md) — How lifecycle management works internally +- [Job Managers](job_managers.md) — Manager class APIs and commit discipline +- [Pipeline Management](pipeline_management.md) — Pipeline lifecycle and coordination details +- [Job Registry](job_registry.md) — How to register jobs and step-by-step guides +- [Best Practices](best_practices.md) — Coding patterns and conventions for job code diff --git a/src/mavedb/worker/lib/__init__.py b/src/mavedb/worker/lib/__init__.py new file mode 100644 index 000000000..8ab179892 --- /dev/null +++ b/src/mavedb/worker/lib/__init__.py @@ -0,0 +1,7 @@ +""" +Worker library modules for job management and pipeline coordination. 
+""" + +from .managers import JobManager, PipelineManager + +__all__ = ["JobManager", "PipelineManager"] diff --git a/src/mavedb/worker/lib/decorators/__init__.py b/src/mavedb/worker/lib/decorators/__init__.py new file mode 100644 index 000000000..4bef68d5e --- /dev/null +++ b/src/mavedb/worker/lib/decorators/__init__.py @@ -0,0 +1,28 @@ +""" +Decorator utilities for job and pipeline management. + +This module exposes decorators for managing job and pipeline lifecycle hooks, error handling, +and logging in worker functions. Use these decorators to ensure consistent state management +and observability for background jobs and pipelines. + +Available decorators: +- with_job_management: Handles job context and state transitions +- with_pipeline_management: Handles pipeline context and coordination in addition to job management + +Example usage:: + from mavedb.worker.lib.decorators import managed_workflow + + @with_pipeline_management + async def my_worker_function_in_a_pipeline(...): + ... + + @with_job_management + async def my_standalone_job_function(...): + ... +""" + +from .job_guarantee import with_guaranteed_job_run_record +from .job_management import with_job_management +from .pipeline_management import with_pipeline_management + +__all__ = ["with_job_management", "with_pipeline_management", "with_guaranteed_job_run_record"] diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py new file mode 100644 index 000000000..e880e5972 --- /dev/null +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -0,0 +1,114 @@ +""" +Job Guarantee Decorator - Ensures a JobRun record is persisted before job execution. + +This decorator guarantees that a corresponding JobRun record is created and tracked for the decorated +function in the database before execution begins. 
It is designed to be stacked before managed job
+decorators (such as with_job_management) to provide a consistent audit trail and robust error handling
+for all job entrypoints, including cron-triggered jobs.
+
+NOTE
+- This decorator must be applied before any job management decorators.
+- This decorator is not supported as part of pipeline management; stacking it
+  with pipeline management decorators is not allowed and it should only be used with
+  standalone jobs.
+
+Features:
+- Persists a JobRun record with the job_type and function name before execution
+- Integrates cleanly with the managed job decorator (with_job_management)
+
+Example:
+    @with_guaranteed_job_run_record("cron_job")
+    @with_job_management
+    async def my_cron_job(ctx, ...):
+        ...
+"""
+
+import functools
+from typing import Any, Awaitable, Callable, TypeVar
+
+from sqlalchemy.orm import Session
+
+from mavedb import __version__
+from mavedb.lib.types.workflow import JobExecutionOutcome
+from mavedb.models.enums.job_pipeline import JobStatus
+from mavedb.models.job_run import JobRun
+from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_session_ctx, is_test_mode
+
+F = TypeVar("F", bound=Callable[..., Awaitable[Any]])
+
+
+def with_guaranteed_job_run_record(job_type: str) -> Callable[[F], F]:
+    """
+    Async decorator to ensure a JobRun record is created and persisted before executing the job function.
+    Should be applied before the managed job decorator.
+
+    Args:
+        job_type (str): The type/category of the job (e.g., "cron_job", "data_processing").
+
+    Returns:
+        Decorated async function with job run persistence guarantee.
+
+    Example:
+        ```
+        @with_guaranteed_job_run_record("cron_job")
+        @with_job_management
+        async def my_cron_job(ctx, ...):
+            ...
+ ``` + """ + + def decorator(func: F) -> F: + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + with ensure_session_ctx(ctx=ensure_ctx(args)): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) + + # If a job_id was already provided (e.g. from a script that + # pre-created the JobRun), validate it exists and use it. + if len(args) > 1 and isinstance(args[1], int): + _validate_job_exists(ensure_ctx(args), args[1]) + return await func(*args, **kwargs) + + # The job id must be passed as the second argument to the wrapped function. + job = _create_job_run(job_type, func, args, kwargs) + args = list(args) + args.insert(1, job.id) + args = tuple(args) + + return await func(*args, **kwargs) + + return async_wrapper # type: ignore + + return decorator + + +def _validate_job_exists(ctx: dict, job_id: int) -> None: + """Verify that a pre-provided job_id corresponds to an existing JobRun record.""" + db: Session = ctx["db"] + exists = db.query(JobRun.id).filter(JobRun.id == job_id).first() is not None + if not exists: + raise ValueError(f"Provided job_id {job_id} does not correspond to an existing JobRun record") + + +def _create_job_run( + job_type: str, func: Callable[..., Awaitable[JobExecutionOutcome]], args: tuple, kwargs: dict +) -> JobRun: + """ + Creates and persists a JobRun record for a function before job execution. 
+ """ + # Extract context (implicit first argument by ARQ convention) + ctx = ensure_ctx(args) + db: Session = ctx["db"] + + job_run = JobRun( + job_type=job_type, + job_function=func.__name__, + status=JobStatus.PENDING, + mavedb_version=__version__, + ) # type: ignore[call-arg] + db.add(job_run) + db.commit() + + return job_run diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py new file mode 100644 index 000000000..d4209b9c2 --- /dev/null +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -0,0 +1,192 @@ +""" +Managed Job Decorator - Unified decorator for complete job lifecycle management. + +Provides automatic job lifecycle tracking with support for async functions. +Includes JobManager injection for advanced operations and robust error handling. +""" + +import functools +import inspect +import logging +from typing import Any, Awaitable, Callable, TypeVar, cast + +from arq import ArqRedis +from sqlalchemy.orm import Session + +from mavedb.lib.slack import send_slack_error, send_slack_job_error, send_slack_job_failure +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode +from mavedb.worker.lib.managers import JobManager +from mavedb.worker.lib.managers.constants import TERMINAL_JOB_STATUSES +from mavedb.worker.lib.managers.utils import classify_exception + +logger = logging.getLogger(__name__) + +F = TypeVar("F", bound=Callable[..., Any]) + + +def with_job_management(func: F) -> F: + """ + Decorator that adds automatic job lifecycle management to ARQ worker functions. 
+ + Features: + - Job start/completion tracking with error handling + - JobManager injection for advanced operations + - Robust error handling with guaranteed state persistence + + The decorator injects a 'job_manager' parameter into the function that provides + access to progress updates and the underlying JobManager. + + Args: + func: The async function to decorate + + Returns: + Decorated async function with lifecycle management + """ + if not inspect.iscoroutinefunction(func): # pragma: no cover + raise ValueError("with_job_management decorator can only be applied to async functions") + + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + with ensure_session_ctx(ctx=ensure_ctx(args)): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) + + return await _execute_managed_job(func, args, kwargs) + + return cast(F, async_wrapper) + + +async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome]], args: tuple, kwargs: dict) -> Any: + """Execute a managed ARQ job with full lifecycle tracking.""" + try: + ctx = ensure_ctx(args) + db_session: Session = ctx["db"] + job_id = ensure_job_id(args) + + if "redis" not in ctx: + raise ValueError("Redis connection not found in job context") + redis_pool: ArqRedis = ctx["redis"] + except Exception as e: + logger.critical(f"Failed to initialize job management context: {e}") + send_slack_error(e) + raise + + try: + # Initialize JobManager + job_manager = JobManager(db_session, redis_pool, job_id) + + # Inject the job manager into kwargs for access within the function + kwargs["job_manager"] = job_manager + + # Check if the job was cancelled before ARQ picked it up. This race + # occurs when a sibling job fails, the coordinator cancels remaining + # QUEUED jobs in the DB, but those jobs are already in the Redis queue + # waiting for ARQ to start them. 
+ current_status = job_manager.get_job_status() + if current_status in TERMINAL_JOB_STATUSES: + logger.info(f"Job {job_id} already in terminal state {current_status}; skipping execution") + return JobExecutionOutcome.skipped(data={"reason": f"Job already in terminal state: {current_status}"}) + + # Mark job as started and persist state + job_manager.start_job() + db_session.commit() + + # Execute the async function + result = await func(*args, **kwargs) + + # Refresh job state after function execution + job = job_manager.get_job() + + if result.status == JobStatus.FAILED: + job_manager.fail_job(result=result) + if not job_manager.should_retry(): + send_slack_job_failure( + job_urn=job.urn, + job_function=job.job_function, + reason=result.error or "", + failure_category=str(result.failure_category or ""), + retry_count=job.retry_count, + max_retries=job.max_retries, + will_retry=False, + ) + + elif result.status == JobStatus.ERRORED: + job_manager.error_job(result=result) + if not job_manager.should_retry(): + send_slack_job_error( + job_urn=job.urn, + job_function=job.job_function, + err=result.exception or Exception(result.error or "Unknown error"), + failure_category=str(result.failure_category or ""), + retry_count=job.retry_count, + max_retries=job.max_retries, + will_retry=False, + ) + + elif result.status == JobStatus.SKIPPED: + job_manager.skip_job(result=result) + else: + job_manager.succeed_job(result=result) + db_session.commit() + + if job_manager.should_retry(): + await job_manager.prepare_retry(reason="Job did not complete successfully") + db_session.commit() + + return result + + except Exception as e: + # Prioritize salvaging lifecycle state + will_retry = False + try: + db_session.rollback() + + # Build errored result — this is an unhandled exception + result = JobExecutionOutcome.errored(exception=e, failure_category=classify_exception(e)) + + # Mark job as errored + job_manager.error_job(result=result) + db_session.commit() + + if 
job_manager.should_retry():
+                will_retry = True
+
+                # Prepare job for retry and persist state
+                await job_manager.prepare_retry(reason=str(e))
+                db_session.commit()
+
+            # Short circuit raising the exception. We indicate to the caller
+            # we did encounter a terminal failure and coordination should proceed.
+            return result
+
+        except Exception as inner_e:
+            logger.critical(f"Failed to mark job {job_id} as errored: {inner_e}")
+            send_slack_error(inner_e)
+
+            # Fall through to the finally block below; the failure is surfaced via the returned result.
+        finally:
+            logger.error(f"Job {job_id} failed: {e}")
+            # Only alert when the job is permanently terminal — if it will retry,
+            # the next attempt may succeed and no human action is required.
+            if not will_retry:
+                try:
+                    job = job_manager.get_job()
+                    send_slack_job_error(
+                        job_urn=job.urn,
+                        job_function=job.job_function,
+                        err=e,
+                        failure_category=str(classify_exception(e)),
+                        retry_count=job.retry_count,
+                        max_retries=job.max_retries,
+                        will_retry=False,
+                    )
+                except Exception:
+                    send_slack_error(e)
+
+            # Swallow the exception after alerting so ARQ can finish the job cleanly and log results.
+            # We don't mind that we lose ARQ's built-in job marking, since we perform our own job
+            # lifecycle management via with_job_management.
+            return result
diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py
new file mode 100644
index 000000000..cd3f2d7d7
--- /dev/null
+++ b/src/mavedb/worker/lib/decorators/pipeline_management.py
@@ -0,0 +1,189 @@
+"""
+Managed Pipeline Decorator - Unified decorator for pipeline lifecycle management around jobs.
+
+Provides automatic pipeline coordination for async job functions, layered on job management.
+Includes PipelineManager-driven coordination and robust error handling.
+""" + +import functools +import inspect +import logging +from typing import Any, Awaitable, Callable, TypeVar, cast + +from arq import ArqRedis +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.lib.slack import send_slack_error +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators import with_job_management +from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode +from mavedb.worker.lib.managers import PipelineManager +from mavedb.worker.lib.managers.utils import classify_exception + +logger = logging.getLogger(__name__) + +F = TypeVar("F", bound=Callable[..., Any]) + + +def with_pipeline_management(func: F) -> F: + """ + Decorator that adds automatic pipeline lifecycle management to ARQ worker functions. Practically, + this means calling `PipelineManager.coordinate_pipeline()` after the decorated function completes. + + This decorator performs no pipeline coordination prior to function execution; it only + coordinates the pipeline after the function has run (whether successfully or with failure). + As a result, this decorator is best suited for jobs that represent discrete steps within a pipeline. + Pipelines are expected to be pre-defined and associated with jobs prior to execution and should be transitioned + to a running state by other means (e.g. a dedicated pipeline starter job). Attempting to start pipelines + within this decorator is not supported, and doing so may lead to unexpected behavior. + + Because pipeline management depends on job management, this decorator is built on top of the + `with_job_management` decorator. + + This decorator may be added to jobs which may or may not belong to a pipeline. If the job does not + belong to a pipeline, the decorator will simply skip pipeline coordination steps. 
Although pipeline + membership is optional, the decorator still will always enforce job lifecycle management via + `with_job_management`. + + Features: + - Pipeline lifecycle tracking + - Job lifecycle tracking via with_job_management + - Robust error handling, logging, and alerting on failures + + Example: + @with_pipeline_management + async def my_job_function(ctx, param1, param2): + ... job logic ... + + On decorator exit, pipeline coordination is attempted. + + Args: + func: The async function to decorate + + Returns: + Decorated async function with lifecycle management + """ + if not inspect.iscoroutinefunction(func): # pragma: no cover + raise ValueError("with_pipeline_management decorator can only be applied to async functions") + + # Wrap the function with job management. It isn't as simple as stacking decorators + # as we can only call job management after setting up pipeline management. + + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + with ensure_session_ctx(ctx=ensure_ctx(args)): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) + + return await _execute_managed_pipeline(func, args, kwargs) + + return cast(F, async_wrapper) + + +async def _execute_managed_pipeline( + func: Callable[..., Awaitable[JobExecutionOutcome]], args: tuple, kwargs: dict +) -> Any: + """ + Execute the managed pipeline function with lifecycle management. + + Args: + func: The async function to execute. + args: Positional arguments for the function. + kwargs: Keyword arguments for the function. + + Returns: + Any: The result of the function execution. + + Raises: + Exception: Propagates any exception raised during function execution. 
+ """ + try: + ctx = ensure_ctx(args) + job_id = ensure_job_id(args) + db_session: Session = ctx["db"] + + if "redis" not in ctx: + raise ValueError("Redis connection not found in pipeline context") + redis_pool: ArqRedis = ctx["redis"] + except Exception as e: + logger.critical(f"Failed to initialize pipeline management context: {e}") + send_slack_error(e) + raise + + pipeline_manager = None + pipeline_id = None + try: + # Attempt to load the pipeline ID from the job. + # - If pipeline_id is not None, initialize PipelineManager + # - If None, skip pipeline coordination. We do not enforce every job to belong to a pipeline. + # - If error occurs, handle below + pipeline_id = db_session.execute(select(JobRun.pipeline_id).where(JobRun.id == job_id)).scalar_one() + if pipeline_id: + pipeline_manager = PipelineManager(db=db_session, redis=redis_pool, pipeline_id=pipeline_id) + + logger.info(f"Pipeline ID for job {job_id} is {pipeline_id}. Coordinating pipeline.") + + # If the pipeline is still in the created state, start it now. From this context, + # we do not wish to coordinate the pipeline. Doing so would result in the current + # job being re-queued before it has been marked as running, leading to potential state + # inconsistencies. + if pipeline_manager and pipeline_manager.get_pipeline_status() == PipelineStatus.CREATED: + await pipeline_manager.start_pipeline(coordinate=False) + db_session.commit() + + logger.info(f"Pipeline {pipeline_id} associated with job {job_id} started successfully") + + # Wrap the function with job management, then execute. 
This ensures both: + # - Job lifecycle management is nested within pipeline management + # - Exceptions from the job management layer are caught here for pipeline coordination + job_managed_func = with_job_management(func) + result = await job_managed_func(*args, **kwargs) + + # Attempt to coordinate pipeline next steps after successful job execution + if pipeline_manager: + await pipeline_manager.coordinate_pipeline() + + # Commit any changes made during pipeline coordination + db_session.commit() + + logger.info(f"Pipeline {pipeline_id} associated with job {job_id} coordinated successfully") + else: + logger.info(f"No pipeline associated with job {job_id}; skipping coordination") + + return result + + except Exception as e: + try: + # Rollback any uncommitted changes + db_session.rollback() + + # Attempt one final coordination to clean up any stubborn pipeline state + if pipeline_manager: + await pipeline_manager.coordinate_pipeline() + + # Commit any changes made during final coordination + db_session.commit() + + except Exception as inner_e: + logger.critical( + f"Unable to perform cleanup coordination on pipeline {pipeline_id} associated with job {job_id} after error: {inner_e}" + ) + send_slack_error(inner_e) + + # No further work here. We can rely on the notification hooks below to alert on the original failure + # and should allow result generation to proceed as normal so the job can be logged. + finally: + logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") + + # Build errored result for the unhandled exception + result = JobExecutionOutcome.errored(exception=e, failure_category=classify_exception(e)) + send_slack_error(e) + + # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. + # We don't mind that we lose ARQs built in job marking, since we perform our own job + # lifecycle management via with_job_management. 
+ return result diff --git a/src/mavedb/worker/lib/decorators/py.typed b/src/mavedb/worker/lib/decorators/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/lib/decorators/utils.py b/src/mavedb/worker/lib/decorators/utils.py new file mode 100644 index 000000000..0186d3fa8 --- /dev/null +++ b/src/mavedb/worker/lib/decorators/utils.py @@ -0,0 +1,66 @@ +import os +from contextlib import contextmanager +from contextvars import ContextVar + +from mavedb.db.session import db_session + +# Task-local DB session storage. Each asyncio Task (i.e., each concurrent ARQ job) +# gets its own copy of this variable, preventing concurrent jobs from sharing or +# closing each other's sessions via the shared ARQ `ctx` dict. +_task_db_session: ContextVar = ContextVar("_task_db_session", default=None) + + +def is_test_mode() -> bool: + """Check if the application is running in test mode based on the MAVEDB_TEST_MODE environment variable. + + Returns: + bool: True if in test mode, False otherwise. + """ + # Although not ideal, we use an environment variable to detect whether + # the application is in test mode. In the context of decorators, test + # mode makes them no-ops to facilitate unit testing without side effects. + # + # This is necessary because decorators are applied at import time, making + # it difficult to mock their behavior in tests when they must be imported + # up front and provided to the ARQ worker. + # + # This pattern allows us to control decorator behavior in tests without + # altering production code paths. + return os.getenv("MAVEDB_TEST_MODE") == "1" + + +@contextmanager +def ensure_session_ctx(ctx): + existing = _task_db_session.get() + if existing is not None: + # Session already exists for this task (from an outer decorator). + # Refresh ctx["db"] so downstream code in _execute_managed_* reads + # this task's session, not a stale value left by another task. 
+ ctx["db"] = existing + yield existing + else: + with db_session() as session: + _task_db_session.set(session) + ctx["db"] = session + try: + yield session + finally: + _task_db_session.set(None) + + +def ensure_ctx(args) -> dict: + # Extract context (first argument by ARQ convention) + if not args or len(args) < 1 or not isinstance(args[0], dict): + raise ValueError("Managed functions must receive context as first argument") + + ctx = args[0] + return ctx + + +def ensure_job_id(args) -> int: + # Extract job_id (second argument by MaveDB convention) + if not args or len(args) < 2 or not isinstance(args[1], int): + raise ValueError("Job ID not found in function arguments") + + job_id = args[1] + return job_id diff --git a/src/mavedb/worker/lib/managers/__init__.py b/src/mavedb/worker/lib/managers/__init__.py new file mode 100644 index 000000000..e870ccfab --- /dev/null +++ b/src/mavedb/worker/lib/managers/__init__.py @@ -0,0 +1,66 @@ +"""Manager classes and shared utilities for job and pipeline coordination. + +This package provides managers for job lifecycle and pipeline coordination, +along with shared constants, exceptions, and types used across the worker system. 
+
+Main Classes:
+    JobManager: Individual job lifecycle management
+    PipelineManager: Pipeline coordination and dependency management
+
+Shared Utilities:
+    Constants: Job status groupings and retry classification
+    Exceptions: Standardized error hierarchy
+    Types: TypedDict definitions and common type hints
+
+Example Usage:
+    >>> from mavedb.worker.lib.managers import JobManager, PipelineManager
+    >>> from mavedb.worker.lib.managers import JobStateError, TERMINAL_JOB_STATUSES
+    >>>
+    >>> job_manager = JobManager(db, redis, job_id)
+    >>> pipeline_manager = PipelineManager(db, redis, pipeline_id)
+    >>>
+    >>> # Individual job operations
+    >>> job_manager.start_job()
+    >>> job_manager.succeed_job(JobExecutionOutcome.succeeded(data={"output": "success"}))
+    >>>
+    >>> # Pipeline coordination
+    >>> await pipeline_manager.coordinate_pipeline()
+"""
+
+# Main manager classes
+from .base_manager import BaseManager
+
+# Commonly used constants
+from .constants import (
+    ACTIVE_JOB_STATUSES,
+    TERMINAL_JOB_STATUSES,
+)
+
+# Exception hierarchy
+from .exceptions import (
+    DatabaseConnectionError,
+    JobStateError,
+    JobTransitionError,
+    PipelineCoordinationError,
+)
+from .job_manager import JobManager
+from .pipeline_manager import PipelineManager
+
+# Type definitions
+from .types import RetryHistoryEntry
+
+__all__ = [
+    # Main classes
+    "BaseManager",
+    "JobManager",
+    "PipelineManager",
+    # Constants
+    "ACTIVE_JOB_STATUSES",
+    "TERMINAL_JOB_STATUSES",
+    # Exceptions
+    "DatabaseConnectionError",
+    "JobStateError",
+    "JobTransitionError",
+    "PipelineCoordinationError",
+    # Types
+    "RetryHistoryEntry",
+]
diff --git a/src/mavedb/worker/lib/managers/base_manager.py b/src/mavedb/worker/lib/managers/base_manager.py
new file mode 100644
index 000000000..de0fe67f4
--- /dev/null
+++ b/src/mavedb/worker/lib/managers/base_manager.py
@@ -0,0 +1,42 @@
+"""Base manager class providing common database transaction handling.
+
+This module provides the BaseManager class that encapsulates common database
+session management patterns used across all manager classes.
+"""
+
+import logging
+from abc import ABC
+from typing import Optional
+
+from arq import ArqRedis
+from sqlalchemy.orm import Session
+
+logger = logging.getLogger(__name__)
+
+
+class BaseManager(ABC):
+    """Base class for all manager classes providing common interface.
+
+    Provides standardized pattern for initializing a manager with database
+    and Redis connections.
+
+    Features:
+    - Common initialization pattern
+
+    Attributes:
+        db: SQLAlchemy database session for queries and transactions
+        redis: ARQ Redis client for job queue operations
+    """
+
+    def __init__(self, db: Session, redis: Optional[ArqRedis]):
+        """Initialize base manager with database and Redis connections.
+
+        Args:
+            db: SQLAlchemy database session for job and pipeline queries
+            redis: Optional ARQ Redis client for job queue operations; may be
+                None when queue operations are not needed.
+        """
+        self.db = db
+        self.redis = redis
diff --git a/src/mavedb/worker/lib/managers/constants.py b/src/mavedb/worker/lib/managers/constants.py
new file mode 100644
index 000000000..4d084e4ac
--- /dev/null
+++ b/src/mavedb/worker/lib/managers/constants.py
@@ -0,0 +1,78 @@
+"""Constants for job management and pipeline coordination.
+
+This module defines commonly used job status groupings that are used throughout
+the job management system for state validation, dependency checking, and
+pipeline coordination.
+"""
+
+from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus
+
+# Job status constants for common groupings
+STARTABLE_JOB_STATUSES = [JobStatus.QUEUED, JobStatus.PENDING, JobStatus.RUNNING]
+"""Job statuses that can be transitioned to RUNNING state.
+
+RUNNING is included to handle recovery after a worker crash: ARQ re-delivers
+the job but the DB still shows RUNNING from the dead process.
start_job() +logs a warning and resets the timestamp in this case.""" + +COMPLETED_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.ERRORED] +"""Job statuses indicating finished execution (completed states).""" + +TERMINAL_JOB_STATUSES = [ + JobStatus.SUCCEEDED, + JobStatus.FAILED, + JobStatus.ERRORED, + JobStatus.CANCELLED, + JobStatus.SKIPPED, +] +"""Job statuses indicating finished execution (terminal states).""" + +TERMINAL_PROGRESS_MESSAGES: dict[JobStatus, str] = { + JobStatus.SUCCEEDED: "Job completed", + JobStatus.FAILED: "Job failed", + JobStatus.ERRORED: "Job errored", + JobStatus.CANCELLED: "Job cancelled", + JobStatus.SKIPPED: "Job skipped", +} +""" +Generic progress messages to set when a job is completed with a terminal status. +This ensures that all jobs have a consistent final progress message without requiring +each job function to set it manually. +""" + +CANCELLED_JOB_STATUSES = [JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED, JobStatus.ERRORED] +"""Job statuses that should stop execution (termination conditions).""" + +RETRYABLE_JOB_STATUSES = [JobStatus.FAILED, JobStatus.ERRORED, JobStatus.CANCELLED, JobStatus.SKIPPED] +"""Job statuses that can be retried.""" + +ACTIVE_JOB_STATUSES = [JobStatus.PENDING, JobStatus.QUEUED, JobStatus.RUNNING] +"""Job statuses that can be cancelled/skipped when pipeline fails.""" + +RETRYABLE_FAILURE_CATEGORIES = ( + FailureCategory.NETWORK_ERROR, + FailureCategory.TIMEOUT, + FailureCategory.SERVICE_UNAVAILABLE, +) +"""Failure categories that are considered retryable errors.""" + +# Pipeline coordination constants +STARTABLE_PIPELINE_STATUSES = [PipelineStatus.PAUSED, PipelineStatus.CREATED] +"""Pipeline statuses that can be transitioned to RUNNING state.""" + +TERMINAL_PIPELINE_STATUSES = [ + PipelineStatus.SUCCEEDED, + PipelineStatus.FAILED, + PipelineStatus.PARTIAL, + PipelineStatus.CANCELLED, +] +"""Pipeline statuses indicating finished execution (terminal states).""" + 
+CANCELLED_PIPELINE_STATUSES = [PipelineStatus.CANCELLED, PipelineStatus.FAILED] +"""Pipeline statuses indicating the pipeline has been cancelled or failed.""" + +CANCELLABLE_PIPELINE_STATUSES = [PipelineStatus.CREATED, PipelineStatus.RUNNING, PipelineStatus.PAUSED] +"""Pipeline statuses that can be cancelled/skipped.""" + +RUNNING_PIPELINE_STATUSES = [PipelineStatus.RUNNING] +"""Pipeline statuses indicating active execution.""" diff --git a/src/mavedb/worker/lib/managers/exceptions.py b/src/mavedb/worker/lib/managers/exceptions.py new file mode 100644 index 000000000..48fa4b839 --- /dev/null +++ b/src/mavedb/worker/lib/managers/exceptions.py @@ -0,0 +1,63 @@ +""" +Manager Exceptions for explicit error handling. +""" + + +class ManagerError(Exception): + """Base exception for Manager operations.""" + + pass + + +## Pipeline Manager Exceptions + + +class PipelineManagerError(ManagerError): + """Pipeline Manager specific errors.""" + + pass + + +class PipelineCoordinationError(PipelineManagerError): + """Pipeline coordination failed - may be recoverable.""" + + pass + + +class PipelineTransitionError(PipelineManagerError): + """Pipeline is in wrong state for requested operation.""" + + pass + + +class PipelineStateError(PipelineManagerError): + """Critical pipeline state operations failed - database issues preventing state persistence.""" + + pass + + +## Job Manager Exceptions + + +class JobManagerError(ManagerError): + """Job Manager specific errors.""" + + pass + + +class JobStateError(JobManagerError): + """Critical job state operations failed - database issues preventing state persistence.""" + + pass + + +class JobTransitionError(JobManagerError): + """Job is in wrong state for requested operation.""" + + pass + + +class DatabaseConnectionError(JobStateError): + """Database connection issues preventing any operations.""" + + pass diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py new file mode 100644 index 
000000000..0425d155e
--- /dev/null
+++ b/src/mavedb/worker/lib/managers/job_manager.py
@@ -0,0 +1,940 @@
+"""Job lifecycle management for individual job state transitions.
+
+This module provides the JobManager class for managing individual job state transitions
+with atomic operations and explicit error handling to ensure data consistency.
+Pipeline coordination is handled separately by the PipelineManager.
+
+Example usage:
+    >>> from mavedb.worker.lib.managers import JobManager
+    >>> from mavedb.lib.types.workflow import JobExecutionOutcome
+    >>>
+    >>> # Initialize with database and Redis connections
+    >>> job_manager = JobManager(db_session, redis_client, job_id=123)
+    >>>
+    >>> # Start job execution
+    >>> job_manager.start_job()
+    >>>
+    >>> # Update progress during execution
+    >>> job_manager.update_progress(50, 100, "Processing variants...")
+    >>>
+    >>> # Complete job (pipeline coordination handled separately)
+    >>> job_manager.complete_job(
+    ...     status=JobStatus.SUCCEEDED,
+    ...     result=JobExecutionOutcome.succeeded(data={"variants_processed": 1000}),
+    ... )
+
+Error Handling:
+    The JobManager uses specific exception types to distinguish between different
+    failure modes, allowing callers to implement appropriate recovery strategies:
+
+    - DatabaseConnectionError: Database connectivity issues
+    - JobStateError: Critical state persistence failures
+    - JobTransitionError: Invalid state transitions
+"""
+
+import logging
+import traceback
+from datetime import datetime
+from typing import Any, Optional
+
+from arq import ArqRedis
+from sqlalchemy import select
+from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.orm import Session
+from sqlalchemy.orm.attributes import flag_modified
+
+from mavedb.lib.logging.context import format_raised_exception_info_as_dict
+from mavedb.lib.types.workflow import JobExecutionOutcome
+from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus
+from mavedb.models.job_run import JobRun
+from mavedb.worker.lib.managers.base_manager import BaseManager
+from mavedb.worker.lib.managers.constants import (
+    
CANCELLED_JOB_STATUSES, + RETRYABLE_FAILURE_CATEGORIES, + RETRYABLE_JOB_STATUSES, + STARTABLE_JOB_STATUSES, + TERMINAL_JOB_STATUSES, + TERMINAL_PROGRESS_MESSAGES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + JobStateError, + JobTransitionError, +) +from mavedb.worker.lib.managers.types import RetryHistoryEntry +from mavedb.worker.lib.managers.utils import classify_exception + +logger = logging.getLogger(__name__) + + +class JobManager(BaseManager): + """Manages individual job lifecycle with atomic state transitions. + + The JobManager provides a high-level interface for managing individual job execution + while ensuring database consistency. It handles job state transitions, progress updates, + and retry logic. Pipeline coordination is handled separately by the PipelineManager. + + Key Features: + - Atomic state transitions with rollback on failure + - Explicit exception handling for different failure modes + - Progress tracking and retry mechanisms + - Automatic session cleanup on object manipulation failures + - Focus on individual job lifecycle only + + Note: + To avoid persisting inconsistent job state to the database, any failures + during job manipulation (e.g., fetching job, updating fields) will result + in a safe rollback of the current transaction. This ensures that partial + updates do not corrupt job state. This manager DOES NOT COMMIT database + changes, only flushes them. Commit responsibility lies with the caller. + + Usage Patterns: + + Basic job execution: + >>> manager = JobManager(db, redis, job_id=123) + >>> manager.start_job() + >>> manager.update_progress(25, message="Starting validation") + >>> manager.succeed_job(result={"count": 100}) + + Progress tracking convenience: + >>> manager.set_progress_total(1000, "Processing 1000 records") + >>> for record in records: + ... process_record(record) + ... manager.increment_progress() # Increment by 1 + ... if manager.is_cancelled(): + ... 
break + + Job failure handling: + >>> try: + ... process_data() + ... except ValidationError as e: + ... manager.fail_job(error=e, result={"partial_results": partial_data}) + + Direct completion control: + >>> manager.complete_job(status=JobStatus.SUCCEEDED, result=data) + + Error handling: + >>> try: + ... manager.complete_job(status=JobStatus.SUCCEEDED, result=data) + ... except JobStateError as e: + ... logger.critical(f"Critical state failure: {e}") + ... # Job completion failed - state not saved + + Job retry: + >>> try: + ... manager.retry_job(reason="Transient network error") + ... except JobTransitionError as e: + ... logger.error(f"Cannot retry job in current state: {e}") + + Exception Hierarchy: + - DatabaseConnectionError: Cannot connect to database + - JobStateError: Critical state persistence failures + - JobTransitionError: Invalid state transitions (e.g., start already running job) + + Thread Safety: + JobManager is not thread-safe. Each instance should be used by a single + worker thread and should not be shared across concurrent operations. + """ + + def __init__(self, db: Session, redis: Optional[ArqRedis], job_id: int): + """Initialize JobManager for a specific job. + + Args: + db: Active SQLAlchemy session for database operations. Session should + be configured for the appropriate database and have proper + transaction isolation. + redis: ARQ Redis client for job queue operations. Must be connected + and ready for enqueue operations. Optional; can be None if Redis is not used. + job_id: Unique identifier of the job to manage. Must correspond to + an existing JobRun record in the database. + + Raises: + DatabaseConnectionError: If the job cannot be fetched from database, + indicating connectivity issues or invalid job_id. 
+ + Example: + >>> db_session = get_database_session() + >>> redis_client = get_arq_redis_client() + >>> manager = JobManager(db_session, redis_client, 12345) + >>> # Manager is now ready to handle job 12345 + """ + super().__init__(db, redis) + + self.context: dict[str, Any] = {} + self.job_id = job_id + job = self.get_job() + self.pipeline_id = job.pipeline_id if job else None + + self.save_to_context( + {"job_id": str(self.job_id), "pipeline_id": str(self.pipeline_id) if self.pipeline_id else None} + ) + + def save_to_context(self, ctx: dict) -> dict[str, Any]: + for k, v in ctx.items(): + self.context[k] = v + + return self.context + + def logging_context(self) -> dict[str, Any]: + return self.context + + def start_job(self) -> None: + """Mark job as started and initialize execution tracking. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Transitions job from QUEUED, PENDING, or RUNNING to RUNNING state, setting start + timestamp and a default progress message. This method should be called + once at the beginning of job execution. + + If the job is already RUNNING (stale from a crashed worker that ARQ re-delivered), + a warning is logged and the start timestamp is reset. + + State Changes: + - Sets status to JobStatus.RUNNING + - Records started_at timestamp + - Initializes progress to 0/100 + - Sets progress_message to "Job began execution" + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save job start state to database + JobTransitionError: Job not in valid state to start (must be QUEUED, PENDING, or RUNNING) + + Example: + >>> manager = JobManager(db, redis, 123) + >>> manager.start_job() # Job 123 now marked as RUNNING + >>> # Proceed with job execution logic... 
+ """ + job_run = self.get_job() + if job_run.status not in STARTABLE_JOB_STATUSES: + self.save_to_context({"job_status": str(job_run.status)}) + logger.error( + "Invalid job start attempt: status not in STARTABLE_JOB_STATUSES", extra=self.logging_context() + ) + raise JobTransitionError(f"Cannot start job {self.job_id} from status {job_run.status}") + + # Recovery path: job is already RUNNING from a previous worker that crashed. + # ARQ re-delivered the job, so we reset the timestamp and proceed. + if job_run.status == JobStatus.RUNNING: + logger.warning( + f"Job {self.job_id} already RUNNING (previous worker likely crashed) — resetting start time", + extra=self.logging_context(), + ) + + try: + job_run.status = JobStatus.RUNNING + job_run.started_at = datetime.now() + job_run.progress_message = "Job began execution" + except (AttributeError, TypeError, KeyError, ValueError) as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job start state", extra=self.logging_context()) + raise JobStateError(f"Failed to update job start state: {e}") + + self.save_to_context({"job_status": str(job_run.status)}) + logger.info("Job marked as started", extra=self.logging_context()) + + def complete_job(self, status: JobStatus, result: JobExecutionOutcome) -> None: + """Mark job as completed with the specified final status. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Transitions job to a terminal status (SUCCEEDED, FAILED, ERRORED, CANCELLED, SKIPPED), + recording the finished_at timestamp, result data, and error details if applicable. + + Args: + status: Final job status - must be a terminal status. + result: JobExecutionOutcome containing status, data, error, and exception. 
+ + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + JobTransitionError: Invalid terminal status provided + """ + # Validate terminal status + if status not in TERMINAL_JOB_STATUSES: + self.save_to_context({"job_status": str(status)}) + logger.error("Invalid job completion status: not in TERMINAL_JOB_STATUSES", extra=self.logging_context()) + raise JobTransitionError( + f"Cannot complete job to status: {status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + ) + + job_run = self.get_job() + try: + job_run.status = status + job_run.metadata_["result"] = { + "status": result.status.value, + "data": result.data, + "error": result.error, + "exception_details": format_raised_exception_info_as_dict(result.exception) + if result.exception + else None, + } + job_run.finished_at = datetime.now() + + if status in (JobStatus.FAILED, JobStatus.ERRORED): + if result.failure_category: + job_run.failure_category = result.failure_category + elif result.exception: + job_run.failure_category = classify_exception(result.exception) + else: + job_run.failure_category = FailureCategory.UNKNOWN + + if result.error: + job_run.error_message = result.error + + if result.exception: + job_run.error_message = str(result.exception) + job_run.error_traceback = traceback.format_exc() + + if job_run.failure_category: + self.save_to_context({"failure_category": str(job_run.failure_category)}) + + # For consistency, the job manager is responsible for setting terminal progress messages, + # not jobs themselves. + if status in TERMINAL_PROGRESS_MESSAGES: + job_run.progress_message = TERMINAL_PROGRESS_MESSAGES[status] + + # SUCCEEDED jobs will always be fully complete; + # CANCELLED/SKIPPED null the numeric fields because those jobs never completed (or were cut off); + # FAILED/ERRORED leave numeric fields intact so the UI can show how far the job progressed. 
+ if status == JobStatus.SUCCEEDED: + job_run.progress_current = job_run.progress_total + elif status in (JobStatus.CANCELLED, JobStatus.SKIPPED): + job_run.progress_current = None + job_run.progress_total = None + + except (AttributeError, TypeError, KeyError, ValueError) as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while updating job completion state", extra=self.logging_context() + ) + raise JobStateError(f"Failed to update job completion state: {e}") + + self.save_to_context({"job_status": str(job_run.status)}) + logger.info("Job marked as completed", extra=self.logging_context()) + + def fail_job(self, result: JobExecutionOutcome) -> None: + """Mark job as failed (controlled business logic failure). + + Use this for failures where the job determined the outcome was unsuccessful + but no unhandled exception occurred (e.g., validation errors, missing data). + + Args: + result: JobExecutionOutcome with status=FAILED and a reason string. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + """ + self.complete_job(status=JobStatus.FAILED, result=result) + + def error_job(self, result: JobExecutionOutcome) -> None: + """Mark job as errored (unhandled exception / system crash). + + Use this for failures caused by unhandled exceptions where the job crashed + rather than gracefully determining failure (e.g., DB connection lost, unexpected TypeError). + + Args: + result: JobExecutionOutcome with status=ERRORED, an exception, and an error string. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + """ + self.complete_job(status=JobStatus.ERRORED, result=result) + + def succeed_job(self, result: JobExecutionOutcome) -> None: + """Mark job as succeeded and record results. 
+ + Args: + result: JobExecutionOutcome with status=SUCCEEDED and optional data payload. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + """ + self.complete_job(status=JobStatus.SUCCEEDED, result=result) + + def cancel_job(self, result: JobExecutionOutcome) -> None: + """Mark job as cancelled. + + Args: + result: JobExecutionOutcome with cancellation details. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + """ + self.complete_job(status=JobStatus.CANCELLED, result=result) + + def skip_job(self, result: JobExecutionOutcome) -> None: + """Mark job as skipped (intentionally not executed). + + Args: + result: JobExecutionOutcome with status=SKIPPED and optional reason in data. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + """ + self.complete_job(status=JobStatus.SKIPPED, result=result) + + async def prepare_retry(self, reason: str = "retry_requested") -> None: + """Prepare a failed job for retry by resetting state to PENDING. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Resets a failed job back to PENDING status so it can be re-enqueued + by the pipeline coordination system. This is similar to job completion + but transitions to PENDING instead of a terminal state. + + Args: + reason: Human-readable reason for the retry (e.g., "transient_network_error", + "memory_limit_exceeded"). Used for debugging and audit trails. 
+ + State Changes: + - Increments retry_count + - Resets status from FAILED, SKIPPED, CANCELLED to PENDING + - Clears error_message, error_traceback, failure_category + - Clears finished_at timestamp + - Adds retry attempt to metadata history + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobTransitionError: Job not in FAILED state (cannot retry) + JobStateError: Cannot save retry state changes + + Examples: + Basic retry preparation: + >>> try: + ... manager.prepare_retry("network_timeout") + ... except JobTransitionError: + ... logger.error("Cannot retry job - not in failed state") + + Conditional retry with limits: + >>> job = manager.get_job() + >>> if job and job.retry_count < 3: + ... manager.prepare_retry(f"attempt_{job.retry_count + 1}") + ... # PipelineManager will handle enqueueing + ... else: + ... logger.error("Max retries exceeded") + + Retry History: + Each retry attempt is recorded in job metadata with: + - retry_attempt: Sequential attempt number + - timestamp: When retry was initiated + - result: Previous execution results (for debugging) + - reason: Provided retry reason + + Note: + After calling this method, use PipelineManager.enqueue_ready_jobs() + to actually enqueue the job for execution. 
+ """ + job_run = self.get_job() + if job_run.status not in RETRYABLE_JOB_STATUSES: + self.save_to_context({"job_status": str(job_run.status)}) + logger.error("Invalid job retry status: status not in RETRYABLE_JOB_STATUSES", extra=self.logging_context()) + raise JobTransitionError(f"Cannot retry job {self.job_id} due to invalid state ({job_run.status})") + + try: + # Snapshot error state before clearing for retry history + current_result: dict = job_run.metadata_.get("result", {}) + previous_error_message = job_run.error_message or "" + + job_run.status = JobStatus.PENDING + job_run.retry_count = (job_run.retry_count or 0) + 1 + job_run.progress_message = "Job retry prepared" + job_run.error_message = None + job_run.error_traceback = None + job_run.failure_category = None + job_run.finished_at = None + job_run.started_at = None + + # Add summary-only retry history entry. + retry_history: list[RetryHistoryEntry] = job_run.metadata_.setdefault("retry_history", []) + retry_history.append( + { + "attempt": job_run.retry_count, + "timestamp": datetime.now().isoformat(), + "status": current_result.get("status", "unknown"), + "error_message": previous_error_message, + "reason": reason, + } + ) + job_run.metadata_.pop("result", None) # Clear previous result + flag_modified(job_run, "metadata_") + + except (AttributeError, TypeError, KeyError, ValueError) as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job retry state", extra=self.logging_context()) + raise JobStateError(f"Failed to update job retry state: {e}") + + self.save_to_context({"job_status": str(job_run.status), "retry_attempt": job_run.retry_count}) + logger.info("Job successfully prepared for retry", extra=self.logging_context()) + + def prepare_queue(self) -> None: + """Prepare job for enqueueing by setting QUEUED status. 
This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Transitions job from PENDING to QUEUED status before ARQ enqueueing. + This ensures proper state tracking and validates the transition. + + Raises: + JobTransitionError: Job not in PENDING state + JobStateError: Cannot save state change + """ + job_run = self.get_job() + if job_run.status != JobStatus.PENDING: + self.save_to_context({"job_status": str(job_run.status)}) + logger.error("Invalid job queue attempt: status not PENDING", extra=self.logging_context()) + raise JobTransitionError(f"Cannot queue job {self.job_id} from status {job_run.status}") + + try: + job_run.status = JobStatus.QUEUED + job_run.progress_message = "Job queued for execution" + except (AttributeError, TypeError, KeyError, ValueError) as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job queue state", extra=self.logging_context()) + raise JobStateError(f"Failed to update job queue state: {e}") + + self.save_to_context({"job_status": str(job_run.status)}) + logger.debug("Job successfully prepared for queueing", extra=self.logging_context()) + + def reset_job(self) -> None: + """Reset job to initial state for re-execution. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Resets all job state fields to their initial values, allowing the job + to be re-executed from scratch. This is useful for testing or manual + re-runs of jobs without retaining any prior execution history. 
+ + State Changes: + - Sets status to PENDING + - Clears started_at and finished_at timestamps + - Resets progress to 0/100 with default message + - Clears error details and failure category + - Resets retry_count to 0 + - Clears metadata + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save reset state changes + Examples: + Basic job reset: + >>> manager.reset_job() + >>> # Job is now reset to initial state for re-execution + """ + job_run = self.get_job() + try: + job_run.status = JobStatus.PENDING + job_run.started_at = None + job_run.finished_at = None + job_run.progress_current = None + job_run.progress_total = None + job_run.progress_message = None + job_run.error_message = None + job_run.error_traceback = None + job_run.failure_category = None + job_run.retry_count = 0 + job_run.metadata_ = {} + + except (AttributeError, TypeError, KeyError, ValueError) as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while resetting job state", extra=self.logging_context()) + raise JobStateError(f"Failed to reset job state: {e}") + + self.save_to_context({"job_status": str(job_run.status), "retry_attempt": job_run.retry_count}) + logger.info("Job successfully reset to initial state", extra=self.logging_context()) + + def update_progress( + self, current: int, total: int = 100, message: Optional[str] = None, *, commit: bool = True + ) -> None: + """Update job progress information during execution and optionally commit immediately. + + Provides real-time progress updates for long-running jobs. By default, commits + the progress update immediately to the database for real-time visibility, acting + as a checkpoint operation. This commits ALL pending changes in the current session, + so progress updates should only be called at safe transaction boundaries. 
+ + Args: + current: Current progress value (e.g., records processed so far) + total: Total expected progress value (default: 100 for percentage) + message: Optional human-readable progress description + commit: Whether to commit progress immediately to database (default: True). + Set to False for jobs with complex multi-step transactions where + progress should only be committed at job completion. + + Examples: + Checkpoint-style progress (default - commits immediately): + >>> for i, record in enumerate(records): + ... process_record(record) + ... if i % 100 == 0: # Checkpoint every 100 records + ... manager.update_progress( + ... current=i, + ... total=len(records), + ... message=f"Processed {i}/{len(records)} records" + ... ) # Commits progress + all pending work + + Progress without commit (complex transactions): + >>> manager.update_progress(25, 100, "Validating input", commit=False) + >>> # Progress must be committed later by caller after transaction is complete + + Handling progress failures: + >>> try: + ... manager.update_progress(75, message="Almost done") + ... except DatabaseConnectionError: + ... logger.debug("Progress update failed, continuing job") + ... # Job continues normally + + Important: + When commit=True (default), this commits ALL pending changes in the database + session, not just the progress update. Only call update_progress() at points + where it's safe to commit accumulated work (e.g., after processing a batch + of independent records). This checkpoint pattern reduces transaction size and + provides real-time visibility into job progress. + + Note: + Progress updates are best-effort operations. If a progress update or commit + fails, the job may choose to continue execution normally. Failed progress + updates are logged at debug level. 
+ """ + job_run = self.get_job() + try: + job_run.progress_current = current + job_run.progress_total = total + if message: + job_run.progress_message = message + + except (AttributeError, TypeError, KeyError, ValueError) as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job progress", extra=self.logging_context()) + raise JobStateError(f"Failed to update job progress state: {e}") + + self.save_to_context( + { + "job_progress_current": current, + "job_progress_total": total, + "job_progress_message": message, + "commit": commit, + } + ) + + if commit: + try: + self.db.commit() + logger.debug("Updated progress and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + # Rollback to avoid inconsistent state + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Updated progress successfully for job (no commit)", extra=self.logging_context()) + + def update_status_message(self, message: str, *, commit: bool = True) -> None: + """Update job status message and optionally commit immediately. + + Convenience method for updating the progress message while keeping + current progress values unchanged. Useful for status updates during + long-running operations. By default, commits the update immediately + as a checkpoint operation. + + Args: + message: Human-readable status message describing current activity + commit: Whether to commit message immediately to database (default: True). + Set to False for jobs with complex multi-step transactions. 
+ + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save status message update or commit checkpoint + + Examples: + Update with checkpoint (default): + >>> manager.update_status_message("Connecting to external API...") + >>> # Do API work + >>> manager.update_status_message("Processing API response...") + + Update without commit: + >>> manager.update_status_message("Starting...", commit=False) + + Important: + When commit=True (default), this commits ALL pending changes in the database + session. Only call at safe transaction boundaries. + """ + job_run = self.get_job() + try: + job_run.progress_message = message + except (AttributeError, TypeError, KeyError, ValueError) as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while updating job status message", extra=self.logging_context() + ) + raise JobStateError(f"Failed to update job status message state: {e}") + + self.save_to_context({"job_progress_message": message, "commit": commit}) + + if commit: + try: + self.db.commit() + logger.debug("Updated status message and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Updated status message successfully for job (no commit)", extra=self.logging_context()) + + def increment_progress(self, amount: int = 1, message: Optional[str] = None, *, commit: bool = True) -> None: + """Increment job progress by a specified amount and optionally commit immediately. + + Convenience method for incrementing progress without needing to track + the current progress value. Useful for batch processing where you want + to increment by 1 for each item processed. 
By default, commits the progress + update immediately as a checkpoint operation. + + Args: + amount: Amount to increment progress by (default: 1) + message: Optional message to update along with progress + commit: Whether to commit progress immediately to database (default: True). + Set to False for jobs with complex multi-step transactions. + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save progress update or commit checkpoint + + Examples: + Checkpoint-style increments (default - commits immediately): + >>> for item in items: + ... process_item(item) + ... manager.increment_progress() # Increment and commit checkpoint + + Process in batches with checkpoints: + >>> for batch in batches: + ... process_batch(batch) + ... manager.increment_progress(len(batch), f"Processed batch {i}") + + Increment without commit: + >>> manager.increment_progress(1, commit=False) # No commit + + Important: + When commit=True (default), this commits ALL pending changes in the database + session. Only call at safe transaction boundaries. 
+ """ + job_run = self.get_job() + try: + current = job_run.progress_current or 0 + new_current = current + amount + job_run.progress_current = new_current + if message: + job_run.progress_message = message + except (AttributeError, TypeError, KeyError, ValueError) as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while incrementing job progress", extra=self.logging_context() + ) + raise JobStateError(f"Failed to increment job progress state: {e}") + + self.save_to_context( + { + "job_progress_current": new_current, + "job_progress_total": job_run.progress_total, + "job_progress_message": message or "", + "commit": commit, + } + ) + + if commit: + try: + self.db.commit() + logger.debug("Incremented progress and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Incremented progress successfully for job (no commit)", extra=self.logging_context()) + + def set_progress_total(self, total: int, message: Optional[str] = None, *, commit: bool = True) -> None: + """Update the total progress value and optionally commit immediately. + + Convenience method for updating progress total when it's discovered during + job execution (e.g., after counting records to process). By default, commits + the update immediately as a checkpoint operation. + + Args: + total: New total progress value + message: Optional message to update along with total + commit: Whether to commit progress immediately to database (default: True). + Set to False for jobs with complex multi-step transactions. 
+ + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save progress total update or commit checkpoint + + Examples: + Set total with checkpoint (default): + >>> records = load_all_records() # Discovers actual count + >>> manager.set_progress_total(len(records), f"Processing {len(records)} records") + + Set total without commit: + >>> manager.set_progress_total(1000, commit=False) + + Important: + When commit=True (default), this commits ALL pending changes in the database + session. Only call at safe transaction boundaries. + """ + job_run = self.get_job() + try: + job_run.progress_total = total + if message: + job_run.progress_message = message + except (AttributeError, TypeError, KeyError, ValueError) as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while updating job progress total", extra=self.logging_context() + ) + raise JobStateError(f"Failed to update job progress total state: {e}") + + self.save_to_context({"job_progress_total": total, "job_progress_message": message, "commit": commit}) + + if commit: + try: + self.db.commit() + logger.debug("Updated progress total and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Updated progress total successfully for job (no commit)", extra=self.logging_context()) + + def is_cancelled(self) -> bool: + """Check if job has been cancelled or should stop execution. This method does + not flush or commit the database session; the caller is responsible for persisting changes. 
+ + Convenience method for checking if the job should stop execution due to + cancellation, pipeline failure, or other termination conditions. Jobs + can use this for graceful shutdown. + + Returns: + bool: True if job should stop execution, False if it can continue + + Raises: + DatabaseConnectionError: Cannot fetch job status from database + + Example: + >>> for item in large_dataset: + ... if manager.is_cancelled(): + ... logger.info("Job cancelled, stopping gracefully") + ... break + ... process_item(item) + """ + return self.get_job_status() in CANCELLED_JOB_STATUSES + + def should_retry(self) -> bool: + """Check if job should be retried based on error type and retry count. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method that implements common retry logic. Checks current + retry count against maximum and evaluates if the error type is retryable. + + Returns: + bool: True if job should be retried, False otherwise + + Raises: + DatabaseConnectionError: Cannot fetch job info from database + + Examples: + >>> try: + ... result = do_work() + ... except NetworkError as e: + ... manager.fail_job(e, result) + ... if manager.should_retry(): + ... manager.retry_job() + ... else: + ... 
logger.error("Job will not be retried") + """ + job_run = self.get_job() + try: + self.save_to_context( + { + "job_retry_count": job_run.retry_count, + "job_max_retries": job_run.max_retries, + "job_failure_category": str(job_run.failure_category) if job_run.failure_category else None, + "job_status": str(job_run.status), + } + ) + + # Check if job is in a failure state (FAILED or ERRORED) + if job_run.status not in (JobStatus.FAILED, JobStatus.ERRORED): + logger.debug("Job cannot be retried: not in a failure state", extra=self.logging_context()) + return False + + # Check retry count + current_retries = job_run.retry_count or 0 + if current_retries >= job_run.max_retries: + logger.debug("Job cannot be retried: max retries reached", extra=self.logging_context()) + return False + + # Check if failure category is retryable + if job_run.failure_category not in RETRYABLE_FAILURE_CATEGORIES: + logger.debug("Job cannot be retried: failure category not retryable", extra=self.logging_context()) + return False + + logger.debug("Job is retryable", extra=self.logging_context()) + return True + + except (AttributeError, TypeError, KeyError, ValueError) as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Unexpected error checking retry eligibility", extra=self.logging_context()) + raise JobStateError(f"Failed to check retry eligibility state: {e}") + + def get_job_status(self) -> JobStatus: # pragma: no cover + """Get current job status for monitoring and debugging. + + Provides non-blocking access to job status without affecting job + execution. Used by decorators and monitoring systems to check job state. + + Returns: + JobStatus: Current job status (QUEUED, RUNNING, SUCCEEDED, + FAILED, etc.). + + Raises: + DatabaseConnectionError: Cannot connect to database, SQL query failed, + or job not found (indicates data inconsistency) + + Examples: + >>> status = manager.get_job_status() + >>> if status == JobStatus.RUNNING: + ... 
logger.info("Job is currently executing") + """ + return self.get_job().status + + def get_job(self) -> JobRun: + """Get complete job information for monitoring and debugging. + + Retrieves full JobRun instance with all fields populated. Used by + decorators and monitoring systems that need access to job metadata, + progress, error details, or other comprehensive job information. + + Returns: + JobRun: Complete job instance with all fields. + + Raises: + DatabaseConnectionError: Cannot connect to database, SQL query failed, + or job not found (indicates data inconsistency) + + Example: + >>> job = manager.get_job() + >>> if job: + ... logger.info(f"Job {job.urn} progress: {job.progress_current}/{job.progress_total}") + ... if job.error_message: + ... logger.error(f"Job error: {job.error_message}") + """ + try: + return self.db.execute(select(JobRun).where(JobRun.id == self.job_id)).scalar_one() + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Unexpected error fetching job info", extra=self.logging_context()) + raise DatabaseConnectionError(f"Failed to fetch job {self.job_id}: {e}") diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py new file mode 100644 index 000000000..68f56c9d4 --- /dev/null +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -0,0 +1,1290 @@ +"""Pipeline coordination management for job dependencies and status. + +This module provides the PipelineManager class for coordinating pipeline execution, +managing job dependencies, and updating pipeline status. The PipelineManager is +separated from individual job lifecycle management to provide clean separation of concerns. 
+ +Example usage: + >>> from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + >>> + >>> # Initialize with database and Redis connections + >>> pipeline_manager = PipelineManager(db_session, redis_client, pipeline_id=456) + >>> + >>> # Coordinate after a job completes + >>> await pipeline_manager.coordinate_pipeline() + >>> + >>> # Update pipeline status + >>> new_status = pipeline_manager.transition_pipeline_status() + >>> + >>> # Cancel remaining jobs when pipeline fails + >>> cancelled_count = pipeline_manager.cancel_remaining_jobs( + ... reason="Dependency failed" + ... ) + >>> + >>> # Pause/unpause pipeline + >>> was_paused = pipeline_manager.pause_pipeline("Maintenance") + >>> was_unpaused = await pipeline_manager.unpause_pipeline("Complete") + +Error Handling: + The PipelineManager uses the same exception hierarchy as JobManager for consistency: + + - DatabaseConnectionError: Database connectivity issues + - JobStateError: Critical state persistence failures + - PipelineCoordinationError: Pipeline coordination failures +""" + +import logging +from datetime import datetime, timedelta, timezone +from typing import Sequence + +from arq import ArqRedis +from sqlalchemy import and_, func, select +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session + +from mavedb.lib.slack import send_slack_message +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.managers import BaseManager, JobManager +from mavedb.worker.lib.managers.constants import ( + ACTIVE_JOB_STATUSES, + CANCELLED_JOB_STATUSES, + CANCELLED_PIPELINE_STATUSES, + RUNNING_PIPELINE_STATUSES, + TERMINAL_PIPELINE_STATUSES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + PipelineCoordinationError, 
+ PipelineStateError, + PipelineTransitionError, +) +from mavedb.worker.lib.managers.utils import ( + arq_job_id, + construct_bulk_cancellation_result, + job_dependency_is_met, + job_should_be_skipped_due_to_unfulfillable_dependency, +) + +logger = logging.getLogger(__name__) + + +class PipelineManager(BaseManager): + """Manages pipeline coordination and job dependencies with atomic operations. + + The PipelineManager provides a focused interface for coordinating pipeline execution + without coupling to individual job lifecycle management. It handles dependency + checking, status updates, and pipeline-wide operations like cancellation. + + Key Features: + - Atomic pipeline status transitions with rollback on failure + - Dependency-based job enqueueing with race condition prevention + - Pipeline-wide cancellation with proper error handling + - Separation from individual job lifecycle management + - Consistent exception handling and logging + + Usage Patterns: + + Pipeline coordination after job completion: + >>> manager = PipelineManager(db, redis, pipeline_id=123) + >>> await manager.coordinate_pipeline() + + Manual pipeline operations: + >>> # Update pipeline status based on current job states + >>> new_status = manager.transition_pipeline_status() + >>> + >>> # Cancel remaining jobs + >>> cancelled_count = manager.cancel_remaining_jobs( + ... reason="Manual cancellation" + ... ) + >>> + >>> # Pause pipeline execution + >>> was_paused = manager.pause_pipeline( + ... reason="System maintenance" + ... ) + >>> + >>> # Resume pipeline execution + >>> was_unpaused = await manager.unpause_pipeline( + ... reason="Maintenance complete" + ... 
) + + Dependency management: + >>> # Check if a job can be enqueued + >>> can_run = manager.can_enqueue_job(job) + >>> + >>> # Enqueue all ready jobs (independent and dependent) + >>> await manager.enqueue_ready_jobs() + + Pipeline monitoring: + >>> # Get detailed progress statistics + >>> progress = manager.get_pipeline_progress() + >>> print(f"Pipeline {progress['completion_percentage']:.1f}% complete") + >>> + >>> # Get job counts by status + >>> counts = manager.get_job_counts_by_status() + >>> print(f"Failed jobs: {counts.get(JobStatus.FAILED, 0)}") + + Job retry and pipeline restart: + >>> # Retry all failed jobs + >>> retried_count = await manager.retry_failed_jobs() + >>> + >>> # Restart entire pipeline + >>> restarted = await manager.restart_pipeline("Fixed issue") + + Thread Safety: + PipelineManager is not thread-safe. Each instance should be used by a single + worker thread and should not be shared across concurrent operations. + """ + + def __init__(self, db: Session, redis: ArqRedis, pipeline_id: int): + """Initialize pipeline manager with database and Redis connections. + + Args: + db: SQLAlchemy database session for job and pipeline queries + redis: ARQ Redis client for job queue operations. Note that although the Redis + client is optional for base managers, PipelineManager requires it for + job coordination. + pipeline_id: ID of the pipeline this manager instance will coordinate + + Raises: + DatabaseConnectionError: Cannot connect to database + + Example: + >>> db_session = get_database_session() + >>> redis_client = get_arq_redis_client() + >>> manager = PipelineManager(db_session, redis_client, pipeline_id=456) + """ + super().__init__(db, redis) + self.pipeline_id = pipeline_id + self.get_pipeline() # Validate pipeline exists on init + + async def start_pipeline(self, coordinate: bool = True) -> None: + """Start the pipeline + + Entry point to start pipeline execution. 
Sets pipeline status to RUNNING + and enqueues independent jobs using coordinate pipeline if coordinate is True. + + Raises: + DatabaseConnectionError: Cannot query or update pipeline + PipelineTransitionError: Pipeline is not in CREATED state and may not be started + PipelineStateError: Cannot update pipeline state + PipelineCoordinationError: Failed to enqueue ready jobs + + Example: + >>> # Start a new pipeline + >>> await pipeline_manager.start_pipeline() + """ + status = self.get_pipeline_status() + + if status != PipelineStatus.CREATED: + logger.error( + f"Pipeline {self.pipeline_id} is in a non-created state (current status: {status}) and may not be started" + ) + raise PipelineTransitionError(f"Pipeline {self.pipeline_id} is in state {status} and may not be started") + + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} started successfully") + + # Allow controllable coordination logic. By default, we want to coordinate + # immediately after starting to enqueue independent jobs. However, if a job + # has already been enqueued and is beginning execution and starts the pipeline, + # as a result of its job management decorator, we want to skip coordination here + # so we do not double-enqueue jobs. + if coordinate: + await self.coordinate_pipeline() + + async def coordinate_pipeline(self) -> None: + """Coordinate pipeline after a job completes. + + This is the main coordination entry point called after jobs complete. + It updates pipeline status and enqueues ready jobs or cancels remaining jobs + based on the completion result. The method operates on the entire pipeline + state rather than tracking individual job completions. 
+ + Raises: + DatabaseConnectionError: Cannot query job or pipeline info + PipelineStateError: Cannot update pipeline state + PipelineCoordinationError: Failed to enqueue jobs or cancel remaining jobs + JobStateError: Critical job state persistence failure + JobTransitionError: Job cannot be transitioned from current state to new state + + + Example: + >>> # Called after successful job completion + >>> await pipeline_manager.coordinate_pipeline() + """ + new_status = self.transition_pipeline_status() + self.db.flush() + + if new_status in CANCELLED_PIPELINE_STATUSES: + self.cancel_remaining_jobs(reason="Pipeline failed or cancelled") + + # Only enqueue new jobs if pipeline is running + if new_status in RUNNING_PIPELINE_STATUSES: + await self.enqueue_ready_jobs() + + # After enqueuing jobs, re-evaluate pipeline status in case it changed. + # We only expect the status to change if jobs with unsatisfiable dependencies were skipped. + self.transition_pipeline_status() + self.db.flush() + + def transition_pipeline_status(self) -> PipelineStatus: + """Update pipeline status based on current job states. + + Analyzes the status distribution of all jobs in the pipeline to determine + the appropriate pipeline status. Updates pipeline status and finished_at + timestamp when the status changes to a terminal state. + + Returns: + PipelineStatus: The current pipeline status after update. If unchanged, the + previous status is returned. 
+ + Raises: + DatabaseConnectionError: Cannot query job statuses or pipeline info + JobStateError: Cannot update pipeline status or corrupted job data + + Status Logic: + - FAILED: Any job ERRORED, or a non-leaf job FAILED + - RUNNING: Any job is RUNNING or QUEUED (and no non-leaf FAILED/ERRORED) + - SUCCEEDED: All jobs SUCCEEDED + - PARTIAL: All jobs terminal with mix of SUCCEEDED/FAILED(leaf)/SKIPPED/CANCELLED + - CANCELLED: All remaining jobs are CANCELLED or SKIPPED with no SUCCEEDED + - No Change: If pipeline is PAUSED, CANCELLED, or has no jobs: status remains unchanged + + Example: + >>> new_status = pipeline_manager.transition_pipeline_status() + >>> print(f"Pipeline status is now {new_status}") + """ + pipeline = self.get_pipeline() + status_counts = self.get_job_counts_by_status() + + old_status = pipeline.status + try: + total_jobs = sum(status_counts.values()) + if old_status in TERMINAL_PIPELINE_STATUSES: + logger.debug(f"Pipeline {self.pipeline_id} is in terminal status {old_status}; skipping update") + return old_status # No change from terminal state + + if old_status == PipelineStatus.PAUSED: + logger.debug(f"Pipeline {self.pipeline_id} is paused; skipping status update") + return old_status # No change from paused state + + # The pipeline must not be in a terminal state (from above), but has no jobs. Consider it complete. 
+ if total_jobs == 0: + logger.debug(f"No jobs found in pipeline {self.pipeline_id} - considering pipeline complete") + + self.set_pipeline_status(PipelineStatus.SUCCEEDED) + return PipelineStatus.SUCCEEDED + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Invalid job status data for pipeline {self.pipeline_id}: {e}") + raise PipelineStateError(f"Corrupted job status data for pipeline {self.pipeline_id}: {e}") + + # The pipeline is not in a terminal state and has jobs - determine new status + try: + new_status = self._compute_new_status(old_status, status_counts, total_jobs) + + if pipeline.status != new_status: + self.set_pipeline_status(new_status) + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Object manipulation failed updating pipeline status for {self.pipeline_id}: {e}") + raise PipelineStateError(f"Failed to update pipeline status for {self.pipeline_id}: {e}") + + if new_status != old_status: + logger.info(f"Pipeline {self.pipeline_id} status successfully updated to {new_status} from {old_status}") + else: + logger.debug(f"No status change for pipeline {self.pipeline_id} (remains {old_status})") + + return new_status + + def _compute_new_status( + self, + old_status: PipelineStatus, + status_counts: dict[JobStatus, int], + total_jobs: int, + ) -> PipelineStatus: + """Determine the new pipeline status from the current job status distribution. + + Called by transition_pipeline_status after guard clauses (terminal, paused, no-jobs) + have been checked. Dispatches to _compute_status_with_leaf_failures when all + failed jobs are leaves, allowing sibling jobs to continue running. + + Args: + old_status: The pipeline's current status (used as fallback when pending jobs exist). + status_counts: Mapping of JobStatus to job count for this pipeline. + total_jobs: Total number of jobs in the pipeline (sum of status_counts). + + Returns: + PipelineStatus: The new pipeline status. 
+ """ + if status_counts.get(JobStatus.ERRORED, 0) > 0: + return PipelineStatus.FAILED + + if status_counts.get(JobStatus.FAILED, 0) > 0: + failed_jobs = self.get_failed_jobs() + if any(not self.is_leaf_job(job) for job in failed_jobs): + return PipelineStatus.FAILED + # All failures are leaf failures — delegate to leaf-aware logic so that + # sibling jobs can continue running rather than failing the pipeline. + return self._compute_status_with_leaf_failures(old_status, status_counts, total_jobs) + + if status_counts.get(JobStatus.RUNNING, 0) > 0 or status_counts.get(JobStatus.QUEUED, 0) > 0: + return PipelineStatus.RUNNING + + # Pending jobs still exist, don't change the status. + # These might be picked up soon, or they may be proactively + # skipped later if dependencies cannot be met. + # + # Although there is a tension between having only pending + # and succeeded jobs (which would suggest partial/succeeded), + # we leave the status as-is until jobs are actually processed. + # + # *A pipeline with a terminal status must not have pending jobs* + if status_counts.get(JobStatus.PENDING, 0) > 0: + return old_status + + if status_counts.get(JobStatus.SUCCEEDED, 0) > 0: + succeeded = status_counts.get(JobStatus.SUCCEEDED, 0) + failed = status_counts.get(JobStatus.FAILED, 0) + skipped = status_counts.get(JobStatus.SKIPPED, 0) + cancelled = status_counts.get(JobStatus.CANCELLED, 0) + + if succeeded == total_jobs: + logger.debug(f"All jobs succeeded in pipeline {self.pipeline_id}") + return PipelineStatus.SUCCEEDED + + if (succeeded + failed + skipped + cancelled) == total_jobs: + # All FAILED jobs here are leaves (non-leaf FAILED would have returned FAILED above) + logger.debug(f"Pipeline {self.pipeline_id} completed partially: {status_counts}") + return PipelineStatus.PARTIAL + + logger.warning(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") + send_slack_message(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: 
{status_counts}") + return PipelineStatus.PARTIAL + + return PipelineStatus.CANCELLED + + def _compute_status_with_leaf_failures( + self, + old_status: PipelineStatus, + status_counts: dict[JobStatus, int], + total_jobs: int, + ) -> PipelineStatus: + """Determine pipeline status when all failed jobs are leaves (no dependents). + + Leaf failures do not fail the pipeline. The pipeline stays RUNNING while sibling + jobs are still active, and settles to PARTIAL (not FAILED) once all jobs are + terminal. This mirrors the no-failure path in _compute_new_status, but skips the + SUCCEEDED case since at least one FAILED job is present. + + Args: + old_status: The pipeline's current status (used as fallback when pending jobs exist). + status_counts: Mapping of JobStatus to job count for this pipeline. + total_jobs: Total number of jobs in the pipeline (sum of status_counts). + + Returns: + PipelineStatus: RUNNING, old_status (no change), PARTIAL, or CANCELLED. + """ + if status_counts.get(JobStatus.RUNNING, 0) > 0 or status_counts.get(JobStatus.QUEUED, 0) > 0: + return PipelineStatus.RUNNING + + if status_counts.get(JobStatus.PENDING, 0) > 0: + return old_status + + if status_counts.get(JobStatus.SUCCEEDED, 0) > 0: + succeeded = status_counts.get(JobStatus.SUCCEEDED, 0) + failed = status_counts.get(JobStatus.FAILED, 0) + skipped = status_counts.get(JobStatus.SKIPPED, 0) + cancelled = status_counts.get(JobStatus.CANCELLED, 0) + + if (succeeded + failed + skipped + cancelled) == total_jobs: + logger.debug(f"Pipeline {self.pipeline_id} completed partially with leaf failures: {status_counts}") + return PipelineStatus.PARTIAL + + logger.warning(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") + send_slack_message(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") + return PipelineStatus.PARTIAL + + return PipelineStatus.CANCELLED + + async def enqueue_ready_jobs(self) -> None: + """Find and enqueue all jobs 
that are ready to run. + + Identifies pending jobs in the pipeline (including retries) whose dependencies + are satisfied, updates their status to QUEUED, and enqueues them in ARQ. + This handles both independent jobs and jobs with dependencies, as well as + jobs that have been prepared for retry. + + Does not enqueue jobs if the pipeline is paused. + + Raises: + DatabaseConnectionError: Cannot query pending jobs or job dependencies + JobStateError: Cannot update job state to QUEUED (critical failure) + PipelineCoordinationError: One or more jobs failed to enqueue in ARQ + + Process: + 1. Ensure pipeline is running (skip enqueues if not) + 2. Query all PENDING jobs in pipeline (includes retries) + 3. Check dependency requirements for each job + 4. For jobs ready to run: flush status change and enqueue in ARQ + + Note: + - This method handles both independent and dependent jobs uniformly - + any job in PENDING status that meets its dependency requirements + (including jobs with no dependencies) will be enqueued, unless the + pipeline is paused. + + Examples: + Basic usage: + >>> # Enqueue all ready jobs in the pipeline + >>> await pipeline_manager.enqueue_ready_jobs() + + Handling coordination errors: + >>> try: + ... await pipeline_manager.enqueue_ready_jobs() + ... except PipelineCoordinationError as e: + ... logger.error(f"Failed to enqueue some jobs: {e}") + ... 
# Optionally cancel pipeline or take other recovery actions + """ + current_status = self.get_pipeline_status() + if current_status not in RUNNING_PIPELINE_STATUSES: + logger.error(f"Pipeline {self.pipeline_id} is not running - skipping job enqueue") + raise PipelineStateError( + f"Pipeline {self.pipeline_id} is in status {current_status} and cannot enqueue jobs" + ) + + jobs_to_queue: list[JobRun] = [] + for job in self.get_pending_jobs(): + job_manager = JobManager(self.db, self.redis, job.id) + + # Attempt to enqueue the job if dependencies are met + if self.can_enqueue_job(job): + job_manager.prepare_queue() + jobs_to_queue.append(job) + continue + + should_skip, reason = self.should_skip_job_due_to_dependencies(job) + if should_skip: + job_manager.update_status_message(f"Job skipped: {reason}") + job_manager.skip_job( + result=JobExecutionOutcome.skipped(data={"reason": reason, "timestamp": datetime.now().isoformat()}) + ) + logger.info(f"Skipped job {job.urn} due to unreachable dependencies: {reason}") + continue + + # Commit status changes (QUEUED and skipped) before the async Redis + # enqueue loop. This releases PostgreSQL row-level locks held by flush(). + # Without committing here, a downstream job started by ARQ during one of + # the await yields in the enqueue loop could attempt a synchronous UPDATE + # on a locked row, blocking the event loop and deadlocking the worker + # (psycopg2 is synchronous, so the blocked UPDATE freezes asyncio). 
+ self.db.commit() + + if not jobs_to_queue: + logger.debug(f"No ready jobs to enqueue in pipeline {self.pipeline_id}") + return + + successfully_enqueued = [] + for job in jobs_to_queue: + await self._enqueue_in_arq(job, is_retry=job.retry_count > 0) + successfully_enqueued.append(job.urn) + logger.info(f"Successfully enqueued job {job.urn}") + + logger.info(f"Successfully enqueued {len(successfully_enqueued)} jobs: {successfully_enqueued}.") + + def cancel_remaining_jobs(self, reason: str = "Pipeline cancelled") -> None: + """Cancel all remaining jobs in the pipeline when the pipeline fails. + + Finds all active pipeline jobs and marks them as SKIPPED or CANCELLED + to prevent further execution when the pipeline has failed. Records the + cancellation reason and timestamp for audit purposes. + + Args: + reason: Human-readable reason for cancellation + + Raises: + DatabaseConnectionError: Cannot query jobs to cancel + PipelineCoordinationError: Failed to cancel one or more jobs + """ + remaining_jobs = self.get_active_jobs() + if not remaining_jobs: + logger.debug(f"No jobs to cancel in pipeline {self.pipeline_id}") + else: + bulk_cancellation_result = construct_bulk_cancellation_result(reason) + + for job in remaining_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + + # Skip PENDING jobs, cancel RUNNING/QUEUED jobs + if job_manager.get_job_status() == JobStatus.PENDING: + job_manager.skip_job(result=bulk_cancellation_result) + logger.debug(f"Skipped job {job.urn}: {reason}") + else: + job_manager.cancel_job(result=bulk_cancellation_result) + logger.debug(f"Cancelled job {job.urn}: {reason}") + + logger.info(f"Cancelled all remaining jobs in pipeline {self.pipeline_id}") + + async def cancel_pipeline(self, reason: str = "Pipeline cancelled") -> None: + """Cancel the entire pipeline and all remaining jobs. + + Sets the pipeline status to CANCELLED and cancels all PENDING and QUEUED + jobs in the pipeline. 
Records the cancellation reason for audit purposes. + + Args: + reason: Human-readable reason for pipeline cancellation + + Raises: + DatabaseConnectionError: Cannot query or update pipeline/jobs + PipelineCoordinationError: Failed to cancel pipeline or jobs + + Example: + >>> # Cancel a running pipeline due to external event + >>> await pipeline_manager.cancel_pipeline( + ... reason="User requested cancellation" + ... ) + """ + current_status = self.get_pipeline_status() + + if current_status in TERMINAL_PIPELINE_STATUSES: + logger.error(f"Pipeline {self.pipeline_id} is already in terminal status {current_status}") + raise PipelineTransitionError( + f"Pipeline {self.pipeline_id} is in terminal state {current_status} and may not be cancelled" + ) + + self.set_pipeline_status(PipelineStatus.CANCELLED) + self.db.flush() + logger.info(f"Pipeline {self.pipeline_id} cancelled: {reason}") + + await self.coordinate_pipeline() + + async def pause_pipeline(self, reason: str = "Pipeline paused") -> None: + """Pause the pipeline to stop further job execution. + + Sets the pipeline status to PAUSED, preventing new jobs from being enqueued + while allowing currently running jobs to complete. This provides a way to + temporarily halt pipeline execution without cancelling remaining jobs. + + Args: + reason: Human-readable reason for pausing the pipeline + + Raises: + DatabaseConnectionError: Cannot query or update pipeline + JobStateError: Cannot update pipeline state + PipelineTransitionError: Pipeline cannot be paused due to current state + + Example: + >>> # Pause pipeline for maintenance + >>> was_paused = manager.pause_pipeline( + ... reason="System maintenance" + ... 
) + """ + current_status = self.get_pipeline_status() + + if current_status in TERMINAL_PIPELINE_STATUSES: + logger.error(f"Pipeline {self.pipeline_id} cannot be paused (current status: {current_status})") + raise PipelineTransitionError( + f"Pipeline {self.pipeline_id} is in terminal state {current_status} and may not be paused" + ) + + if current_status == PipelineStatus.PAUSED: + logger.error(f"Pipeline {self.pipeline_id} is already paused") + raise PipelineTransitionError(f"Pipeline {self.pipeline_id} is already paused") + + self.set_pipeline_status(PipelineStatus.PAUSED) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} paused (was {current_status}): {reason}") + await self.coordinate_pipeline() + + async def unpause_pipeline(self, reason: str = "Pipeline unpaused") -> None: + """Unpause the pipeline and resume job execution. + + Sets the pipeline status from PAUSED back to RUNNING and enqueues any + jobs that are ready to run. This resumes normal pipeline execution + after a pause. + + Args: + reason: Human-readable reason for unpausing the pipeline + + Raises: + DatabaseConnectionError: Cannot query or update pipeline + PipelineStateError: Cannot update pipeline state + PipelineCoordinationError: Failed to enqueue ready jobs after unpause + + Example: + >>> # Resume pipeline after maintenance + >>> was_unpaused = await manager.unpause_pipeline( + ... reason="Maintenance complete" + ... 
) + """ + current_status = self.get_pipeline_status() + + if current_status != PipelineStatus.PAUSED: + logger.error( + f"Pipeline {self.pipeline_id} is not paused (current status: {current_status}) and may not be unpaused" + ) + raise PipelineTransitionError( + f"Pipeline {self.pipeline_id} is not paused (current status: {current_status}) and may not be unpaused" + ) + + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} unpaused (was {current_status}): {reason}") + await self.coordinate_pipeline() + + async def restart_pipeline(self) -> None: + """Restart the entire pipeline from the beginning. + + Resets ALL jobs in the pipeline to PENDING status, resets pipeline state to RUNNING, and re-enqueues + independent jobs. This is useful for recovering from pipeline-wide issues. + + Raises: + PipelineCoordinationError: If restart operations fail + DatabaseConnectionError: If database operations fail + + Example: + >>> await manager.restart_pipeline() + >>> print("Pipeline restarted") + """ + all_jobs = self.get_all_jobs() + if not all_jobs: + logger.debug(f"No jobs found for pipeline {self.pipeline_id} restart") + return + + # Reset all jobs to PENDING status + for job in all_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + job_manager.reset_job() + + # Reset pipeline status to created + self.set_pipeline_status(PipelineStatus.CREATED) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} reset for restart successfully") + await self.start_pipeline() + + def can_enqueue_job(self, job: JobRun) -> bool: + """Check if a job can be enqueued based on dependency requirements. + + Validates that all job dependencies are satisfied according to their + dependency types before allowing enqueue. Prevents premature execution + of jobs that depend on incomplete predecessors. 
+ + Args: + job: JobRun instance to check dependencies for + + Returns: + bool: True if all dependencies are satisfied and job can be enqueued, + False if dependencies are still pending + + Raises: + DatabaseConnectionError: Cannot query job dependencies + JobStateError: Corrupted dependency data detected + + Dependency Types: + - SUCCESS_REQUIRED: Dependent job must have SUCCEEDED status + - COMPLETION_REQUIRED: Dependent job must be SUCCEEDED or FAILED + """ + for dependency, dependent_job in self.get_dependencies_for_job(job): + try: + if not job_dependency_is_met( + dependency_type=dependency.dependency_type, + dependent_job_status=dependent_job.status, + ): + logger.debug(f"Job {job.urn} cannot be enqueued; dependency on job {dependent_job.urn} not met") + return False + + except (AttributeError, KeyError, TypeError, ValueError) as e: + logger.debug(f"Invalid dependency data detected for job {job.id}: {e}") + raise PipelineStateError(f"Corrupted dependency data during enqueue check for job {job.id}: {e}") + + logger.debug(f"All dependencies satisfied for job {job.urn}; ready to enqueue") + return True + + def should_skip_job_due_to_dependencies(self, job: JobRun) -> tuple[bool, str]: + """Check if a job's dependencies are unsatisfiable and the job should be skipped. + + Validates whether a job's dependencies can still be met based on the + current status of dependent jobs. This helps identify jobs that should + be skipped because their dependencies are in terminal non-success states. + + Args: + job: JobRun instance to check dependencies for + + Returns: + tuple[bool, str]: (True, reason) if dependencies cannot be met and job + should be skipped, (False, "") if dependencies may + still be satisfied + + Raises: + DatabaseConnectionError: Cannot query job dependencies + PipelineStateError: Critical state persistence failure + + Notes: + - A job is considered unreachable if any of its dependencies that + require SUCCESS have FAILED, SKIPPED, or CANCELLED status. 
+ - A job is considered unreachable if any of its dependencies that + require COMPLETION have SKIPPED or CANCELLED status. + + Examples: + Basic usage: + >>> should_skip, reason = manager.should_skip_job_due_to_dependencies(job) + >>> if should_skip: + ... print(f"Job should be skipped: {reason}") + >>> else: + ... print("Job dependencies may still be satisfied") + """ + for dependency, dep_job in self.get_dependencies_for_job(job): + try: + should_skip, reason = job_should_be_skipped_due_to_unfulfillable_dependency( + dependency_type=dependency.dependency_type, + dependent_job_status=dep_job.status, + ) + + if should_skip: + logger.debug(f"Job {job.urn} should be skipped due to dependency on job {dep_job.urn}: {reason}") + # guaranteed to be str if should_skip is True + return True, reason # type: ignore + + except (AttributeError, KeyError, TypeError, ValueError) as e: + logger.debug(f"Invalid dependency data detected for job {job.id}: {e}") + raise PipelineStateError(f"Corrupted dependency data during skip check for job {job.id}: {e}") + + logger.debug(f"Job {job.urn} dependencies may still be satisfied; not skipping") + return False, "" + + async def retry_failed_jobs(self) -> None: + """Retry all failed jobs in the pipeline. + + Resets failed jobs to PENDING status and re-enqueues them for execution. + Only affects jobs with FAILED status; other jobs remain unchanged. 
+ + Raises: + PipelineCoordinationError: If job retry fails + DatabaseConnectionError: If database operations fail + + Example: + >>> await manager.retry_failed_jobs() + >>> print("Successfully retried failed jobs") + """ + failed_jobs = self.get_failed_jobs() + if not failed_jobs: + logger.debug(f"No failed jobs found for pipeline {self.pipeline_id}") + return + + for job in failed_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + await job_manager.prepare_retry() + + # Ensure the pipeline status is set to running so jobs are picked up + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + await self.coordinate_pipeline() + + async def retry_unsuccessful_jobs(self) -> None: + """Retry all unsuccessful jobs in the pipeline. + + Resets unsuccessful jobs (CANCELLED, SKIPPED, FAILED) to PENDING status + and re-enqueues them for execution. This is useful for recovering from + partial failures or interruptions. + + Raises: + PipelineCoordinationError: If job retry fails + DatabaseConnectionError: If database operations fail + + Example: + >>> await manager.retry_unsuccessful_jobs() + >>> print("Successfully retried unsuccessful jobs") + """ + unsuccessful_jobs = self.get_unsuccessful_jobs() + if not unsuccessful_jobs: + logger.debug(f"No unsuccessful jobs found for pipeline {self.pipeline_id}") + return + + for job in unsuccessful_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + await job_manager.prepare_retry() + + # Ensure the pipeline status is set to running so jobs are picked up + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + await self.coordinate_pipeline() + + async def retry_pipeline(self) -> None: + """Retry all unsuccessful jobs in the pipeline. + + Convenience method to retry all jobs that did not complete successfully, + including CANCELLED, SKIPPED, and FAILED jobs. Resets their status to PENDING + and re-enqueues them for execution. 
+ + This is equivalent to calling `retry_unsuccessful_jobs` but provides a clearer + semantic for pipeline-level retries. + """ + await self.retry_unsuccessful_jobs() + + def get_jobs_by_status(self, status: list[JobStatus]) -> Sequence[JobRun]: + """Get all jobs in the pipeline with a specific status. + + Args: + status: JobStatus to filter jobs by + + Returns: + Sequence[JobRun]: List of jobs with the specified status ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> running_jobs = manager.get_jobs_by_status([JobStatus.RUNNING]) + >>> print(f"Found {len(running_jobs)} running jobs") + """ + try: + return ( + self.db.execute( + select(JobRun) + .where(and_(JobRun.pipeline_id == self.pipeline_id, JobRun.status.in_(status))) + .order_by(JobRun.created_at) + ) + .scalars() + .all() + ) + except SQLAlchemyError as e: + logger.debug( + f"Database query failed getting jobs with status {status} for pipeline {self.pipeline_id}: {e}" + ) + raise DatabaseConnectionError(f"Failed to get jobs with status {status}: {e}") + + def get_pending_jobs(self) -> Sequence[JobRun]: + """Get all PENDING jobs in the pipeline. + + Convenience method for fetching all pending jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.PENDING]) but provides + clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of pending jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> pending_jobs = manager.get_pending_jobs() + >>> print(f"Found {len(pending_jobs)} pending jobs") + """ + return self.get_jobs_by_status([JobStatus.PENDING]) + + def get_running_jobs(self) -> Sequence[JobRun]: + """Get all RUNNING jobs in the pipeline. + + Convenience method for fetching all running jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.RUNNING]) but provides + clearer intent and a more focused API. 
+ + Returns: + Sequence[JobRun]: List of running jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> running_jobs = manager.get_running_jobs() + >>> print(f"Found {len(running_jobs)} running jobs") + """ + return self.get_jobs_by_status([JobStatus.RUNNING]) + + def get_active_jobs(self) -> Sequence[JobRun]: + """Get all active jobs in the pipeline. + + Convenience method for fetching all active jobs. This is equivalent + to calling get_jobs_by_status(ACTIVE_JOB_STATUSES) but provides + clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of remaining jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> active_jobs = manager.get_active_jobs() + >>> print(f"Found {len(active_jobs)} active jobs") + """ + return self.get_jobs_by_status(ACTIVE_JOB_STATUSES) + + def get_failed_jobs(self) -> Sequence[JobRun]: + """Get all failed jobs in the pipeline. + + Convenience method for fetching all failed jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.FAILED]) but provides + clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of failed jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> failed_jobs = manager.get_failed_jobs() + >>> print(f"Found {len(failed_jobs)} failed jobs for potential retry") + """ + return self.get_jobs_by_status([JobStatus.FAILED]) + + def get_unsuccessful_jobs(self) -> Sequence[JobRun]: + """Get all unsuccessful jobs in the pipeline. + + Convenience method for fetching all unsuccessful (but terminated) jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.FAILED, JobStatus.CANCELLED, JobStatus.SKIPPED]) + but provides clearer intent and a more focused API. 
+ + Returns: + Sequence[JobRun]: List of unsuccessful jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> unsuccessful_jobs = manager.get_unsuccessful_jobs() + >>> print(f"Found {len(unsuccessful_jobs)} unsuccessful jobs") + """ + return self.get_jobs_by_status(CANCELLED_JOB_STATUSES) + + def get_all_jobs(self) -> Sequence[JobRun]: + """Get all jobs in the pipeline regardless of status. + + Returns: + Sequence[JobRun]: List of all jobs in pipeline ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Examples: + >>> all_jobs = manager.get_all_jobs() + >>> print(f"Total jobs in pipeline: {len(all_jobs)}") + """ + try: + return ( + self.db.execute( + select(JobRun).where(JobRun.pipeline_id == self.pipeline_id).order_by(JobRun.created_at) + ) + .scalars() + .all() + ) + except SQLAlchemyError as e: + logger.debug(f"Database query failed getting all jobs for pipeline {self.pipeline_id}: {e}") + raise DatabaseConnectionError(f"Failed to get all jobs: {e}") + + def get_dependencies_for_job(self, job: JobRun) -> Sequence[tuple[JobDependency, JobRun]]: + """Get all dependencies for a specific job. + + Args: + job: JobRun instance to fetch dependencies for + + Returns: + Sequence[Row[tuple[JobDependency, JobRun]]]: List of dependencies with associated JobRun instances + + Raises: + DatabaseConnectionError: Cannot query job dependencies + + Examples: + >>> dependencies = manager.get_dependencies_for_job(job) + >>> for dependency, dep_job in dependencies: + ... print(f"Job {job.urn} depends on job {dep_job.urn} with dependency type {dependency.dependency_type}") + """ + try: + # Although the returned type wraps tuples in a row, the contents are still accessible as tuples. + # This allows unpacking as shown in the example, and we can ignore the type checker warning so + # callers can have access to the simpler interface. 
+ return self.db.execute( + select(JobDependency, JobRun) + .join(JobRun, JobDependency.depends_on_job_id == JobRun.id) + .where(JobDependency.id == job.id) + ).all() # type: ignore + except SQLAlchemyError as e: + logger.debug(f"SQL query failed for dependencies of job {job.id}: {e}") + raise DatabaseConnectionError(f"Failed to get job dependencies for job {job.id}: {e}") + + def get_dependents_for_job(self, job: JobRun) -> Sequence[JobRun]: + """Get all jobs in this pipeline that depend on the given job. + + Args: + job: The upstream JobRun to find dependents for + + Returns: + Sequence[JobRun]: Jobs in this pipeline that list job as a dependency + + Raises: + DatabaseConnectionError: Cannot query job dependency information + """ + try: + return ( + self.db.execute( + select(JobRun) + .join(JobDependency, JobDependency.id == JobRun.id) + .where( + JobDependency.depends_on_job_id == job.id, + JobRun.pipeline_id == self.pipeline_id, + ) + ) + .scalars() + .all() + ) + except SQLAlchemyError as e: + logger.debug(f"SQL query failed for dependents of job {job.id}: {e}") + raise DatabaseConnectionError(f"Failed to get job dependents for job {job.id}: {e}") + + def is_leaf_job(self, job: JobRun) -> bool: + """Return True if no other job in this pipeline depends on job. + + Args: + job: JobRun to check + + Returns: + bool: True if job has no dependents in this pipeline + """ + return len(self.get_dependents_for_job(job)) == 0 + + def get_failed_leaf_jobs(self) -> list[JobRun]: + """Get all failed jobs in this pipeline that are leaves (no dependents). 
+ + Returns: + list[JobRun]: Failed jobs with no dependents in this pipeline + + Raises: + DatabaseConnectionError: Cannot query job or dependency information + """ + try: + non_leaf_ids = set( + self.db.execute( + select(JobDependency.depends_on_job_id) + .join(JobRun, JobDependency.id == JobRun.id) + .where(JobRun.pipeline_id == self.pipeline_id) + ) + .scalars() + .all() + ) + except SQLAlchemyError as e: + logger.debug(f"SQL query failed getting non-leaf job IDs for pipeline {self.pipeline_id}: {e}") + raise DatabaseConnectionError(f"Failed to get non-leaf job IDs for pipeline {self.pipeline_id}: {e}") + + return [job for job in self.get_failed_jobs() if job.id not in non_leaf_ids] + + def get_pipeline(self) -> Pipeline: + """Get the Pipeline instance for this manager. + + Returns: + Pipeline: The Pipeline instance associated with this manager + + Raises: + DatabaseConnectionError: Cannot query pipeline information + + Examples: + >>> pipeline = manager.get_pipeline() + >>> print(f"Pipeline ID: {pipeline.id}, Status: {pipeline.status}") + """ + + try: + return self.db.execute(select(Pipeline).where(Pipeline.id == self.pipeline_id)).scalar_one() + except SQLAlchemyError as e: + logger.debug(f"Database query failed getting pipeline {self.pipeline_id}: {e}") + raise DatabaseConnectionError(f"Failed to get pipeline {self.pipeline_id}: {e}") + + def get_job_counts_by_status(self) -> dict[JobStatus, int]: + """Get count of jobs by status for monitoring. + + Returns a simple dictionary mapping job statuses to their counts, + useful for dashboard displays and monitoring systems. 
+ + Returns: + dict[JobStatus, int]: Dictionary mapping JobStatus to count + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> counts = manager.get_job_counts_by_status() + >>> print(f"Failed jobs: {counts.get(JobStatus.FAILED, 0)}") + """ + try: + job_counts = self.db.execute( + select(JobRun.status, func.count(JobRun.id)) + .where(JobRun.pipeline_id == self.pipeline_id) + .group_by(JobRun.status) + ).all() + except SQLAlchemyError as e: + logger.debug(f"Database query failed getting job counts for pipeline {self.pipeline_id}: {e}") + raise DatabaseConnectionError(f"Failed to get job counts for pipeline {self.pipeline_id}: {e}") + + return {status: count for status, count in job_counts} + + def get_pipeline_progress(self) -> dict: + """Get detailed pipeline progress statistics. + + Provides comprehensive pipeline progress information including job counts, + completion percentage, duration, and estimated completion time. + + Returns: + dict: Pipeline progress statistics with the following keys: + - total_jobs: Total number of jobs in pipeline + - completed_jobs: Number of jobs in terminal states + - successful_jobs: Number of successfully completed jobs + - failed_jobs: Number of failed jobs + - running_jobs: Number of currently running jobs + - pending_jobs: Number of jobs waiting to run + - completion_percentage: Percentage of jobs completed (0-100) + - duration: Time pipeline has been running (in seconds) + - status_counts: Dictionary of job counts by status + + Raises: + DatabaseConnectionError: Cannot query pipeline or job information + + Example: + >>> progress = manager.get_pipeline_progress() + >>> print(f"Pipeline {progress['completion_percentage']:.1f}% complete") + """ + status_counts = self.get_job_counts_by_status() + pipeline = self.get_pipeline() + + try: + total_jobs = sum(status_counts.values()) + + if total_jobs == 0: + return { + "total_jobs": 0, + "completed_jobs": 0, + "successful_jobs": 0, + "failed_jobs": 0, 
+ "running_jobs": 0, + "pending_jobs": 0, + "completion_percentage": 100.0, + "duration": 0, + "status_counts": {}, + } + + # Calculate progress metrics + successful_jobs = status_counts.get(JobStatus.SUCCEEDED, 0) + failed_jobs = status_counts.get(JobStatus.FAILED, 0) + running_jobs = status_counts.get(JobStatus.RUNNING, 0) + status_counts.get(JobStatus.QUEUED, 0) + pending_jobs = status_counts.get(JobStatus.PENDING, 0) + skipped_jobs = status_counts.get(JobStatus.SKIPPED, 0) + cancelled_jobs = status_counts.get(JobStatus.CANCELLED, 0) + + completed_jobs = successful_jobs + failed_jobs + skipped_jobs + cancelled_jobs + completion_percentage = (completed_jobs / total_jobs) * 100 if total_jobs > 0 else 0 + + # Calculate duration + duration = 0 + if pipeline.created_at: + # `pipeline.created_at` is stored as a timezone-aware timestamp (TIMESTAMPTZ), + # so compare against a timezone-aware "now" to avoid mixing naive/aware datetimes. + end_time = pipeline.finished_at or datetime.now(timezone.utc) + duration = int((end_time - pipeline.created_at).total_seconds()) + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Invalid data detected calculating progress for pipeline {self.pipeline_id}: {e}") + raise PipelineStateError(f"Corrupted data during progress calculation for pipeline {self.pipeline_id}: {e}") + + return { + "total_jobs": total_jobs, + "completed_jobs": completed_jobs, + "successful_jobs": successful_jobs, + "failed_jobs": failed_jobs, + "running_jobs": running_jobs, + "pending_jobs": pending_jobs, + "completion_percentage": completion_percentage, + "duration": duration, + "status_counts": status_counts, + } + + def get_pipeline_status(self) -> PipelineStatus: + """Get the current status of the pipeline. 
+ + Returns: + PipelineStatus: Current status of the pipeline + + Raises: + DatabaseConnectionError: Cannot query pipeline information + + Example: + >>> status = manager.get_pipeline_status() + >>> print(f"Pipeline status: {status}") + """ + return self.get_pipeline().status + + def set_pipeline_status(self, new_status: PipelineStatus) -> None: + """Set the status of the pipeline. + + Args: + new_status: PipelineStatus enum value to set the pipeline to + + Raises: + DatabaseConnectionError: Cannot query or update pipeline information + PipelineStateError: Cannot update pipeline status + + Example: + >>> manager.set_pipeline_status(PipelineStatus.PAUSED) + >>> print("Pipeline paused") + + Note: + This method does not perform any validation on the status transition, + nor does it attempt to coordinate the pipeline after the status change + or flush the change to the database. + """ + pipeline = self.get_pipeline() + try: + pipeline.status = new_status + + # Ensure finished_at is set/cleared appropriately. Use an aware UTC timestamp: + # these columns are TIMESTAMPTZ, and get_pipeline_progress subtracts them from + # timezone-aware datetimes (naive values would raise TypeError there). + if new_status in TERMINAL_PIPELINE_STATUSES: + pipeline.finished_at = datetime.now(timezone.utc) + else: + pipeline.finished_at = None + + # Ensure started_at is set/cleared appropriately + if new_status == PipelineStatus.CREATED: + pipeline.started_at = None + elif new_status == PipelineStatus.RUNNING and pipeline.started_at is None: + pipeline.started_at = datetime.now(timezone.utc) + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Object manipulation failed setting status for pipeline {self.pipeline_id}: {e}") + raise PipelineStateError(f"Failed to set pipeline status for {self.pipeline_id}: {e}") + + logger.info(f"Pipeline {self.pipeline_id} status set to {new_status}") + + async def _enqueue_in_arq(self, job: JobRun, is_retry: bool) -> None: + """Enqueue a job in ARQ with proper error handling and retry delay. 
+ + Args: + job: JobRun instance to enqueue + is_retry: Whether this is a retry attempt + + Raises: + PipelineCoordinationError: If ARQ enqueuing fails + """ + if not self.redis: + logger.error(f"Redis client is not configured for PipelineManager; cannot enqueue job {job.urn}") + raise PipelineCoordinationError("Redis client is not configured for job enqueueing; cannot proceed.") + + try: + defer_by = timedelta(seconds=job.retry_delay_seconds if is_retry and job.retry_delay_seconds else 0) + arq_success = await self.redis.enqueue_job( + job.job_function, job.id, _defer_by=defer_by, _job_id=arq_job_id(job) + ) + except Exception as e: + logger.debug(f"ARQ enqueue operation failed for job {job.urn}: {e}") + raise PipelineCoordinationError(f"Failed to enqueue job in ARQ: {e}") + + if arq_success: + logger.info(f"{'Retried' if is_retry else 'Enqueued'} job {job.urn} in ARQ") + else: + logger.info(f"Job {job.urn} has already been enqueued in ARQ") diff --git a/src/mavedb/worker/lib/managers/py.typed b/src/mavedb/worker/lib/managers/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py new file mode 100644 index 000000000..4b6d09a8f --- /dev/null +++ b/src/mavedb/worker/lib/managers/types.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from typing import TypedDict + + +class RetryHistoryEntry(TypedDict): + attempt: int + timestamp: str + status: str # JobStatus.value from the failed attempt + error_message: str # Brief summary of the error + reason: str # Why the retry was triggered + + +class PipelineProgress(TypedDict): + total_jobs: int + completed_jobs: int + successful_jobs: int + failed_jobs: int + running_jobs: int + pending_jobs: int + completion_percentage: float + duration: int # seconds + status_counts: dict diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py new file mode 100644 index 000000000..d6a266647 --- 
/dev/null +++ b/src/mavedb/worker/lib/managers/utils.py @@ -0,0 +1,157 @@ +"""Utility functions for job and pipeline management. + +This module provides helper functions for common operations in job and pipeline +management, such as creating standardized result structures, data formatting, and +dependency checking. +""" + +import logging +from datetime import datetime +from typing import Literal, Optional, Union + +import redis.exceptions +import requests.exceptions +import sqlalchemy.exc + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import DependencyType, FailureCategory, JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES + +logger = logging.getLogger(__name__) + + +# Exception-to-failure-category mapping for automatic classification of unhandled exceptions. +# Job authors can always pass an explicit category on the outcome for domain-specific failures. +# This mapping only covers infrastructure-level exceptions that the decorator can reasonably classify. +# +# Order matters: classify_exception() returns on the first isinstance() match, so more +# specific types must appear before their parents (e.g. requests.Timeout before OSError). +EXCEPTION_TO_FAILURE_CATEGORY: dict[type[Exception], FailureCategory] = { + # requests — all inherit from OSError, so these must come first to get precise categories. + requests.exceptions.Timeout: FailureCategory.TIMEOUT, + requests.exceptions.ConnectionError: FailureCategory.NETWORK_ERROR, + # SQLAlchemy — independent hierarchy, not caught by builtins. + sqlalchemy.exc.OperationalError: FailureCategory.NETWORK_ERROR, + sqlalchemy.exc.DisconnectionError: FailureCategory.NETWORK_ERROR, + sqlalchemy.exc.InterfaceError: FailureCategory.NETWORK_ERROR, + # Redis — independent hierarchy (redis.ConnectionError != builtins.ConnectionError). 
+ redis.exceptions.TimeoutError: FailureCategory.TIMEOUT, + redis.exceptions.ConnectionError: FailureCategory.NETWORK_ERROR, + # Builtins — catch-all for anything not matched above (e.g. raw socket errors). + ConnectionError: FailureCategory.NETWORK_ERROR, + TimeoutError: FailureCategory.TIMEOUT, + OSError: FailureCategory.NETWORK_ERROR, +} + + +def classify_exception(exc: Exception) -> FailureCategory: + """Map an exception to a FailureCategory. Uses isinstance to match parent classes.""" + for exc_type, category in EXCEPTION_TO_FAILURE_CATEGORY.items(): + if isinstance(exc, exc_type): + return category + return FailureCategory.UNKNOWN + + +def arq_job_id(job: JobRun) -> str: + """Compute the ARQ job id for the current attempt of a JobRun. + + ARQ uses the job id as a Redis key (``arq:job:`` while queued, ``arq:in-progress:`` while running, + ``arq:result:`` after completion). Because those keys also act as a deduplication check at enqueue + time, reusing the same id across retries is unsafe: the in-flight attempt's teardown can clobber + or be blocked by the next attempt. Embedding ``retry_count`` guarantees each attempt occupies a disjoint key + namespace while staying deterministic — any caller that holds the JobRun can recompute the id. + """ + return f"{job.urn}#{job.retry_count or 0}" + + +def construct_bulk_cancellation_result(reason: str) -> JobExecutionOutcome: + """Construct a standardized JobExecutionOutcome for bulk job cancellations. + + Args: + reason: Human-readable reason for the cancellation + + Returns: + JobExecutionOutcome with cancellation metadata + """ + return JobExecutionOutcome( + status=JobStatus.CANCELLED, + data={ + "reason": reason, + "timestamp": datetime.now().isoformat(), + }, + error=reason, + exception=None, + ) + + +def job_dependency_is_met(dependency_type: Optional[DependencyType], dependent_job_status: JobStatus) -> bool: + """Check if a job dependency is met based on the dependency type and the status of the dependent job. 
+ + Args: + dependency_type: Type of dependency ('success_required' or 'completion_required') + dependent_job_status: Status of the dependent job + + Returns: + bool: True if the dependency is met, False otherwise + + Notes: + - For 'success_required' dependencies, the dependent job must have succeeded. + - For 'completion_required' dependencies, the dependent job must have reached a completed status. + - If no dependency type is specified, the dependency is considered met. + """ + if not dependency_type: + logger.debug("No dependency type specified; assuming dependency is met.") + return True + + if dependency_type == DependencyType.SUCCESS_REQUIRED: + if dependent_job_status != JobStatus.SUCCEEDED: + logger.debug(f"Dependency not met: dependent job did not succeed ({dependent_job_status}).") + return False + + if dependency_type == DependencyType.COMPLETION_REQUIRED: + if dependent_job_status not in COMPLETED_JOB_STATUSES: + logger.debug( + f"Dependency not met: dependent job has not reached a completed status ({dependent_job_status})." + ) + return False + + return True + + +def job_should_be_skipped_due_to_unfulfillable_dependency( + dependency_type: Optional[DependencyType], dependent_job_status: JobStatus +) -> Union[tuple[Literal[False], None], tuple[Literal[True], str]]: + """Determine if a job should be skipped due to an unfulfillable dependency. + + Args: + dependency_type: Type of dependency ('success_required' or 'completion_required') + dependent_job_status: Status of the dependent job + + Returns: + Union[tuple[Literal[False], None], tuple[Literal[True], str]]: Tuple indicating + if the job should be skipped and the reason + + Notes: + - A job should be skipped if it has a 'success_required' dependency and the dependent job did not succeed. + """ + + # If dependency must have SUCCEEDED but is in a terminal non-success state, skip. 
+ if dependency_type == DependencyType.SUCCESS_REQUIRED: + if dependent_job_status in (JobStatus.FAILED, JobStatus.ERRORED, JobStatus.SKIPPED, JobStatus.CANCELLED): + logger.debug( + f"Job should be skipped due to unfulfillable 'success_required' dependency " + f"({dependent_job_status})." + ) + return True, f"Dependency did not succeed ({dependent_job_status})" + + # If dependency requires 'completion' and you want CANCELLED to NOT qualify, skip here too. + if dependency_type == DependencyType.COMPLETION_REQUIRED: + if dependent_job_status in (JobStatus.CANCELLED, JobStatus.SKIPPED): + logger.debug( + f"Job should be skipped due to unfulfillable 'completion_required' dependency " + f"({dependent_job_status})." + ) + return True, f"Dependency was not completed successfully ({dependent_job_status})" + + return False, None diff --git a/src/mavedb/worker/lib/py.typed b/src/mavedb/worker/lib/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/pipeline_management.md b/src/mavedb/worker/pipeline_management.md new file mode 100644 index 000000000..730befba1 --- /dev/null +++ b/src/mavedb/worker/pipeline_management.md @@ -0,0 +1,256 @@ +# Pipeline Management + +Pipelines orchestrate multi-step workflows where jobs have dependency relationships. The system handles job ordering, status propagation, failure cascading, retries, pausing, and cancellation. 
+ +## Pipeline Lifecycle + +``` + ┌──────────┐ + PipelineFactory ──► │ CREATED │ + └────┬─────┘ + │ start_pipeline job runs + ┌────▼─────┐ + ┌───────►│ RUNNING │◄───────┐ + │ └──┬───┬───┘ │ + │ │ │ │ + unpause │ │ coordinate_pipeline() + │ │ │ enqueues ready jobs + │ │ │ │ + ┌──────┴──┐ │ │ │ + │ PAUSED │◄───────┘ │ │ + └─────────┘ pause │ + │ + ┌────────────────┼────────────────┐ + │ │ │ + ┌─────▼─────┐ ┌──────▼──────┐ ┌──────▼──────┐ + │ SUCCEEDED │ │ FAILED │ │ PARTIAL │ + │ (all ok) │ │ (any error) │ │ (mixed) │ + └────────────┘ └─────────────┘ └─────────────┘ + + ┌─────────────┐ + cancel_pipeline │ CANCELLED │ + ──────────────► │ + └─────────────┘ +``` + +## Defining a New Pipeline + +Pipelines are declared in `src/mavedb/lib/workflow/definitions.py` as entries in `PIPELINE_DEFINITIONS`: + +```python +PIPELINE_DEFINITIONS: dict[str, PipelineDefinition] = { + "my_new_pipeline": { + "description": "Human-readable description of what this pipeline does", + "job_definitions": [ + { + "key": "first_job", # Unique key within pipeline + "function": "first_job_function_name", # Must match registered function name + "type": JobType.VARIANT_CREATION, # Job category enum + "params": { + "score_set_id": None, # None = filled at runtime from pipeline_params + "correlation_id": None, + }, + "dependencies": [], # No dependencies = runs first + }, + { + "key": "second_job", + "function": "second_job_function_name", + "type": JobType.VARIANT_MAPPING, + "params": { + "score_set_id": None, + "correlation_id": None, + }, + "dependencies": [ + ("first_job", DependencyType.SUCCESS_REQUIRED), # Runs only after first_job succeeds + ], + }, + { + "key": "optional_annotation", + "function": "annotate_function_name", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "score_set_id": None, + "correlation_id": None, + }, + "dependencies": [ + ("second_job", DependencyType.COMPLETION_REQUIRED), # Runs even if second_job fails + ], + }, + ], + }, +} +``` + +### Key rules for 
pipeline definitions + +- **`key`** must be unique within the pipeline. By convention, use the function name. For repeated functions (e.g., `refresh_clinvar_controls` for different date ranges), add a suffix: `refresh_clinvar_controls_202501`. +- **`function`** must match a registered function name in `BACKGROUND_FUNCTIONS`. +- **`params`** values of `None` are populated at runtime from `pipeline_params`. Values with actual data (e.g., `"year": 2025`) are used as-is. +- **`dependencies`** reference other jobs by their `key`. Use `SUCCESS_REQUIRED` when the dependent job cannot proceed without the prerequisite's output. Use `COMPLETION_REQUIRED` when the dependent job should run regardless of whether the prerequisite succeeded or failed. + +## How Pipelines Are Created and Triggered + +### From a Router Endpoint + +```python +# In src/mavedb/routers/score_sets.py +pipeline_factory = PipelineFactory(session=db) +pipeline, pipeline_entrypoint = pipeline_factory.create_pipeline( + pipeline_name="validate_map_annotate_score_set", + creating_user=user_data.user, + pipeline_params={ + "correlation_id": correlation_id_for_context(), + "score_set_id": item.id, + "updater_id": user_data.user.id, + "scores_file_key": scores_file_key, + "counts_file_key": counts_file_key, + "score_columns_metadata": {...}, + "count_columns_metadata": {...}, + }, +) + +# Enqueue only the start_pipeline entrypoint — coordination handles the rest. +# arq_job_id takes the JobRun itself (it derives the id from job.urn and job.retry_count). +job = await worker.enqueue_job( + pipeline_entrypoint.job_function, + pipeline_entrypoint.id, + _job_id=arq_job_id(pipeline_entrypoint), +) +``` + +### What PipelineFactory.create_pipeline() Does + +1. Looks up `PIPELINE_DEFINITIONS[pipeline_name]` +2. Creates a `Pipeline` record (status=CREATED) +3. Creates a `start_pipeline` `JobRun` as the pipeline entrypoint +4. For each `JobDefinition` in the pipeline: creates a `JobRun` with params merged from `pipeline_params` +5. For each dependency: creates a `JobDependency` record +6. 
Commits everything and returns `(pipeline, start_pipeline_job_run)` + +### From a Script + +```python +# In src/mavedb/scripts/run_pipeline.py +pipeline_factory = PipelineFactory(session=db) +pipeline, entrypoint = pipeline_factory.create_pipeline( + pipeline_name="validate_map_annotate_score_set", + creating_user=user, + pipeline_params={...}, +) +``` + +## Coordination Loop + +The `PipelineManager.coordinate_pipeline()` method is the heart of pipeline orchestration. It runs after every job completes (called by the `@with_pipeline_management` decorator): + +```python +async def coordinate_pipeline(self): + # 1. Evaluate pipeline status from job states + new_status = self.transition_pipeline_status() + self.db.flush() + + # 2. If pipeline failed/cancelled → cancel remaining jobs + if new_status in CANCELLED_PIPELINE_STATUSES: + self.cancel_remaining_jobs(reason="Pipeline failed or cancelled") + + # 3. If pipeline still running → find and enqueue ready jobs + if new_status in RUNNING_PIPELINE_STATUSES: + await self.enqueue_ready_jobs() + + # 4. 
Re-evaluate status (some jobs may have been skipped due to unfulfillable deps) + self.transition_pipeline_status() + self.db.flush() +``` + +### How `transition_pipeline_status()` Determines Status + +The method counts jobs by status and applies these rules in order: + +| Condition | New Pipeline Status | +|-----------|-------------------| +| Any job `ERRORED` | `FAILED` | +| Any **non-leaf** job `FAILED` (other jobs depend on it) | `FAILED` | +| Only **leaf** jobs `FAILED` (nothing depends on them), siblings still active | `RUNNING` | +| Only leaf jobs `FAILED`, all jobs terminal, at least one `SUCCEEDED` | `PARTIAL` | +| Only leaf jobs `FAILED`, all jobs terminal, none `SUCCEEDED` | `CANCELLED` | +| Any job `RUNNING` or `QUEUED` (no errors or non-leaf failures) | `RUNNING` | +| Any job `PENDING` | No change (waiting for coordination) | +| All jobs `SUCCEEDED` | `SUCCEEDED` | +| Mix of `SUCCEEDED` + `FAILED`(leaf)/`SKIPPED`/`CANCELLED` | `PARTIAL` | +| All remaining jobs `CANCELLED` or `SKIPPED` | `CANCELLED` | + +**Leaf vs non-leaf**: A job is a *leaf* if no other job in the pipeline depends on it. Leaf failures do not propagate — sibling jobs continue running and the pipeline settles to `PARTIAL` rather than `FAILED`. Non-leaf failures (where downstream jobs cannot proceed) always fail the pipeline immediately. + +### How `enqueue_ready_jobs()` Works + +For each PENDING job in the pipeline: +1. Check if all dependencies are met (via `can_enqueue_job()`) +2. If met: mark as QUEUED via `JobManager.prepare_queue()` +3. If dependencies are unfulfillable (e.g., hard dependency on a failed job): mark as SKIPPED +4. **Commit** all status changes before the async enqueue loop (prevents PostgreSQL deadlocks) +5. 
Enqueue each QUEUED job in ARQ + +### Dependency Resolution + +A dependency is **met** when: +- `SUCCESS_REQUIRED`: prerequisite job status is `SUCCEEDED` +- `COMPLETION_REQUIRED`: prerequisite job is in any completed state (`SUCCEEDED`, `FAILED`, `ERRORED`) + +A dependency is **unfulfillable** when: +- `SUCCESS_REQUIRED`: prerequisite job is in a terminal non-success state (`FAILED`, `ERRORED`, `SKIPPED`, `CANCELLED`) + +When a dependency is unfulfillable, the dependent job is proactively **skipped** rather than left pending forever. + +## Pipeline Operations + +### Pause / Unpause + +```python +await pipeline_manager.pause_pipeline(reason="Maintenance window") +# Running jobs complete, but no new jobs are enqueued +# ...later... +await pipeline_manager.unpause_pipeline(reason="Maintenance complete") +# Resumes coordination, enqueues ready jobs +``` + +### Cancel + +```python +await pipeline_manager.cancel_pipeline(reason="User requested") +# Sets pipeline to CANCELLED, skips PENDING jobs, cancels QUEUED/RUNNING jobs +``` + +### Restart + +```python +await pipeline_manager.restart_pipeline() +# Resets ALL jobs to PENDING, resets pipeline to CREATED, then starts fresh +``` + +## Failure and Retry Behavior + +When a job fails: + +1. The `@with_job_management` decorator marks it as `FAILED` or `ERRORED` +2. It checks `should_retry()`: retry count < max and failure category is retryable +3. If retryable: `prepare_retry()` resets job to `PENDING` with incremented `retry_count` +4. The `@with_pipeline_management` decorator calls `coordinate_pipeline()` +5. Coordination finds the retried job (now PENDING) and re-enqueues it if dependencies are met +6. If not retryable and the job is a **non-leaf** (other jobs depend on it): job stays `FAILED`, coordination marks pipeline as `FAILED`, cancels remaining jobs +7. 
If not retryable and the job is a **leaf** (nothing depends on it): job stays `FAILED`, sibling jobs continue running, pipeline eventually settles to `PARTIAL` + +### Stalled Job Recovery + +The `cleanup_stalled_jobs` cron job (runs every 30 minutes) catches jobs stuck in intermediate states: + +| State | Timeout | Action | +|-------|---------|--------| +| `QUEUED` | 10 minutes | Fail → retry if eligible | +| `RUNNING` | 60 minutes | Fail → retry if eligible | +| `PENDING` (in pipeline) | 30 minutes | Fail → retry if eligible | + +## See Also + +- [Job System Overview](jobs_overview.md) — End-to-end flow diagrams +- [Job Decorators](job_decorators.md) — How decorators trigger coordination +- [Job Managers](job_managers.md) — JobManager and PipelineManager APIs +- [Job Registry](job_registry.md) — How to register pipeline definitions diff --git a/src/mavedb/worker/py.typed b/src/mavedb/worker/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/settings.py b/src/mavedb/worker/settings.py deleted file mode 100644 index 0a9359d53..000000000 --- a/src/mavedb/worker/settings.py +++ /dev/null @@ -1,94 +0,0 @@ -import os -from concurrent import futures -from datetime import timedelta -from typing import Callable - -from arq.connections import RedisSettings -from arq.cron import CronJob, cron - -from mavedb.data_providers.services import cdot_rest -from mavedb.db.session import SessionLocal -from mavedb.lib.logging.canonical import log_job -from mavedb.worker.jobs import ( - create_variants_for_score_set, - map_variants_for_score_set, - variant_mapper_manager, - refresh_materialized_views, - refresh_published_variants_view, - submit_score_set_mappings_to_ldh, - link_clingen_variants, - poll_uniprot_mapping_jobs_for_score_set, - submit_uniprot_mapping_jobs_for_score_set, - link_gnomad_variants, - submit_score_set_mappings_to_car, -) - -# ARQ requires at least one task on startup. 
-BACKGROUND_FUNCTIONS: list[Callable] = [ - create_variants_for_score_set, - variant_mapper_manager, - map_variants_for_score_set, - refresh_published_variants_view, - submit_score_set_mappings_to_ldh, - link_clingen_variants, - poll_uniprot_mapping_jobs_for_score_set, - submit_uniprot_mapping_jobs_for_score_set, - link_gnomad_variants, - submit_score_set_mappings_to_car, -] -# In UTC time. Depending on daylight savings time, this will bounce around by an hour but should always be very early in the morning -# for all of the USA. -BACKGROUND_CRONJOBS: list[CronJob] = [ - cron( - refresh_materialized_views, - name="refresh_all_materialized_views", - hour=20, - minute=0, - keep_result=timedelta(minutes=2).total_seconds(), - ) -] - -REDIS_IP = os.getenv("REDIS_IP") or "localhost" -REDIS_PORT = int(os.getenv("REDIS_PORT") or 6379) -REDIS_SSL = (os.getenv("REDIS_SSL") or "false").lower() == "true" - - -RedisWorkerSettings = RedisSettings(host=REDIS_IP, port=REDIS_PORT, ssl=REDIS_SSL) - - -async def startup(ctx): - ctx["pool"] = futures.ProcessPoolExecutor() - - -async def shutdown(ctx): - pass - - -async def on_job_start(ctx): - db = SessionLocal() - db.current_user_id = None - ctx["db"] = db - ctx["hdp"] = cdot_rest() - ctx["state"] = {} - - -async def on_job_end(ctx): - db = ctx["db"] - db.close() - - -class ArqWorkerSettings: - """ - Settings for the ARQ worker. - """ - - on_startup = startup - on_shutdown = shutdown - on_job_start = on_job_start - on_job_end = on_job_end - after_job_end = log_job - redis_settings = RedisWorkerSettings - functions: list = BACKGROUND_FUNCTIONS - cron_jobs: list = BACKGROUND_CRONJOBS - - job_timeout = 5 * 60 * 60 # Keep jobs alive for a long while... diff --git a/src/mavedb/worker/settings/__init__.py b/src/mavedb/worker/settings/__init__.py new file mode 100644 index 000000000..af2e6a275 --- /dev/null +++ b/src/mavedb/worker/settings/__init__.py @@ -0,0 +1,19 @@ +"""Worker settings configuration. 
+ +This module provides ARQ worker settings organized by concern: +- constants: Environment variable configuration +- redis: Redis connection settings +- lifecycle: Worker startup/shutdown hooks +- worker: Main ARQ worker configuration class + +The settings are designed to be modular and easily testable, +with clear separation between infrastructure and application concerns. +""" + +from .redis import RedisWorkerSettings +from .worker import ArqWorkerSettings + +__all__ = [ + "ArqWorkerSettings", + "RedisWorkerSettings", +] diff --git a/src/mavedb/worker/settings/constants.py b/src/mavedb/worker/settings/constants.py new file mode 100644 index 000000000..b5e8f23d1 --- /dev/null +++ b/src/mavedb/worker/settings/constants.py @@ -0,0 +1,12 @@ +"""Environment configuration constants for worker settings. + +This module centralizes all environment variable handling for the worker, +providing sensible defaults and type conversion for configuration values. +All worker-related environment variables should be defined here. +""" + +import os + +REDIS_IP = os.getenv("REDIS_IP") or "localhost" +REDIS_PORT = int(os.getenv("REDIS_PORT") or 6379) +REDIS_SSL = (os.getenv("REDIS_SSL") or "false").lower() == "true" diff --git a/src/mavedb/worker/settings/lifecycle.py b/src/mavedb/worker/settings/lifecycle.py new file mode 100644 index 000000000..54a0b4c76 --- /dev/null +++ b/src/mavedb/worker/settings/lifecycle.py @@ -0,0 +1,44 @@ +"""Worker lifecycle management hooks. + +This module defines the startup, shutdown, and job lifecycle hooks +for the ARQ worker. These hooks manage: +- Process pool for CPU-intensive tasks +- HGVS data provider setup +- Job state initialization and cleanup +""" + +from concurrent import futures + +from mavedb.data_providers.services import cdot_rest + + +def standalone_ctx(): + """Create a standalone worker context dictionary.""" + ctx = {} + ctx["pool"] = futures.ProcessPoolExecutor() + ctx["redis"] = None # Redis connection can be set up here if needed. 
+ ctx["hdp"] = cdot_rest() + ctx["state"] = {} + + # Additional context setup can be added here as needed. + # This function should not drift from the lifecycle hooks + # below and is useful for invoking worker jobs outside of ARQ. + + return ctx + + +async def startup(ctx): + ctx["pool"] = futures.ProcessPoolExecutor(max_workers=4) + + +async def shutdown(ctx): + pass + + +async def on_job_start(ctx): + ctx["hdp"] = cdot_rest() + ctx["state"] = {} + + +async def on_job_end(ctx): + pass diff --git a/src/mavedb/worker/settings/redis.py b/src/mavedb/worker/settings/redis.py new file mode 100644 index 000000000..2773f77f2 --- /dev/null +++ b/src/mavedb/worker/settings/redis.py @@ -0,0 +1,12 @@ +"""Redis connection settings for ARQ worker. + +This module provides Redis connection configuration using environment +variables with appropriate defaults. The settings are compatible with +ARQ's RedisSettings class and handle SSL connections. +""" + +from arq.connections import RedisSettings + +from mavedb.worker.settings.constants import REDIS_IP, REDIS_PORT, REDIS_SSL + +RedisWorkerSettings = RedisSettings(host=REDIS_IP, port=REDIS_PORT, ssl=REDIS_SSL) diff --git a/src/mavedb/worker/settings/worker.py b/src/mavedb/worker/settings/worker.py new file mode 100644 index 000000000..e84b68c50 --- /dev/null +++ b/src/mavedb/worker/settings/worker.py @@ -0,0 +1,46 @@ +"""Main ARQ worker configuration class. + +This module defines the primary ArqWorkerSettings class that brings together +all worker configuration including: +- Job functions and cron jobs from the jobs registry +- Redis connection settings +- Lifecycle hooks for startup/shutdown and job execution +- Timeout and logging configuration + +This is the main configuration class used to start the ARQ worker. 
+""" + +from mavedb.lib.logging.canonical import log_job +from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS +from mavedb.worker.settings.lifecycle import on_job_end, on_job_start, shutdown, startup +from mavedb.worker.settings.redis import RedisWorkerSettings + +# Limit concurrency to prevent event loop starvation from sync psycopg2 DB +# operations. With the default max_jobs=10, multiple jobs issuing blocking DB +# calls simultaneously can starve the event loop and cause apparent hangs. +# 2 jobs still compete, but the practical impact is much less severe. +# +# TODO#715 Migrate to psycopg3 async driver to safely increase concurrency. +# psycopg3 supports both sync (API) and async (worker) modes on the same +# driver, enabling incremental migration of job functions without touching +# the FastAPI layer. Once all jobs use async sessions, raise MAX_JOBS to 10+. +MAX_JOBS = 2 +JOB_TIMEOUT_SECONDS = 2 * 60 * 60 # 2 hours — matches RUNNING_TIMEOUT_MINUTES (90 min) with buffer + + +class ArqWorkerSettings: + """ + Settings for the ARQ worker. 
+ """ + + on_startup = startup + on_shutdown = shutdown + on_job_start = on_job_start + on_job_end = on_job_end + after_job_end = log_job + redis_settings = RedisWorkerSettings + functions: list = BACKGROUND_FUNCTIONS + cron_jobs: list = BACKGROUND_CRONJOBS + + max_jobs = MAX_JOBS + job_timeout = JOB_TIMEOUT_SECONDS diff --git a/tests/conftest.py b/tests/conftest.py index 33e709e95..34e366392 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,15 +1,22 @@ import logging # noqa: F401 +import os import sys +from contextlib import contextmanager from datetime import datetime from unittest import mock import email_validator import pytest import pytest_postgresql -from sqlalchemy import create_engine +import pytest_socket +from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool +# Set cache backend to memory for all tests BEFORE any mavedb modules are imported +# This ensures ClinGen API caching uses in-memory cache instead of Redis during tests +os.environ.setdefault("CLINGEN_CACHE_BACKEND", "memory") + from mavedb.db.base import Base from mavedb.models import * # noqa: F403 from mavedb.models.experiment import Experiment @@ -57,6 +64,21 @@ email_validator.TEST_ENVIRONMENT = True +def pytest_runtest_setup(item): + # Only block sockets for tests not marked with 'network' + if "network" not in item.keywords: + try: + pytest_socket.socket_allow_hosts(["localhost", "127.0.0.1", "::1"], allow_unix_socket=True) + except ImportError: + pass + + else: + try: + pytest_socket.enable_socket() + except ImportError: + pass + + @pytest.fixture() def session(postgresql): # Un-comment this line to log all database queries: @@ -72,6 +94,15 @@ def session(postgresql): Base.metadata.create_all(bind=engine) + # Create a unique index for the published_variants_materialized_view to + # enforce uniqueness on (variant_id, mapped_variant_id, score_set_id). 
This + # allows us to test mat view refreshes that require this constraint. + session.execute( + text("""CREATE UNIQUE INDEX IF NOT EXISTS published_variants_mv_unique_idx + ON published_variants_materialized_view (variant_id, mapped_variant_id, score_set_id)"""), + ) + session.commit() + try: yield session finally: @@ -79,6 +110,36 @@ def session(postgresql): Base.metadata.drop_all(bind=engine) +@pytest.fixture +def db_session_fixture(session): + @contextmanager + def _db_session_cm(): + yield session + + return _db_session_cm + + +# ALL locations which use the db_session fixture need to be patched to use +# the test version. +@pytest.fixture +def patch_db_session_ctxmgr(db_session_fixture): + """Patches all known locations of the db_session fixture to use the test version. + + To use this fixture, add it to the pytestmark list of a test module: + pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + If you see an error about a test being unable to connect to the database, you + likely need to add another patch here for the module that is trying to use + db_session or include the above mark in your test module. + """ + with ( + mock.patch("mavedb.db.session.db_session", db_session_fixture), + mock.patch("mavedb.worker.lib.decorators.utils.db_session", db_session_fixture), + # Add other modules that use db_session here as needed + ): + yield + + @pytest.fixture def setup_lib_db(session): """ @@ -336,3 +397,13 @@ def test_needing_publication_identifier_mock(mock_publication_fetch, ...): mocked_publications.append(publication_to_mock) # Return a single dict (original behavior) if only one was provided; otherwise the list. return mocked_publications[0] if len(mocked_publications) == 1 else mocked_publications + + +# Automatically set MAVEDB_TEST_MODE=1 for unit tests, unset for integration tests. +@pytest.fixture(autouse=True) +def set_mavedb_test_mode_flag(request): + # If 'unit' marker is present, set the flag; otherwise, unset it. 
+ if request.node.get_closest_marker("unit"): + os.environ["MAVEDB_TEST_MODE"] = "1" + else: + os.environ.pop("MAVEDB_TEST_MODE", None) diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index a07607a71..bb61f3a3b 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -13,15 +13,18 @@ from biocommons.seqrepo import SeqRepo from fastapi.testclient import TestClient from httpx import AsyncClient +from sqlalchemy import Column, Float, Integer, MetaData, String, Table +from mavedb.db.session import create_engine, sessionmaker from mavedb.deps import get_db, get_seqrepo, get_worker, hgvs_data_provider -from mavedb.lib.authentication import get_current_user +from mavedb.lib.authentication import UserData, get_current_user from mavedb.lib.authorization import require_current_user -from mavedb.lib.types.authentication import UserData +from mavedb.lib.gnomad import gnomad_table_name +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.user import User from mavedb.server_main import app -from mavedb.worker.settings import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS -from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER +from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS +from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER, VALID_CAID #################################################################################################### # REDIS @@ -78,6 +81,10 @@ def some_test(client, arq_redis): await redis_.aclose(close_connection_pool=True) +async def dummy_arq_function(ctx, *args, **kwargs) -> JobExecutionOutcome: + return JobExecutionOutcome.succeeded() + + @pytest_asyncio.fixture() async def arq_worker(data_provider, session, arq_redis): """ @@ -87,7 +94,7 @@ async def arq_worker(data_provider, session, arq_redis): ``` async def worker_test(arq_redis, arq_worker): - await 
arq_redis.enqueue_job('some_job') + await arq_redis.enqueue_job('dummy_arq_function') await arq_worker.async_run() await arq_worker.run_check() ``` @@ -103,7 +110,7 @@ async def on_job(ctx): ctx["pool"] = futures.ProcessPoolExecutor() worker_ = Worker( - functions=BACKGROUND_FUNCTIONS, + functions=BACKGROUND_FUNCTIONS + [dummy_arq_function], cron_jobs=BACKGROUND_CRONJOBS, redis_pool=arq_redis, burst=True, @@ -120,9 +127,8 @@ async def on_job(ctx): @pytest.fixture -def standalone_worker_context(session, data_provider, arq_redis): +def standalone_worker_context(data_provider, arq_redis): yield { - "db": session, "hdp": data_provider, "state": {}, "job_id": "test_job", @@ -401,3 +407,58 @@ def client(app_): async def async_client(app_): async with AsyncClient(app=app_, base_url="http://testserver") as ac: yield ac + + +##################################################################################################### +# Athena +##################################################################################################### + + +@pytest.fixture +def athena_engine(): + """Create and yield a SQLAlchemy engine connected to a mock Athena database.""" + engine = create_engine("sqlite:///:memory:") + metadata = MetaData() + + # TODO: Define your table schema here + my_table = Table( + gnomad_table_name(), + metadata, + Column("id", Integer, primary_key=True), + Column("locus.contig", String), + Column("locus.position", Integer), + Column("alleles", String), + Column("caid", String), + Column("joint.freq.all.ac", Integer), + Column("joint.freq.all.an", Integer), + Column("joint.fafmax.faf95_max_gen_anc", String), + Column("joint.fafmax.faf95_max", Float), + ) + metadata.create_all(engine) + + session = sessionmaker(autocommit=False, autoflush=False, bind=engine)() + + # Insert test data + session.execute( + my_table.insert(), + [ + { + "id": 1, + "locus.contig": "chr1", + "locus.position": 12345, + "alleles": "[G, A]", + "caid": VALID_CAID, + "joint.freq.all.ac": 23, + 
"joint.freq.all.an": 32432423, + "joint.fafmax.faf95_max_gen_anc": "anc1", + "joint.fafmax.faf95_max": 0.000006763700000000002, + } + ], + ) + session.commit() + session.close() + + try: + yield engine + finally: + engine.dispose() diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index e06d07a12..531393afc 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -44,6 +44,7 @@ VALID_PRO_ACCESSION = "NP_001637.4" VALID_GENE = "BRCA1" VALID_UNIPROT_ACCESSION = "P05067" +VALID_CAID = "CA9765210" VALID_ENSEMBL_IDENTIFIER = "ENST00000530893.6" @@ -1266,52 +1267,35 @@ }, } -TEST_CODING_LAYER = { +TEST_PROTEIN_LAYER = { + "computed_reference_sequence": { + "sequence_type": "protein", + "sequence_id": "ga4gh:SQ.ref_protein_test", + "sequence": "MKTIIALSYIFCLVFADYKDDDDK", + }, "mapped_reference_sequence": { - "sequence_accessions": [VALID_NT_ACCESSION], + "sequence_type": "protein", + "sequence_id": "ga4gh:SQ.map_protein_test", + "sequence_accessions": [VALID_PRO_ACCESSION], }, } -TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD = { - "metadata": {}, - "reference_sequences": { - "TEST1": { - "gene_info": TEST_GENE_INFO, - "layers": {"g": TEST_GENOMIC_LAYER, "c": TEST_CODING_LAYER}, - } +TEST_CODING_LAYER = { + "computed_reference_sequence": { + "sequence_type": "coding", + "sequence_id": "ga4gh:SQ.ref_coding_test", + "sequence": "ATGAAGACGATTATTGCTCTTATCTTTCCTCTTTTGCTGATATACGACGACGACAAA", }, - "mapped_scores": [], - "vrs_version": "2.0", - "dcd_mapping_version": "pytest.0.0", - "mapped_date_utc": datetime.isoformat(datetime.now()), -} - -TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD = { - "metadata": {}, - "reference_sequences": { - "TEST2": { - "gene_info": TEST_GENE_INFO, - "layers": {"g": TEST_GENOMIC_LAYER, "c": TEST_CODING_LAYER}, - } + "mapped_reference_sequence": { + "sequence_type": "coding", + "sequence_id": "ga4gh:SQ.map_coding_test", + "sequence_accessions": [VALID_NT_ACCESSION], }, - "mapped_scores": [], - "vrs_version": "2.0", - 
class TransactionSpy:
    """Factory for creating database transaction spy context managers."""

    class Spies(TypedDict):
        # Spies may wrap sync or async sessions, hence the Mock/AsyncMock union.
        flush: Union[MagicMock, AsyncMock]
        rollback: Union[MagicMock, AsyncMock]
        commit: Union[MagicMock, AsyncMock]

    class SpiesWithException(Spies):
        # The exception the failure-mocking contexts below will raise.
        exception: Exception

    @staticmethod
    @contextmanager
    def spy(
        session: Session,
        expect_rollback: bool = False,
        expect_flush: bool = False,
        expect_commit: bool = False,
    ) -> Generator[Spies, None, None]:
        """
        Create spies for database transaction methods.

        Args:
            session: Database session to spy on
            expect_rollback: Whether to assert db.rollback to be called
            expect_flush: Whether to assert db.flush to be called
            expect_commit: Whether to assert db.commit to be called

        Yields:
            dict: Dictionary containing all the spies for granular assertion

        Note:
            Use caution when combining expectations. For example, if expect_commit
            is True, you may wish to set expect_flush to True as well, since commit
            typically implies a flush operation within SQLAlchemy internals.

            The automatic assertions after the yield only run when the with-body
            exits normally; if the body raises, the exception propagates through
            the yield and the assertions are skipped (standard @contextmanager
            semantics).

        Example:
            ```
            with TransactionSpy.spy(session, expect_rollback=True) as spies:
                # perform operation
                ...

                # Make manual granular assertions on spies if desired
                spies['rollback'].assert_called_once()

                # if assert_XXX=True is set, automatic assertions will be made at context exit.
                # In this example, expect_rollback=True will ensure rollback was called at some point.
            ```
        """
        # wraps=... means the spies record calls while still delegating to the
        # real (or previously patched) session methods.
        with (
            patch.object(session, "rollback", wraps=session.rollback) as rollback_spy,
            patch.object(session, "flush", wraps=session.flush) as flush_spy,
            patch.object(session, "commit", wraps=session.commit) as commit_spy,
        ):
            spies: TransactionSpy.Spies = {
                "flush": flush_spy,
                "rollback": rollback_spy,
                "commit": commit_spy,
            }

            yield spies

            # Automatic assertions based on session expectations.
            if expect_flush:
                flush_spy.assert_called()
            else:
                flush_spy.assert_not_called()
            if expect_rollback:
                rollback_spy.assert_called()
            else:
                rollback_spy.assert_not_called()
            if expect_commit:
                commit_spy.assert_called()
            else:
                commit_spy.assert_not_called()

    @staticmethod
    @contextmanager
    def mock_database_execution_failure(
        session: Session,
        exception=None,
        fail_on_call=1,
        expect_rollback: bool = False,
        expect_flush: bool = False,
        expect_commit: bool = False,
    ) -> Generator[SpiesWithException, None, None]:
        """
        Create a context that mocks database execution failures with transaction spies. This context
        will automatically assert calls to rollback, flush, and commit based on the provided expectations
        which all default to False.

        Args:
            session: Database session to mock
            exception: Exception to raise (defaults to SQLAlchemyError)
            fail_on_call: Which call should fail (defaults to first call)
            expect_rollback: Whether to assert rollback called (defaults to False)
            expect_flush: Whether to assert flush called (defaults to False)
            expect_commit: Whether to assert commit called (defaults to False)
        Yields:
            dict: Dictionary containing spies and the exception that will be raised
        """
        exception = exception or SQLAlchemyError("DB Error")

        # Patch order matters: execute is replaced with the failing side effect
        # first, then spy() wraps rollback/flush/commit on top of the session.
        with (
            patch.object(
                session,
                "execute",
                side_effect=create_failing_side_effect(exception, session.execute, fail_on_call),
            ),
            TransactionSpy.spy(
                session,
                expect_rollback=expect_rollback,
                expect_flush=expect_flush,
                expect_commit=expect_commit,
            ) as transaction_spies,
        ):
            spies: TransactionSpy.SpiesWithException = {
                **transaction_spies,
                "exception": exception,
            }

            yield spies

    @staticmethod
    @contextmanager
    def mock_database_flush_failure(
        session: Session,
        exception=None,
        fail_on_call=1,
        expect_rollback: bool = True,
        expect_flush: bool = True,
        expect_commit: bool = False,
    ) -> Generator[SpiesWithException, None, None]:
        """
        Create a context that mocks flush failures specifically. This context will automatically
        assert that rollback and flush are called, and that commit is not called. These automatic
        assertions can be overridden via the expect_XXX parameters.

        Args:
            session: Database session to mock
            exception: Exception to raise on flush (defaults to SQLAlchemyError)
            fail_on_call: Which flush call should fail (defaults to first call)
            expect_rollback: Whether to assert rollback called (defaults to True)
            expect_flush: Whether to assert flush called (defaults to True)
            expect_commit: Whether to assert commit called (defaults to False)
        Yields:
            dict: Dictionary containing spies and the exception
        """
        exception = exception or SQLAlchemyError("Flush Error")

        # flush is patched with the failing side effect first; spy() then wraps
        # the already-patched flush, so the spy both counts the call and raises.
        with (
            patch.object(
                session, "flush", side_effect=create_failing_side_effect(exception, session.flush, fail_on_call)
            ),
            TransactionSpy.spy(
                session,
                expect_rollback=expect_rollback,
                expect_flush=expect_flush,
                expect_commit=expect_commit,
            ) as transaction_spies,
        ):
            spies: TransactionSpy.SpiesWithException = {
                **transaction_spies,
                "exception": exception,
            }

            yield spies

    @staticmethod
    @contextmanager
    def mock_database_rollback_failure(
        session: Session,
        exception=None,
        fail_on_call=1,
        expect_rollback: bool = True,
        expect_flush: bool = False,
        expect_commit: bool = False,
    ) -> Generator[SpiesWithException, None, None]:
        """
        Create a context that mocks rollback failures specifically. This context will automatically
        assert that rollback is called, flush is not called, and commit is not called. These automatic
        assertions can be overridden via the expect_XXX parameters.

        Args:
            session: Database session to mock
            exception: Exception to raise on rollback (defaults to SQLAlchemyError)
            fail_on_call: Which rollback call should fail (defaults to first call)
            expect_rollback: Whether to assert rollback called (defaults to True)
            expect_flush: Whether to assert flush called (defaults to False)
            expect_commit: Whether to assert commit called (defaults to False)
        Yields:
            dict: Dictionary containing spies and the exception
        """
        exception = exception or SQLAlchemyError("Rollback Error")

        # Same layering as above: the failing rollback is installed first, then
        # wrapped by the spy so calls are recorded before the exception fires.
        with (
            patch.object(
                session, "rollback", side_effect=create_failing_side_effect(exception, session.rollback, fail_on_call)
            ),
            TransactionSpy.spy(
                session,
                expect_rollback=expect_rollback,
                expect_flush=expect_flush,
                expect_commit=expect_commit,
            ) as transaction_spies,
        ):
            spies: TransactionSpy.SpiesWithException = {
                **transaction_spies,
                "exception": exception,
            }

            yield spies
+ + Args: + exception: The exception to raise on the failing call + original_method: The original method to delegate to after the failure + fail_on_call: Which call number should fail (1-indexed, defaults to first call) + + Returns: + A callable that can be used as a side_effect in mock.patch + + Example: + with patch.object(session, "execute", side_effect=create_failing_side_effect( + SQLAlchemyError("DB Error"), session.execute + )): + # First call will raise SQLAlchemyError, subsequent calls work normally + pass + """ + call_count = 0 + + def side_effect_function(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == fail_on_call: + raise exception + return original_method(*args, **kwargs) + + return side_effect_function diff --git a/tests/helpers/util/setup/worker.py b/tests/helpers/util/setup/worker.py new file mode 100644 index 000000000..a9c4efa38 --- /dev/null +++ b/tests/helpers/util/setup/worker.py @@ -0,0 +1,146 @@ +from asyncio.unix_events import _UnixSelectorEventLoop +from copy import deepcopy +from unittest.mock import patch + +from sqlalchemy import select + +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant +from mavedb.worker.jobs import ( + create_variants_for_score_set, + map_variants_for_score_set, +) +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.constants import ( + TEST_CODING_LAYER, + TEST_GENE_INFO, + TEST_GENOMIC_LAYER, + TEST_MAPPING_SCAFFOLD, + TEST_PROTEIN_LAYER, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, + TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, +) + + +async def create_variants_in_score_set( + session, mock_s3_client, score_df, count_df, mock_worker_ctx, variant_creation_run +): + """Add variants to a given score set in the database.""" + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + 
"mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[score_df, count_df], + ), + ): + # Guard against both possible function signatures, with some uses of this function coming from + # integration tests that need not pass a JobManager. + try: + result = await create_variants_for_score_set( + mock_worker_ctx, + variant_creation_run.id, + ) + except TypeError: + result = await create_variants_for_score_set( + mock_worker_ctx, + variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], variant_creation_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + session.commit() + + +async def create_mappings_in_score_set( + session, mock_s3_client, mock_worker_ctx, score_df, count_df, variant_creation_run, variant_mapping_run +): + await create_variants_in_score_set( + session, mock_s3_client, score_df, count_df, mock_worker_ctx, variant_creation_run + ) + + score_set = session.execute( + select(ScoreSetDbModel).where(ScoreSetDbModel.id == variant_creation_run.job_params["score_set_id"]) + ).scalar_one() + + async def dummy_mapping_job(): + return await construct_mock_mapping_output(session, score_set, with_layers={"g", "c", "p"}) + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + # Guard against both possible function signatures, with some uses of this function coming from + # integration tests that need not pass a JobManager. 
+ try: + result = await map_variants_for_score_set(mock_worker_ctx, variant_mapping_run.id) + except TypeError: + result = await map_variants_for_score_set( + mock_worker_ctx, + variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], variant_mapping_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + session.commit() + + +async def construct_mock_mapping_output( + session, + score_set, + with_layers, + with_gene_info=True, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, +): + """Construct mapping output for a given score set in the database.""" + mapping_output = deepcopy(TEST_MAPPING_SCAFFOLD) + + if with_reference_metadata: + for target in score_set.target_genes: + mapping_output["reference_sequences"][target.name] = { + "gene_info": TEST_GENE_INFO if with_gene_info else {}, + } + + for target in score_set.target_genes: + mapping_output["reference_sequences"][target.name]["layers"] = {} + if "g" in with_layers: + mapping_output["reference_sequences"][target.name]["layers"]["g"] = TEST_GENOMIC_LAYER + if "c" in with_layers: + mapping_output["reference_sequences"][target.name]["layers"]["c"] = TEST_CODING_LAYER + if "p" in with_layers: + mapping_output["reference_sequences"][target.name]["layers"]["p"] = TEST_PROTEIN_LAYER + + if with_mapped_scores: + variants = session.scalars( + select(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + + for idx, variant in enumerate(variants): + mapped_score = { + "pre_mapped": deepcopy(TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X) if with_pre_mapped else {}, + "post_mapped": deepcopy(TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X) if with_post_mapped else {}, + "mavedb_id": variant.urn, + } + + # Don't alter HGVS strings in post mapped output. This makes it considerably + # easier to assert correctness in tests. 
+ if with_post_mapped: + mapped_score["post_mapped"]["expressions"][0]["value"] = variant.hgvs_nt or variant.hgvs_pro + + # Skip every other variant if not with_all_variants + if not with_all_variants and idx % 2 == 0: + mapped_score["post_mapped"] = {} + + mapping_output["mapped_scores"].append(mapped_score) + + if not mapping_output["mapped_scores"]: + mapping_output["error_message"] = "test error: no mapped scores" + + return mapping_output diff --git a/tests/helpers/util/variant.py b/tests/helpers/util/variant.py index 5fcc05db2..eede1e610 100644 --- a/tests/helpers/util/variant.py +++ b/tests/helpers/util/variant.py @@ -36,7 +36,11 @@ def mock_worker_variant_insertion( with ( open(scores_csv_path, "rb") as score_file, patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue, + patch("mavedb.routers.score_sets.s3_client") as mock_s3_client, ): + mock_s3 = mock_s3_client.return_value + mock_s3.upload_fileobj.return_value = None # or whatever you want + files = {"scores_file": (scores_csv_path.name, score_file, "rb")} if counts_csv_path is not None: @@ -69,6 +73,7 @@ def mock_worker_variant_insertion( # Assert we have mocked a job being added to the queue, and that the request succeeded. The # response value here isn't important- we will add variants to the score set manually. 
+ mock_s3.upload_fileobj.assert_called() worker_queue.assert_called_once() assert response.status_code == 200 diff --git a/tests/lib/clingen/network/test_allele_registry.py b/tests/lib/clingen/network/test_allele_registry.py new file mode 100644 index 000000000..7c4bbfa6f --- /dev/null +++ b/tests/lib/clingen/network/test_allele_registry.py @@ -0,0 +1,92 @@ +# ruff: noqa: E402 +"""Tests for ClinGen Allele Registry API functions.""" + +import pytest + +pytest.importorskip("aiocache", reason="aiocache is required for tests of allele registry functions") + +import requests + +from mavedb.lib.clingen.allele_registry import ( + get_associated_clinvar_allele_id, + get_canonical_pa_ids, + get_matching_registered_ca_ids, +) + + +@pytest.mark.network +class TestGetCanonicalPaIdsNetwork: + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_known_caid(self): + # Using a known ClinGen Allele ID with MANE transcripts + clingen_allele_id = "CA321211" # Example ClinGen Allele ID + result = await get_canonical_pa_ids(clingen_allele_id) + assert isinstance(result, list) + assert result == ["PA2573050890", "PA321212"] # Expected MANE PA ID + + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_known_no_mane(self): + # Using a ClinGen Allele ID for protein change, as this will not have mane transcripts + clingen_allele_id = "PA102264" # Example ClinGen Allele ID with no MANE + result = await get_canonical_pa_ids(clingen_allele_id) + assert result == [] + + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_invalid_id(self): + # Using an invalid ClinGen Allele ID raises 400 Bad Request (malformed input) + # Only 404 is treated as "no data" - other errors surface to help catch bugs + + clingen_allele_id = "INVALID_ID" + with pytest.raises(requests.exceptions.HTTPError, match="400"): + await get_canonical_pa_ids(clingen_allele_id) + + +@pytest.mark.network +class TestGetMatchingRegisteredCaIdsNetwork: + @pytest.mark.asyncio + async def 
test_get_matching_registered_ca_ids_known_paid(self): + # Using a known ClinGen PA ID with registered CA IDs + clingen_pa_id = "PA2573050890" # Example ClinGen PA ID + result = await get_matching_registered_ca_ids(clingen_pa_id) + assert isinstance(result, list) + assert "CA321211" in result # Expected registered CA ID + + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_known_no_caids(self): + # Using a ClinGen PA ID with no registered CA IDs + clingen_pa_id = "PA3051398879" # Example ClinGen PA ID with no registered CA IDs + result = await get_matching_registered_ca_ids(clingen_pa_id) + assert result == [] + + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_invalid_id(self): + # Using an invalid ClinGen PA ID raises 400 Bad Request (malformed input) + # Only 404 is treated as "no data" - other errors surface to help catch bugs + clingen_pa_id = "INVALID_ID" + with pytest.raises(requests.exceptions.HTTPError, match="400"): + await get_matching_registered_ca_ids(clingen_pa_id) + + +@pytest.mark.network +class TestGetAssociatedClinvarAlleleIdNetwork: + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_known_caid(self): + # Using a known ClinGen Allele ID with associated ClinVar Allele ID + clingen_allele_id = "CA321211" # Example ClinGen Allele ID + result = await get_associated_clinvar_allele_id(clingen_allele_id) + assert result == "211565" # Expected ClinVar Allele ID + + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_no_association(self): + # Using a ClinGen Allele ID with no associated ClinVar Allele ID + clingen_allele_id = "CA9532274" # Example ClinGen Allele ID with no association + result = await get_associated_clinvar_allele_id(clingen_allele_id) + assert result == "" # Empty string indicates no ClinVar association (cached result) + + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_invalid_id(self): + # Using an invalid ClinGen Allele ID raises 400 
@pytest.mark.unit
@mock.patch("mavedb.lib.clingen.allele_registry.requests.get")
class TestGetCanonicalPaIds:
    """Unit tests for get_canonical_pa_ids with requests.get mocked.

    The class-level @mock.patch injects the patched requests.get into every
    test method as the trailing ``mock_request`` argument.
    """

    @pytest.mark.asyncio
    async def test_get_canonical_pa_ids_success(self, mock_request):
        # Mock response object
        mock_response = mock.Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "transcriptAlleles": [
                {"MANE": True, "@id": "https://reg.genome.network/allele/PA12345"},
                {"MANE": False, "@id": "https://reg.genome.network/allele/PA54321"},
                {"MANE": True, "@id": "https://reg.genome.network/allele/PA67890"},
                {"@id": "https://reg.genome.network/allele/PA00000"},  # No MANE
            ]
        }
        mock_request.return_value = mock_response

        # Only entries that are both MANE and carry an @id are returned.
        result = await get_canonical_pa_ids("CA00001")
        assert result == ["PA12345", "PA67890"]

    @pytest.mark.asyncio
    async def test_get_canonical_pa_ids_no_transcript_alleles(self, mock_request):
        mock_response = mock.Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {}
        mock_request.return_value = mock_response

        result = await get_canonical_pa_ids("CA00002")
        assert result == []

    @pytest.mark.asyncio
    async def test_get_canonical_pa_ids_empty_transcript_alleles(self, mock_request):
        mock_response = mock.Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"transcriptAlleles": []}
        mock_request.return_value = mock_response

        result = await get_canonical_pa_ids("CA00003")
        assert result == []

    @pytest.mark.asyncio
    async def test_get_canonical_pa_ids_missing_mane_or_id(self, mock_request):
        mock_response = mock.Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "transcriptAlleles": [
                {"MANE": True},  # Missing @id
                {"@id": "https://reg.genome.network/allele/PA99999"},  # Missing MANE
                {},  # Missing both
            ]
        }
        mock_request.return_value = mock_response

        result = await get_canonical_pa_ids("CA00004")
        assert result == []

    @pytest.mark.asyncio
    async def test_get_canonical_pa_ids_404_returns_empty(self, mock_request):
        """404 means allele doesn't exist - treat as 'no data' (cacheable)."""
        mock_response = mock.Mock()
        mock_response.status_code = 404
        mock_request.return_value = mock_response

        result = await get_canonical_pa_ids("CA404")
        assert result == []

    @pytest.mark.asyncio
    async def test_get_canonical_pa_ids_5xx_raises(self, mock_request):
        """5xx errors should raise exception (transient failure, can retry)."""
        mock_response = mock.Mock()
        mock_response.status_code = 500
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("500 Server Error")
        mock_request.return_value = mock_response

        with pytest.raises(requests.exceptions.HTTPError):
            await get_canonical_pa_ids("CA500")
"matchingRegisteredTranscripts": [ + {"@id": "https://reg.genome.network/allele/CA11111"}, + {"@id": "https://reg.genome.network/allele/CA22222"}, + ] + }, + { + "matchingRegisteredTranscripts": [ + {"@id": "https://reg.genome.network/allele/CA33333"}, + ] + }, + { + # No matchingRegisteredTranscripts + }, + ] + } + mock_request.return_value = mock_response + + result = await get_matching_registered_ca_ids("PA12345") + assert result == ["CA11111", "CA22222", "CA33333"] + + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_no_amino_acid_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {} + mock_request.return_value = mock_response + + result = await get_matching_registered_ca_ids("PA00000") + assert result == [] + + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_empty_amino_acid_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"aminoAcidAlleles": []} + mock_request.return_value = mock_response + + result = await get_matching_registered_ca_ids("PA00001") + assert result == [] + + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_missing_matching_registered_transcripts(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "aminoAcidAlleles": [ + {}, # No matchingRegisteredTranscripts + {"matchingRegisteredTranscripts": []}, # Empty list + ] + } + mock_request.return_value = mock_response + + result = await get_matching_registered_ca_ids("PA00002") + assert result == [] + + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_404_returns_empty(self, mock_request): + """404 means allele doesn't exist - treat as 'no data' (cacheable).""" + mock_response = mock.Mock() + mock_response.status_code = 404 + mock_request.return_value = mock_response + + result = await 
@pytest.mark.unit
@mock.patch("mavedb.lib.clingen.allele_registry.requests.get")
class TestGetAssociatedClinvarAlleleId:
    """Unit tests for get_associated_clinvar_allele_id with requests.get mocked.

    The class-level @mock.patch injects the patched requests.get into every
    test method as the trailing ``mock_request`` argument.
    """

    @pytest.mark.asyncio
    async def test_get_associated_clinvar_allele_id_success(self, mock_request):
        mock_response = mock.Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "123456"}]}}
        mock_request.return_value = mock_response

        result = await get_associated_clinvar_allele_id("CA_CLINVAR_SUCCESS")
        assert result == "123456"

    @pytest.mark.asyncio
    async def test_get_associated_clinvar_allele_id_no_external_records(self, mock_request):
        mock_response = mock.Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {}
        mock_request.return_value = mock_response

        result = await get_associated_clinvar_allele_id("CA_CLINVAR_NO_RECORDS")

        # For "no data found" cases we intentionally return an empty string (not None)
        # to allow caching of these results. This is the modal case - most ClinGen alleles don't have ClinVar associations.
        assert result == ""

    @pytest.mark.asyncio
    async def test_get_associated_clinvar_allele_id_no_clinvar_alleles(self, mock_request):
        mock_response = mock.Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"externalRecords": {}}
        mock_request.return_value = mock_response

        result = await get_associated_clinvar_allele_id("CA_CLINVAR_NO_ALLELES")
        assert result == ""

    @pytest.mark.asyncio
    async def test_get_associated_clinvar_allele_id_missing_allele_id(self, mock_request):
        mock_response = mock.Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{}]}}
        mock_request.return_value = mock_response

        result = await get_associated_clinvar_allele_id("CA_CLINVAR_MISSING_ID")
        assert result == ""

    @pytest.mark.asyncio
    async def test_get_associated_clinvar_allele_id_404_returns_empty(self, mock_request):
        """404 means allele doesn't exist - treat as 'no data' (cacheable)."""
        mock_response = mock.Mock()
        mock_response.status_code = 404
        mock_request.return_value = mock_response

        result = await get_associated_clinvar_allele_id("CA_CLINVAR_404")
        assert result == ""

    @pytest.mark.asyncio
    async def test_get_associated_clinvar_allele_id_5xx_raises(self, mock_request):
        """5xx errors should raise exception (transient failure, can retry)."""
        mock_response = mock.Mock()
        mock_response.status_code = 500
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("500 Server Error")
        mock_request.return_value = mock_response

        with pytest.raises(requests.exceptions.HTTPError):
            await get_associated_clinvar_allele_id("CA_CLINVAR_500")
+ + These tests verify that the @cached decorator works correctly with the + API functions, including cache hits, misses, and edge cases. + Uses in-memory cache (configured in conftest.py) to avoid requiring Redis. + """ + + @pytest.mark.asyncio + async def test_cache_hit_reduces_api_calls(self, mock_request, clear_cache): + """Verify first call is cache miss, second call is cache hit (no API call).""" + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "999999"}]}} + mock_request.return_value = mock_response + + # First call - should hit the API (cache miss) + result1 = await get_associated_clinvar_allele_id("CA_CACHE_TEST_1") + assert result1 == "999999" + assert mock_request.call_count == 1 + + # Second call with same ID - should hit cache (no new API call) + result2 = await get_associated_clinvar_allele_id("CA_CACHE_TEST_1") + assert result2 == "999999" + assert mock_request.call_count == 1 # Still 1, not 2 + + @pytest.mark.asyncio + async def test_empty_string_results_are_cached(self, mock_request, clear_cache): + """Verify that empty string results (no ClinVar association) are cached. + + This is the modal case - most ClinGen alleles don't have ClinVar associations. + We return empty string (not None) for successful API calls with no association, + so aiocache will cache these results and avoid repeated API calls. 
+ """ + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {} # No ClinVar association + mock_request.return_value = mock_response + + # First call - should hit the API + result1 = await get_associated_clinvar_allele_id("CA_NO_CLINVAR") + assert result1 == "" + assert mock_request.call_count == 1 + + # Second call - should hit cache (no new API call) + result2 = await get_associated_clinvar_allele_id("CA_NO_CLINVAR") + assert result2 == "" + assert mock_request.call_count == 1 # Still 1, not 2 + + @pytest.mark.asyncio + async def test_different_allele_ids_cached_separately(self, mock_request, clear_cache): + """Verify different allele IDs have separate cache entries.""" + # Mock responses for different allele IDs + mock_response1 = mock.Mock() + mock_response1.status_code = 200 + mock_response1.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "111111"}]}} + + mock_response2 = mock.Mock() + mock_response2.status_code = 200 + mock_response2.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "222222"}]}} + + mock_request.side_effect = [mock_response1, mock_response2] + + # Call with two different allele IDs + result1 = await get_associated_clinvar_allele_id("CA_SEPARATE_1") + result2 = await get_associated_clinvar_allele_id("CA_SEPARATE_2") + + # Both should have made API calls (different cache keys) + assert result1 == "111111" + assert result2 == "222222" + assert mock_request.call_count == 2 + + # Reset side_effect for subsequent calls + mock_request.side_effect = None + + # Calling again with same IDs should hit cache (no new calls) + result1_cached = await get_associated_clinvar_allele_id("CA_SEPARATE_1") + result2_cached = await get_associated_clinvar_allele_id("CA_SEPARATE_2") + + assert result1_cached == "111111" + assert result2_cached == "222222" + assert mock_request.call_count == 2 # Still 2, no new calls + + @pytest.mark.asyncio + async def 
test_api_errors_not_cached(self, mock_request, clear_cache): + """Verify that API error responses are NOT cached. + + This is important - if we cache errors, a temporary API failure + would prevent successful retries. Now that we raise exceptions, + the exception prevents caching and allows retries. + """ + # First call returns error + mock_error_response = mock.Mock() + mock_error_response.status_code = 500 + mock_error_response.raise_for_status.side_effect = requests.exceptions.HTTPError("500 Server Error") + mock_request.return_value = mock_error_response + + # First call - API error raises exception + with pytest.raises(requests.exceptions.HTTPError): + await get_associated_clinvar_allele_id("CA_ERROR_TEST") + assert mock_request.call_count == 1 + + # Mock successful response for retry + mock_success_response = mock.Mock() + mock_success_response.status_code = 200 + mock_success_response.raise_for_status.return_value = None # No exception on success + mock_success_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "777777"}]}} + mock_request.return_value = mock_success_response + + # Second call - should retry API (error was not cached) + result2 = await get_associated_clinvar_allele_id("CA_ERROR_TEST") + assert result2 == "777777" + assert mock_request.call_count == 2 # New API call was made + + @pytest.mark.asyncio + async def test_rate_limit_errors_not_cached(self, mock_request, clear_cache): + """Verify that 429 rate limit errors are NOT cached. + + Rate limiting is a transient condition - we should retry after + the rate limit window expires, not cache the failure. 
+ """ + # First call returns rate limit error + mock_error_response = mock.Mock() + mock_error_response.status_code = 429 + mock_error_response.raise_for_status.side_effect = requests.exceptions.HTTPError("429 Too Many Requests") + mock_request.return_value = mock_error_response + + # First call - rate limit error raises exception + with pytest.raises(requests.exceptions.HTTPError): + await get_associated_clinvar_allele_id("CA_RATE_LIMIT_TEST") + assert mock_request.call_count == 1 + + # Mock successful response for retry (after rate limit window) + mock_success_response = mock.Mock() + mock_success_response.status_code = 200 + mock_success_response.raise_for_status.return_value = None + mock_success_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "429429"}]}} + mock_request.return_value = mock_success_response + + # Second call - should retry API (rate limit error was not cached) + result2 = await get_associated_clinvar_allele_id("CA_RATE_LIMIT_TEST") + assert result2 == "429429" + assert mock_request.call_count == 2 # New API call was made + + @pytest.mark.asyncio + async def test_service_unavailable_errors_not_cached(self, mock_request, clear_cache): + """Verify that 503 service unavailable errors are NOT cached. + + Service unavailability is a transient condition - the service + may recover, so we should allow retries rather than caching the failure. 
+ """ + # First call returns service unavailable error + mock_error_response = mock.Mock() + mock_error_response.status_code = 503 + mock_error_response.raise_for_status.side_effect = requests.exceptions.HTTPError("503 Service Unavailable") + mock_request.return_value = mock_error_response + + # First call - service unavailable error raises exception + with pytest.raises(requests.exceptions.HTTPError): + await get_associated_clinvar_allele_id("CA_SERVICE_UNAVAILABLE_TEST") + assert mock_request.call_count == 1 + + # Mock successful response for retry (after service recovers) + mock_success_response = mock.Mock() + mock_success_response.status_code = 200 + mock_success_response.raise_for_status.return_value = None + mock_success_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "503503"}]}} + mock_request.return_value = mock_success_response + + # Second call - should retry API (service unavailable error was not cached) + result2 = await get_associated_clinvar_allele_id("CA_SERVICE_UNAVAILABLE_TEST") + assert result2 == "503503" + assert mock_request.call_count == 2 # New API call was made + + @pytest.mark.asyncio + async def test_different_functions_share_raw_data_cache(self, mock_request, clear_cache): + """Verify different API functions share the underlying allele data cache. + + Since all functions delegate to get_clingen_allele_data, calling one function + caches the raw response, and subsequent calls for the same allele ID reuse it + without making additional API calls. 
+ """ + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "transcriptAlleles": [ + {"MANE": True, "@id": "https://reg.genome.network/allele/PA99999"}, + ], + "externalRecords": {"ClinVarAlleles": [{"alleleId": "888888"}]}, + } + mock_request.return_value = mock_response + + # First call fetches from API + result1 = await get_canonical_pa_ids("CA_SHARED_CACHE_TEST") + # Second call reuses cached raw data — no new API call + result2 = await get_associated_clinvar_allele_id("CA_SHARED_CACHE_TEST") + + assert result1 == ["PA99999"] + assert result2 == "888888" + assert mock_request.call_count == 1 # Only one API call for both functions diff --git a/tests/lib/clingen/test_cache.py b/tests/lib/clingen/test_cache.py new file mode 100644 index 000000000..3fd0d9d45 --- /dev/null +++ b/tests/lib/clingen/test_cache.py @@ -0,0 +1,179 @@ +# ruff: noqa: E402 +"""Tests for ClinGen cache configuration.""" + +import pytest + +pytest.importorskip("aiocache", reason="aiocache is required to test caching behavior of ClinGen API functions") + +import inspect + +from aiocache import Cache + +from mavedb.lib.clingen.allele_registry import ( + get_associated_clinvar_allele_id, + get_canonical_pa_ids, + get_matching_registered_ca_ids, +) +from mavedb.lib.clingen.cache import ( + CACHE_CLASS, + CACHE_CONFIG, + CACHE_KEY_PREFIX, + CACHE_KEY_VERSION, + CACHE_TTL_SECONDS, + clingen_cache_key_builder, + get_cache_configuration, +) + + +@pytest.mark.unit +class TestCacheConfiguration: + """Test cache configuration constants and key builder.""" + + def test_cache_constants(self): + """Verify cache constants are properly defined.""" + assert CACHE_KEY_PREFIX == "mavedb:clingen" + assert CACHE_KEY_VERSION == "v1" + assert CACHE_TTL_SECONDS == 86400 # 24 hours + + def test_cache_key_builder_with_positional_arg(self): + """Verify cache key builder generates correct keys with positional args.""" + + def mock_func(): + pass + + mock_func.__name__ = 
"get_associated_clinvar_allele_id" + + key = clingen_cache_key_builder(mock_func, "CA123456") + assert key == "v1:get_associated_clinvar_allele_id:CA123456" + + def test_cache_key_builder_with_kwargs(self): + """Verify cache key builder generates correct keys with kwargs.""" + + def mock_func(): + pass + + mock_func.__name__ = "get_canonical_pa_ids" + + # Test with clingen_allele_id kwarg + key = clingen_cache_key_builder(mock_func, clingen_allele_id="CA654321") + assert key == "v1:get_canonical_pa_ids:CA654321" + + # Test with clingen_pa_id kwarg + mock_func.__name__ = "get_matching_registered_ca_ids" + key = clingen_cache_key_builder(mock_func, clingen_pa_id="PA987654") + assert key == "v1:get_matching_registered_ca_ids:PA987654" + + def test_cache_key_builder_includes_function_name(self): + """Verify cache keys are isolated by function name.""" + + def func1(): + pass + + def func2(): + pass + + func1.__name__ = "get_canonical_pa_ids" + func2.__name__ = "get_associated_clinvar_allele_id" + + key1 = clingen_cache_key_builder(func1, "CA123") + key2 = clingen_cache_key_builder(func2, "CA123") + + # Same allele ID, different functions = different cache keys + assert key1 == "v1:get_canonical_pa_ids:CA123" + assert key2 == "v1:get_associated_clinvar_allele_id:CA123" + assert key1 != key2 + + def test_cache_key_builder_raises_on_missing_id(self): + """Verify cache key builder raises error when allele_id is missing.""" + + def mock_func(): + pass + + mock_func.__name__ = "test_function" + + with pytest.raises(ValueError, match="allele_id is required"): + clingen_cache_key_builder(mock_func) + + def test_functions_are_async_with_cached_decorator(self): + """Verify all ClinGen API functions are async (required for aiocache).""" + assert inspect.iscoroutinefunction(get_canonical_pa_ids) + assert inspect.iscoroutinefunction(get_matching_registered_ca_ids) + assert inspect.iscoroutinefunction(get_associated_clinvar_allele_id) + + +@pytest.mark.unit +class 
TestCacheBackendConfiguration: + """Test cache backend configuration logic.""" + + def test_get_cache_configuration_redis_backend(self): + """Verify get_cache_configuration returns correct Redis config.""" + cache_class, cache_config = get_cache_configuration( + backend="redis", redis_host="test-host", redis_port=1234, redis_ssl=True + ) + + assert cache_class == Cache.REDIS + assert cache_config["endpoint"] == "test-host" + assert cache_config["port"] == 1234 + assert cache_config["ssl"] is True + assert cache_config["namespace"] == CACHE_KEY_PREFIX + + def test_get_cache_configuration_memory_backend(self): + """Verify get_cache_configuration returns correct memory config.""" + cache_class, cache_config = get_cache_configuration(backend="memory") + + assert cache_class == Cache.MEMORY + assert cache_config["namespace"] == CACHE_KEY_PREFIX + # Memory backend should not have Redis-specific config + assert "endpoint" not in cache_config + assert "port" not in cache_config + assert "ssl" not in cache_config + + def test_get_cache_configuration_invalid_backend(self): + """Verify get_cache_configuration raises error for invalid backend.""" + with pytest.raises(ValueError, match="Unsupported cache backend: invalid"): + get_cache_configuration(backend="invalid") + + def test_get_cache_configuration_defaults_from_env(self, monkeypatch): + """Verify get_cache_configuration reads from environment variables.""" + monkeypatch.setenv("CLINGEN_CACHE_BACKEND", "memory") + + cache_class, cache_config = get_cache_configuration() + + assert cache_class == Cache.MEMORY + + def test_get_cache_configuration_redis_defaults(self): + """Verify get_cache_configuration uses correct defaults for Redis.""" + cache_class, cache_config = get_cache_configuration(backend="redis") + + assert cache_class == Cache.REDIS + assert cache_config["endpoint"] == "redis" + assert cache_config["port"] == 6379 + assert cache_config["ssl"] is False + + def test_get_cache_configuration_redis_ssl_parsing(self): 
+ """Verify SSL boolean is parsed correctly from string.""" + # Test True + _, config_true = get_cache_configuration(backend="redis", redis_ssl=True) + assert config_true["ssl"] is True + + # Test False + _, config_false = get_cache_configuration(backend="redis", redis_ssl=False) + assert config_false["ssl"] is False + + def test_module_level_cache_config_initialized(self): + """Verify module-level CACHE_CLASS and CACHE_CONFIG are initialized.""" + # Should be initialized (either Redis or Memory depending on env) + assert CACHE_CLASS is not None + assert CACHE_CONFIG is not None + assert isinstance(CACHE_CONFIG, dict) + assert "namespace" in CACHE_CONFIG + + def test_cache_backend_is_memory_in_tests(self): + """Verify cache backend is configured to use memory in test environment.""" + # In test environment, CLINGEN_CACHE_BACKEND env var is set to "memory" in tests/conftest.py + assert CACHE_CLASS == Cache.MEMORY + assert CACHE_CONFIG["namespace"] == CACHE_KEY_PREFIX + # Memory backend should not have Redis-specific config + assert "endpoint" not in CACHE_CONFIG + assert "port" not in CACHE_CONFIG + assert "ssl" not in CACHE_CONFIG diff --git a/tests/lib/clingen/test_services.py b/tests/lib/clingen/test_services.py index 481c16d8e..74faed293 100644 --- a/tests/lib/clingen/test_services.py +++ b/tests/lib/clingen/test_services.py @@ -3,7 +3,6 @@ import os from datetime import datetime from unittest.mock import MagicMock, patch -from urllib import parse import pytest import requests @@ -12,16 +11,13 @@ cdot = pytest.importorskip("cdot") fastapi = pytest.importorskip("fastapi") -from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT +from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD from mavedb.lib.clingen.services import ( ClinGenAlleleRegistryService, ClinGenLdhService, - clingen_allele_id_from_ldh_variation, get_allele_registry_associations, - get_clingen_variation, 
) from mavedb.lib.utils import batched -from tests.helpers.constants import VALID_CLINGEN_CA_ID TEST_CLINGEN_URL = "https://pytest.clingen.com" TEST_CAR_URL = "https://pytest.car.clingen.com" @@ -219,66 +215,6 @@ def test_dispatch_submissions_no_batching(self, mock_batched, mock_authenticate, ) -@patch("mavedb.lib.clingen.services.requests.get") -def test_get_clingen_variation_success(mock_get): - mocked_response_json = {"data": {"ldFor": {"Variant": [{"id": "variant_1", "name": "Test Variant"}]}}} - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.json.return_value = mocked_response_json - mock_get.return_value = mock_response - - urn = "urn:example:variant" - result = get_clingen_variation(urn) - - assert result == mocked_response_json - mock_get.assert_called_once_with( - f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", - headers={"Accept": "application/json"}, - ) - - -@patch("mavedb.lib.clingen.services.requests.get") -def test_get_clingen_variation_failure(mock_get): - mock_response = MagicMock() - mock_response.status_code = 404 - mock_response.text = "Not Found" - mock_get.return_value = mock_response - - urn = "urn:example:nonexistent_variant" - result = get_clingen_variation(urn) - - assert result is None - mock_get.assert_called_once_with( - f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", - headers={"Accept": "application/json"}, - ) - - -def test_clingen_allele_id_from_ldh_variation_success(): - variation = {"data": {"ldFor": {"Variant": [{"entId": VALID_CLINGEN_CA_ID}]}}} - result = clingen_allele_id_from_ldh_variation(variation) - assert result == VALID_CLINGEN_CA_ID - - -def test_clingen_allele_id_from_ldh_variation_missing_key(): - variation = {"data": {"ldFor": {"Variant": []}}} - - result = clingen_allele_id_from_ldh_variation(variation) - assert result is None - - -def test_clingen_allele_id_from_ldh_variation_no_variation(): - result = clingen_allele_id_from_ldh_variation(None) - assert result is None - 
- -def test_clingen_allele_id_from_ldh_variation_key_error(): - variation = {"data": {}} - - result = clingen_allele_id_from_ldh_variation(variation) - assert result is None - - class TestClinGenAlleleRegistryService: def test_init(self, car_service): assert car_service.url == TEST_CAR_URL diff --git a/tests/lib/clinvar/network/test_utils.py b/tests/lib/clinvar/network/test_utils.py new file mode 100644 index 000000000..10f03b77c --- /dev/null +++ b/tests/lib/clinvar/network/test_utils.py @@ -0,0 +1,31 @@ +from datetime import datetime + +import pytest + +from mavedb.lib.clinvar.utils import fetch_clinvar_variant_data + + +@pytest.mark.network +@pytest.mark.slow +class TestFetchClinvarVariantDataIntegration: + @pytest.mark.asyncio + async def test_fetch_recent_variant_data(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + now = datetime.now() + # Attempt to fetch the most recent available month (previous month) + month = now.month - 1 if now.month > 1 else 12 + year = now.year if now.month > 1 else now.year - 1 + + content = await fetch_clinvar_variant_data(month, year) + assert content + + @pytest.mark.asyncio + async def test_fetch_older_variant_data(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + # Fetch an older known date + content = await fetch_clinvar_variant_data(2, 2015) + assert content diff --git a/tests/lib/clinvar/test_utils.py b/tests/lib/clinvar/test_utils.py new file mode 100644 index 000000000..66eec150f --- /dev/null +++ b/tests/lib/clinvar/test_utils.py @@ -0,0 +1,238 @@ +import csv +import gzip +import io +from datetime import datetime + +import pytest +import requests + +from mavedb.lib.clinvar.constants import CLINVAR_FIELDS_TO_KEEP +from mavedb.lib.clinvar.utils import ( + fetch_clinvar_variant_data, + validate_clinvar_variant_summary_date, +) 
+ + +def _mock_session(mock_get): + """Create a mock requests.Session whose .get delegates to mock_get.""" + + class _Session: + headers = {} + + def update(self, _): + pass + + def get(self, url, **kwargs): + return mock_get(url, **kwargs) + + session = _Session() + session.headers = {} + return session + + +def _make_gzipped_tsv(text: str) -> bytes: + buf = io.BytesIO() + with gzip.GzipFile(fileobj=buf, mode="wb") as gz: + gz.write(text.encode("utf-8")) + return buf.getvalue() + + +# Minimal valid TSV content with the required columns for parsing +MOCK_TSV_CONTENT = _make_gzipped_tsv( + "#AlleleID\tGeneSymbol\tClinicalSignificance\tReviewStatus\n" + "123\tBRCA1\tPathogenic\treviewed by expert panel\n" + "456\tTP53\tBenign\tcriteria provided, single submitter\n" +) + + +@pytest.mark.unit +class TestValidateClinvarVariantSummaryDate: + def test_valid_past_date(self): + validate_clinvar_variant_summary_date(2, 2015) + + def test_valid_current_month_and_year(self): + now = datetime.now() + validate_clinvar_variant_summary_date(now.month, now.year) + + def test_invalid_month_low(self): + with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): + validate_clinvar_variant_summary_date(0, 2020) + + def test_invalid_month_high(self): + with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): + validate_clinvar_variant_summary_date(13, 2020) + + def test_year_before_2015(self): + with pytest.raises(ValueError, match="ClinVar archived data is only available from February 2015 onwards."): + validate_clinvar_variant_summary_date(6, 2014) + + def test_year_2015_before_february(self): + with pytest.raises(ValueError, match="ClinVar archived data is only available from February 2015 onwards."): + validate_clinvar_variant_summary_date(1, 2015) + + def test_year_in_future(self): + future_year = datetime.now().year + 1 + with pytest.raises(ValueError, match="Cannot fetch ClinVar data for future years."): + 
validate_clinvar_variant_summary_date(6, future_year) + + def test_month_in_future_for_current_year(self): + now = datetime.now() + if now.month == 12: + pytest.skip("December, no future month in current year") + return + + future_month = now.month + 1 + with pytest.raises(ValueError, match="Cannot fetch ClinVar data for future months."): + validate_clinvar_variant_summary_date(future_month, now.year) + + +class MockResponse: + def __init__(self, content, status_code=200, raise_exc=None): + self.content = content + self.status_code = status_code + self._raise_exc = raise_exc + + def raise_for_status(self): + if self._raise_exc: + raise self._raise_exc + + +@pytest.mark.unit +class TestFetchClinvarVariantData: + @pytest.mark.asyncio + async def test_top_level_url_success(self, monkeypatch, tmp_path): + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + def mock_get(url, **kwargs): + return MockResponse(MOCK_TSV_CONTENT) + + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + result = await fetch_clinvar_variant_data(1, 2016) + + assert "123" in result + assert "456" in result + assert result["123"]["GeneSymbol"] == "BRCA1" + assert result["456"]["ClinicalSignificance"] == "Benign" + + @pytest.mark.asyncio + async def test_falls_back_to_archive_url(self, monkeypatch, tmp_path): + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + call_count = {"count": 0} + + def mock_get(url, **kwargs): + call_count["count"] += 1 + if call_count["count"] == 1: + return MockResponse(b"", status_code=404, raise_exc=requests.exceptions.HTTPError("404")) + return MockResponse(MOCK_TSV_CONTENT) + + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + result = await fetch_clinvar_variant_data(2, 2017) + + assert "123" in result + assert call_count["count"] == 2 + + @pytest.mark.asyncio + async def test_both_urls_fail_raises(self, monkeypatch, 
tmp_path): + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + def mock_get(url, **kwargs): + raise requests.RequestException("Not found") + + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + with pytest.raises(requests.RequestException, match="Not found"): + await fetch_clinvar_variant_data(3, 2018) + + @pytest.mark.asyncio + async def test_invalid_date_raises(self, monkeypatch, tmp_path): + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): + await fetch_clinvar_variant_data(0, 2020) + + @pytest.mark.asyncio + async def test_cache_hit_skips_network(self, monkeypatch, tmp_path): + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + call_count = {"count": 0} + + def mock_get(url, **kwargs): + call_count["count"] += 1 + return MockResponse(MOCK_TSV_CONTENT) + + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + + result1 = await fetch_clinvar_variant_data(5, 2020) + assert call_count["count"] == 1 + + result2 = await fetch_clinvar_variant_data(5, 2020) + assert call_count["count"] == 1 # No new network call + assert result1 == result2 + + @pytest.mark.asyncio + async def test_only_keeps_configured_fields(self, monkeypatch, tmp_path): + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + tsv_with_extra_cols = _make_gzipped_tsv( + "#AlleleID\tGeneSymbol\tClinicalSignificance\tReviewStatus\tExtraCol\n" + "789\tBRCA2\tLikely pathogenic\tno assertion\tignored\n" + ) + + def mock_get(url, **kwargs): + return MockResponse(tsv_with_extra_cols) + + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + result = await fetch_clinvar_variant_data(7, 2022) + + assert set(result["789"].keys()) == set(CLINVAR_FIELDS_TO_KEEP) + assert "ExtraCol" not in 
result["789"] + + @pytest.mark.asyncio + async def test_handles_large_csv_fields(self, monkeypatch, tmp_path): + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + large_field = "A" * (csv.field_size_limit() + 100) + tsv = _make_gzipped_tsv( + f"#AlleleID\tGeneSymbol\tClinicalSignificance\tReviewStatus\n999\t{large_field}\tBenign\tok\n" + ) + + def mock_get(url, **kwargs): + return MockResponse(tsv) + + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + result = await fetch_clinvar_variant_data(8, 2023) + + assert result["999"]["GeneSymbol"] == large_field + + @pytest.mark.asyncio + async def test_does_not_alter_csv_field_size_limit(self, monkeypatch, tmp_path): + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + default_limit = csv.field_size_limit() + + def mock_get(url, **kwargs): + return MockResponse(MOCK_TSV_CONTENT) + + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + await fetch_clinvar_variant_data(9, 2023) + + assert csv.field_size_limit() == default_limit + + @pytest.mark.asyncio + async def test_stale_cache_removed_on_fields_change(self, monkeypatch, tmp_path): + """When CLINVAR_FIELDS_TO_KEEP changes (different hash), the old pickle is deleted.""" + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + # Create a fake stale cache file with a different hash + stale_file = tmp_path / "variant_summary_2020-10.parsed.deadbeef.pkl" + stale_file.write_bytes(b"stale") + + def mock_get(url, **kwargs): + return MockResponse(MOCK_TSV_CONTENT) + + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + await fetch_clinvar_variant_data(10, 2020) + + assert not stale_file.exists() + pkl_files = list(tmp_path.glob("variant_summary_2020-10.parsed.*.pkl")) + assert len(pkl_files) == 1 diff --git a/tests/lib/conftest.py b/tests/lib/conftest.py index 
2befdb597..1e873c2c5 100644 --- a/tests/lib/conftest.py +++ b/tests/lib/conftest.py @@ -8,6 +8,9 @@ import pytest from humps import decamelize +from mavedb.models.enums import JobStatus +from mavedb.models.job_run import JobRun + from mavedb.models.acmg_classification import ACMGClassification from mavedb.models.enums.score_calibration_relation import ScoreCalibrationRelation from mavedb.models.enums.user_role import UserRole @@ -50,6 +53,13 @@ VALID_SCORE_SET_URN, ) +# Attempt to import optional lib level fixtures. If the modules they depend on are not installed, +# we won't have access to our full fixture suite and only a limited subset of tests can be run. +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass + @pytest.fixture def setup_lib_db(session): @@ -338,3 +348,17 @@ def mocked_gnomad_variant_row(): def data_files(tmp_path): copytree(Path(__file__).absolute().parent / "data", tmp_path / "data") return tmp_path / "data" + + +@pytest.fixture +def job_run(session): + """Create a persisted JobRun for use in annotation status tests.""" + job = JobRun( + job_type="test_annotation_job", + job_function="test_function", + status=JobStatus.RUNNING, + ) + session.add(job) + session.commit() + session.refresh(job) + return job diff --git a/tests/lib/conftest_optional.py b/tests/lib/conftest_optional.py new file mode 100644 index 000000000..f9dddf4ec --- /dev/null +++ b/tests/lib/conftest_optional.py @@ -0,0 +1,24 @@ +import pytest_asyncio +from aiocache import Cache + +from mavedb.lib.clingen.cache import CACHE_CLASS, CACHE_CONFIG + + +@pytest_asyncio.fixture +async def clear_cache(): + """Clear the aiocache cache before and after each test. + + This ensures test isolation when testing caching behavior for ClinGen API calls. + Uses the module-level cache configuration which is set to memory backend via + environment variable in tests/conftest.py. 
+ + Note: ClinVar TSV files use file-based caching, not aiocache, so they are not + affected by this fixture. ClinVar tests should use tmp_path fixture instead. + """ + cache = Cache(CACHE_CLASS, **CACHE_CONFIG) + await cache.clear() + + yield + + await cache.clear() + await cache.close() diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py new file mode 100644 index 000000000..52771b6bf --- /dev/null +++ b/tests/lib/test_annotation_status_manager.py @@ -0,0 +1,1366 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("psycopg2") + +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.variant import Variant + + +@pytest.fixture +def annotation_status_manager(session, job_run): + """Fixture to provide an AnnotationStatusManager instance.""" + return AnnotationStatusManager(session, job_run_id=job_run.id) + + +@pytest.fixture +def existing_annotation_status(session, annotation_status_manager, setup_lib_db_with_variant): + """Fixture to create an existing annotation status in the database.""" + + # Add initial annotation + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + + assert annotation.id is not None + assert annotation.current is True + + return annotation + + +@pytest.fixture +def existing_unversioned_annotation_status(session, annotation_status_manager, setup_lib_db_with_variant): + """Fixture to create an existing annotation status in 
the database.""" + + # Add initial annotation + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=None, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + ) + + assert annotation.id is not None + assert annotation.current is True + + return annotation + + +@pytest.mark.unit +class TestAnnotationStatusManagerCreateAnnotationUnit: + """Unit tests for AnnotationStatusManager.add_annotation method.""" + + @pytest.mark.parametrize( + "annotation_type", + AnnotationType._member_map_.values(), + ) + @pytest.mark.parametrize( + "status", + AnnotationStatus._member_map_.values(), + ) + def test_add_annotation_creates_entry_with_annotation_type_version_status( + self, session, annotation_status_manager, annotation_type, status, setup_lib_db_with_variant + ): + """Test that adding an annotation creates a new entry with correct type and version.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=annotation_type, + version="v1.0", + annotation_data={}, + current=True, + status=status, + ) + annotation_status_manager.flush() + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=annotation_type, + version="v1.0", + ) + + assert annotation is not None + assert annotation.annotation_type == annotation_type + assert annotation.status == status + assert annotation.version == "v1.0" + + def test_add_annotation_stores_job_run_id( + self, session, annotation_status_manager, job_run, setup_lib_db_with_variant + ): + """Test that every annotation is created with the job_run_id from the manager.""" + 
annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + status=AnnotationStatus.SUCCESS, + version="v1.0", + annotation_data={}, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1.0", + ) + + assert annotation is not None + assert annotation.job_run_id == job_run.id + + def test_add_annotation_persists_annotation_data( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """Test that adding an annotation persists the provided annotation data.""" + annotation_data = { + "annotation_metadata": {"some_key": "some_value"}, + "error_message": None, + } + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + status=AnnotationStatus.SUCCESS, + version="v1.0", + failure_category=None, + annotation_data=annotation_data, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1.0", + ) + + assert annotation is not None + assert annotation.failure_category is None + for key, value in annotation_data.items(): + assert getattr(annotation, key) == value + + def test_add_annotation_creates_entry_and_marks_previous_not_current( + self, session, job_run, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation creates a new entry and marks previous ones as not current.""" + manager = AnnotationStatusManager(session, job_run_id=job_run.id) + + # Add second annotation for same (variant, type, version) + manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + 
annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + manager.flush() + session.commit() + + annotation = manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + + assert annotation is not None + assert annotation.id is not None + assert annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is False + + def test_add_annotation_with_different_version_keeps_previous_current( + self, session, job_run, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation with a different version keeps previous current.""" + manager = AnnotationStatusManager(session, job_run_id=job_run.id) + + # Add second annotation for same (variant, type) but different version + manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + manager.flush() + session.commit() + + annotation = manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + + assert annotation is not None + assert annotation.id is not None + assert annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is True + + def test_add_annotation_with_different_type_keeps_previous_current( + self, session, job_run, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation with a different type keeps previous current.""" + manager = AnnotationStatusManager(session, job_run_id=job_run.id) + + # Add second annotation for same variant but 
different type + manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + manager.flush() + session.commit() + + annotation = manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version="v1", + ) + + assert annotation is not None + assert annotation.id is not None + assert annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is True + + def test_add_annotation_without_version(self, session, annotation_status_manager, setup_lib_db_with_variant): + """Test that adding an annotation without specifying version works correctly.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + version=None, + annotation_data={}, + status=AnnotationStatus.SKIPPED, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + ) + + assert annotation is not None + assert annotation.id is not None + assert annotation.version is None + assert annotation.current is True + + def test_add_annotation_multiple_without_version_marks_previous_not_current( + self, session, annotation_status_manager, existing_unversioned_annotation_status, setup_lib_db_with_variant + ): + """Test that adding multiple annotations without version marks previous ones as not current.""" + + # Add second annotation without version + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=None, + annotation_data={}, + 
status=AnnotationStatus.FAILED, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + second_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + ) + + assert second_annotation is not None + assert second_annotation.id is not None + assert second_annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_unversioned_annotation_status) + assert existing_unversioned_annotation_status.current is False + + def test_add_annotation_different_type_without_version_keeps_previous_current( + self, session, annotation_status_manager, existing_unversioned_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation of different type without version keeps previous current.""" + + # Add second annotation of different type without version + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version=None, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + second_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + ) + + assert second_annotation is not None + assert second_annotation.id is not None + assert second_annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_unversioned_annotation_status) + assert existing_unversioned_annotation_status.current is True + + def test_add_annotation_multiple_variants_independent_current_flags( + self, session, annotation_status_manager, setup_lib_db_with_score_set + ): + """Test that adding annotations for different variants maintains independent current flags.""" + + variant1 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.1A>G", 
hgvs_pro="NP_000000.1:p.Met1Val", data={}) + variant2 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.2A>T", hgvs_pro="NP_000000.1:p.Met2Val", data={}) + session.add_all([variant1, variant2]) + session.commit() + session.refresh(variant1) + session.refresh(variant2) + + # Add annotation for variant 1 + annotation_status_manager.add_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + + # Add annotation for variant 2 + annotation_status_manager.add_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + annotation1 = annotation_status_manager.get_current_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + annotation2 = annotation_status_manager.get_current_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + + assert annotation1 is not None + assert annotation1.id is not None + assert annotation1.current is True + + assert annotation2 is not None + assert annotation2.id is not None + assert annotation2.current is True + + +class TestAnnotationStatusManagerGetCurrentAnnotationUnit: + """Unit tests for AnnotationStatusManager.get_current_annotation method.""" + + def test_get_current_annotation_returns_none_when_no_entry( + self, annotation_status_manager, setup_lib_db_with_variant + ): + """Test that getting current annotation returns None when no entry exists.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation is None + + def test_get_current_annotation_returns_correct_entry( + self, session, 
annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation returns the correct entry.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation.id == existing_annotation_status.id + assert annotation.current is True + + def test_get_current_annotation_returns_none_for_non_current( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation returns None when the entry is not current.""" + # Mark existing annotation as not current + existing_annotation_status.current = False + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation is None + + def test_get_current_annotation_with_different_version_returns_none( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation with different version returns None.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + assert annotation is None + + def test_get_current_annotation_with_different_type_returns_none( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation with different type returns None.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version="v1", + ) + assert annotation is None + + def test_get_current_annotation_without_version_returns_correct_entry( + self, 
session, annotation_status_manager, existing_unversioned_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation without version returns the correct entry.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=None, + ) + assert annotation.id == existing_unversioned_annotation_status.id + assert annotation.current is True + + +class TestAnnotationStatusManagerIntegration: + """Integration tests for AnnotationStatusManager methods.""" + + def test_add_and_get_current_annotation_work_together( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """Test that adding and getting current annotation work together correctly.""" + # Add annotation + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + # Get current annotation + retrieved_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + + assert retrieved_annotation is not None + assert retrieved_annotation.current is True + assert retrieved_annotation.status == AnnotationStatus.SUCCESS + + @pytest.mark.parametrize( + "version", + ["v1.0", "v2.0", None], + ) + def test_add_multiple_and_get_current_returns_latest( + self, session, annotation_status_manager, version, setup_lib_db_with_variant + ): + """Test that adding multiple annotations and getting current returns the latest one.""" + # Add first annotation + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + 
status=AnnotationStatus.FAILED, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + # Add second annotation + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + # Get current annotation + retrieved_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + ) + + assert retrieved_annotation is not None + assert retrieved_annotation.current is True + assert retrieved_annotation.version == version + assert retrieved_annotation.status == AnnotationStatus.SUCCESS + + @pytest.mark.parametrize( + "version", + ["v1.0", "v2.0", None], + ) + def test_add_annotations_for_different_variants_and_get_current_independent( + self, session, annotation_status_manager, version, setup_lib_db_with_score_set + ): + """Test that adding annotations for different variants and getting current works independently.""" + + variant1 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={}) + variant2 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.2A>T", hgvs_pro="NP_000000.1:p.Met2Val", data={}) + session.add_all([variant1, variant2]) + session.commit() + session.refresh(variant1) + session.refresh(variant2) + + # Add annotation for variant 1 + annotation_status_manager.add_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + + # Add annotation for variant 2 + annotation_status_manager.add_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + status=AnnotationStatus.FAILED, + 
current=True, + ) + annotation_status_manager.flush() + session.commit() + + # Get current annotation for variant 1 + retrieved_annotation1 = annotation_status_manager.get_current_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + ) + + assert retrieved_annotation1 is not None + assert retrieved_annotation1.current is True + assert retrieved_annotation1.status == AnnotationStatus.SUCCESS + assert retrieved_annotation1.version == version + + # Get current annotation for variant 2 + retrieved_annotation2 = annotation_status_manager.get_current_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + ) + + assert retrieved_annotation2 is not None + assert retrieved_annotation2.current is True + assert retrieved_annotation2.status == AnnotationStatus.FAILED + assert retrieved_annotation2.version == version + + +@pytest.mark.unit +class TestAnnotationStatusManagerReplaceAllVersionsUnit: + """Unit tests for the replace_all_versions parameter of AnnotationStatusManager.add_annotation.""" + + def test_replace_all_versions_false_keeps_different_version_current( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Default behavior: a new annotation only retires the same version, not others.""" + # existing_annotation_status is version "v1", current=True + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.flush() + session.commit() + + new_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + assert new_annotation is not None + assert new_annotation.current is True + + 
session.refresh(existing_annotation_status) + assert existing_annotation_status.current is True + + def test_replace_all_versions_true_retires_all_versions( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """replace_all_versions=True retires all current records for (variant, type) regardless of version.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.flush() + + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.flush() + session.commit() + + # Both v1 and v2 are current at this point (replace_all_versions=False) + v1 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + v2 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + assert v1 is not None and v1.current is True + assert v2 is not None and v2.current is True + + # Now add v3 with replace_all_versions=True — should retire both v1 and v2 + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v3", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=True, + ) + annotation_status_manager.flush() + session.commit() + + session.refresh(v1) + session.refresh(v2) + assert v1.current is False + assert v2.current is False + + v3 = annotation_status_manager.get_current_annotation( + 
variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v3", + ) + assert v3 is not None and v3.current is True + + def test_replace_all_versions_true_only_affects_matching_type( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """replace_all_versions=True only retires records for the same annotation_type.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + vrs = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + clinvar = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v1", + ) + + # replace VRS_MAPPING only + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=True, + ) + annotation_status_manager.flush() + session.commit() + + session.refresh(vrs) + session.refresh(clinvar) + assert vrs.current is False + assert clinvar.current is True + + new_vrs = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + assert new_vrs is not None and new_vrs.current is True + + def test_replace_all_versions_true_only_affects_matching_variant( + self, 
session, annotation_status_manager, setup_lib_db_with_score_set + ): + """replace_all_versions=True only retires records for the same variant_id.""" + variant1 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={}) + variant2 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.2A>T", hgvs_pro="NP_000000.1:p.Met2Val", data={}) + session.add_all([variant1, variant2]) + session.commit() + session.refresh(variant1) + session.refresh(variant2) + + annotation_status_manager.add_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.add_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + ann1 = annotation_status_manager.get_current_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + ann2 = annotation_status_manager.get_current_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + + # replace variant1 only + annotation_status_manager.add_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=True, + ) + annotation_status_manager.flush() + session.commit() + + session.refresh(ann1) + session.refresh(ann2) + assert ann1.current is False + assert ann2.current is True # untouched + + new_ann1 = annotation_status_manager.get_current_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + assert new_ann1 is not None and new_ann1.current is True + + def test_replace_all_versions_true_same_version_also_retired( + self, 
session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """replace_all_versions=True retires a same-version record just as replace_all_versions=False would.""" + # existing_annotation_status is version "v1" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + replace_all_versions=True, + ) + annotation_status_manager.flush() + session.commit() + + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is False + + new_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert new_annotation is not None + assert new_annotation.current is True + assert new_annotation.status == AnnotationStatus.FAILED + + +@pytest.mark.unit +class TestAnnotationStatusManagerBatchingUnit: + """Unit tests for batching and flush behavior.""" + + def test_flush_noop_when_empty(self, annotation_status_manager): + """flush() with no pending annotations does nothing and does not error.""" + annotation_status_manager.flush() # should not raise + + def test_auto_flush_at_batch_size(self, session, setup_lib_db_with_score_set): + """Annotations are auto-flushed to the DB when batch_size is reached.""" + variants = [ + Variant(score_set_id=1, hgvs_nt=f"NM_000000.1:c.{i}A>G", hgvs_pro=f"NP_000000.1:p.Met{i}Val", data={}) + for i in range(3) + ] + session.add_all(variants) + session.commit() + for v in variants: + session.refresh(v) + + manager = AnnotationStatusManager(session, batch_size=2) + + # Add first — stays pending (below threshold) + manager.add_annotation( + variant_id=variants[0].id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + 
assert len(manager._pending) == 1 + + # Add second — triggers auto-flush (reaches batch_size=2) + manager.add_annotation( + variant_id=variants[1].id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + assert len(manager._pending) == 0 # flushed + + # Verify the auto-flushed rows are visible in the DB + ann = manager.get_current_annotation( + variant_id=variants[0].id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert ann is not None and ann.current is True + + # Add a third — stays pending (below threshold again) + manager.add_annotation( + variant_id=variants[2].id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + assert len(manager._pending) == 1 + + # Explicit flush persists the remainder + manager.flush() + assert len(manager._pending) == 0 + + ann3 = manager.get_current_annotation( + variant_id=variants[2].id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert ann3 is not None and ann3.current is True + + def test_get_current_annotation_auto_flushes_pending( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_current_annotation() flushes pending writes before querying.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + # No explicit flush — get_current_annotation should auto-flush + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation is not None + assert annotation.current is True + assert len(annotation_status_manager._pending) == 0 + + def test_flush_clears_internal_buffers(self, 
session, annotation_status_manager, setup_lib_db_with_variant): + """flush() clears both _pending and _retirement_filters.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + assert len(annotation_status_manager._pending) == 1 + assert len(annotation_status_manager._retirement_filters) == 1 + + annotation_status_manager.flush() + assert len(annotation_status_manager._pending) == 0 + assert len(annotation_status_manager._retirement_filters) == 0 + + def test_batch_retirement_groups_by_annotation_type( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """Multiple annotation types in one batch are retired independently.""" + # Create initial annotations for two types + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + vrs_v1 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + clinvar_v1 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v1", + ) + + # Now add replacements for both types in one batch + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.FAILED, 
+ current=True, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v2", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + session.refresh(vrs_v1) + session.refresh(clinvar_v1) + assert vrs_v1.current is False + assert clinvar_v1.current is False + + vrs_v2 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + clinvar_v2 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v2", + ) + assert vrs_v2 is not None and vrs_v2.current is True + assert clinvar_v2 is not None and clinvar_v2.current is True + + +@pytest.mark.unit +class TestAnnotationStatusManagerAuditHelpersUnit: + """Unit tests for audit query helpers: get_annotation_history and get_all_current_annotations.""" + + def test_get_annotation_history_returns_all_rows_newest_first( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_annotation_history returns both current and retired rows, newest first.""" + # Create two annotations for the same (variant, type, version) — first gets retired + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + history = annotation_status_manager.get_annotation_history( + 
variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + + assert len(history) == 2 + # Newest first + assert history[0].status == AnnotationStatus.FAILED + assert history[0].current is True + assert history[1].status == AnnotationStatus.SUCCESS + assert history[1].current is False + + def test_get_annotation_history_filters_by_version( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_annotation_history with version only returns matching rows.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-01", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-02", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.flush() + session.commit() + + history_jan = annotation_status_manager.get_annotation_history( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-01", + ) + assert len(history_jan) == 1 + assert history_jan[0].version == "2025-01" + + def test_get_annotation_history_without_version_returns_all_versions( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_annotation_history without version returns rows across all versions.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-01", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + 
annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-02", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.flush() + session.commit() + + history = annotation_status_manager.get_annotation_history( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + ) + assert len(history) == 2 + + def test_get_annotation_history_empty_for_no_records(self, annotation_status_manager, setup_lib_db_with_variant): + """get_annotation_history returns empty list when no records exist.""" + history = annotation_status_manager.get_annotation_history( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + ) + assert history == [] + + def test_get_annotation_history_auto_flushes_pending( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_annotation_history flushes pending writes before querying.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + # No explicit flush + history = annotation_status_manager.get_annotation_history( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + ) + assert len(history) == 1 + assert len(annotation_status_manager._pending) == 0 + + def test_get_all_current_annotations_returns_all_types( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_all_current_annotations returns current annotations across all types.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.add_annotation( + 
variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-01", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version=None, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + all_current = annotation_status_manager.get_all_current_annotations( + variant_id=setup_lib_db_with_variant.id, + ) + assert len(all_current) == 3 + types = {a.annotation_type for a in all_current} + assert types == { + AnnotationType.VRS_MAPPING, + AnnotationType.CLINVAR_CONTROL, + AnnotationType.CLINGEN_ALLELE_ID, + } + + def test_get_all_current_annotations_excludes_retired( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_all_current_annotations does not include retired rows.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + + # Replace it — v1 becomes retired + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + all_current = annotation_status_manager.get_all_current_annotations( + variant_id=setup_lib_db_with_variant.id, + ) + assert len(all_current) == 1 + assert all_current[0].version == "v2" + + def test_get_all_current_annotations_empty_for_no_records( + self, annotation_status_manager, setup_lib_db_with_variant + ): + """get_all_current_annotations returns empty list when no records exist.""" + result = 
annotation_status_manager.get_all_current_annotations( + variant_id=setup_lib_db_with_variant.id, + ) + assert result == [] + + def test_get_all_current_annotations_auto_flushes_pending( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_all_current_annotations flushes pending writes before querying.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + # No explicit flush + result = annotation_status_manager.get_all_current_annotations( + variant_id=setup_lib_db_with_variant.id, + ) + assert len(result) == 1 + assert len(annotation_status_manager._pending) == 0 + + def test_get_all_current_annotations_ordered_by_type_then_version( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_all_current_annotations returns results ordered by annotation_type, version.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-02", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-01", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.flush() + session.commit() + + all_current = annotation_status_manager.get_all_current_annotations( + variant_id=setup_lib_db_with_variant.id, + ) + assert len(all_current) == 3 + # clinvar_control < 
vrs_mapping alphabetically + assert all_current[0].annotation_type == AnnotationType.CLINVAR_CONTROL + assert all_current[0].version == "2025-01" + assert all_current[1].annotation_type == AnnotationType.CLINVAR_CONTROL + assert all_current[1].version == "2025-02" + assert all_current[2].annotation_type == AnnotationType.VRS_MAPPING + + +@pytest.mark.unit +class TestVariantAnnotationStatusReprUnit: + """Unit tests for the VariantAnnotationStatus __repr__ method.""" + + def test_repr_includes_key_fields(self, session, annotation_status_manager, setup_lib_db_with_variant): + """__repr__ includes id, variant_id, type, version, status, current, and created_at.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + repr_str = repr(annotation) + + assert "VariantAnnotationStatus" in repr_str + assert f"id={annotation.id}" in repr_str + assert f"variant_id={setup_lib_db_with_variant.id}" in repr_str + assert "type='vrs_mapping'" in repr_str + assert "version='v1'" in repr_str + assert "status='success'" in repr_str + assert "current=True" in repr_str + assert "created_at=" in repr_str diff --git a/tests/lib/test_gnomad.py b/tests/lib/test_gnomad.py index 043c6c56a..14dde9527 100644 --- a/tests/lib/test_gnomad.py +++ b/tests/lib/test_gnomad.py @@ -1,25 +1,26 @@ # ruff: noqa: E402 -import pytest -import importlib from unittest.mock import patch +import pytest + +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + pyathena = pytest.importorskip("pyathena") fastapi = pytest.importorskip("fastapi") from mavedb.lib.gnomad import ( - gnomad_identifier, 
allele_list_from_list_like_string, + gnomad_identifier, + gnomad_table_name, link_gnomad_variants_to_mapped_variants, ) -from mavedb.models.mapped_variant import MappedVariant from mavedb.models.gnomad_variant import GnomADVariant - +from mavedb.models.mapped_variant import MappedVariant from tests.helpers.constants import ( - TEST_GNOMAD_ALLELE_NUMBER, + TEST_GNOMAD_DATA_VERSION, TEST_GNOMAD_VARIANT, TEST_MINIMAL_MAPPED_VARIANT, - TEST_GNOMAD_DATA_VERSION, ) ### Tests for gnomad_identifier function ### @@ -63,22 +64,17 @@ def test_gnomad_identifier_raises_with_no_alleles(): ### Tests for gnomad_table_name function ### -def test_gnomad_table_name_returns_expected(monkeypatch): - monkeypatch.setenv("GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION) - # Reload the module to update GNOMAD_DATA_VERSION global - import mavedb.lib.gnomad as gnomad_mod - - importlib.reload(gnomad_mod) - assert gnomad_mod.gnomad_table_name() == TEST_GNOMAD_DATA_VERSION.replace(".", "_") - +def test_gnomad_table_name_returns_expected(): + with patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION): + assert gnomad_table_name() == TEST_GNOMAD_DATA_VERSION.replace(".", "_") -def test_gnomad_table_name_raises_if_env_not_set(monkeypatch): - monkeypatch.delenv("GNOMAD_DATA_VERSION", raising=False) - import mavedb.lib.gnomad as gnomad_mod - importlib.reload(gnomad_mod) - with pytest.raises(ValueError, match="GNOMAD_DATA_VERSION environment variable is not set."): - gnomad_mod.gnomad_table_name() +def test_gnomad_table_name_raises_if_env_not_set(): + with ( + pytest.raises(ValueError, match="GNOMAD_DATA_VERSION environment variable is not set."), + patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", None), + ): + gnomad_table_name() ### Tests for allele_list_from_list_like_string function ### @@ -125,6 +121,16 @@ def test_allele_list_from_list_like_string_invalid_format_not_list(): ### Tests for link_gnomad_variants_to_mapped_variants function ### +def _verify_annotation_status(session, 
mapped_variants, expected_version): + annotations = session.query(VariantAnnotationStatus).all() + assert len(annotations) == len(mapped_variants) + + for mapped_variant, annotation in zip(mapped_variants, annotations): + assert annotation.variant_id == mapped_variant.variant_id + assert annotation.annotation_type == "gnomad_allele_frequency" + assert annotation.version == expected_version + + def test_links_new_gnomad_variant_to_mapped_variant( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant ): @@ -148,6 +154,8 @@ def test_links_new_gnomad_variant_to_mapped_variant( for attr in edited_saved_gnomad_variant: assert getattr(mapped_variant.gnomad_variants[0], attr) == edited_saved_gnomad_variant[attr] + _verify_annotation_status(session, [mapped_variant], TEST_GNOMAD_DATA_VERSION) + def test_can_link_gnomad_variants_with_none_type_faf_fields( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant @@ -175,6 +183,8 @@ def test_can_link_gnomad_variants_with_none_type_faf_fields( for attr in gnomad_variant_comparator: assert getattr(mapped_variant.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + _verify_annotation_status(session, [mapped_variant], TEST_GNOMAD_DATA_VERSION) + def test_links_existing_gnomad_variant(session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant): gnomad_variant = GnomADVariant(**TEST_GNOMAD_VARIANT) @@ -199,8 +209,10 @@ def test_links_existing_gnomad_variant(session, mocked_gnomad_variant_row, setup for attr in edited_saved_gnomad_variant: assert getattr(mapped_variant.gnomad_variants[0], attr) == edited_saved_gnomad_variant[attr] + _verify_annotation_status(session, [mapped_variant], TEST_GNOMAD_DATA_VERSION) -def test_removes_existing_gnomad_variant_with_same_version( + +def test_adding_existing_gnomad_variant_with_same_version_does_not_result_in_duplication( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant ): mapped_variant = setup_lib_db_with_mapped_variant @@ 
-212,7 +224,6 @@ def test_removes_existing_gnomad_variant_with_same_version( result = link_gnomad_variants_to_mapped_variants(session, [mocked_gnomad_variant_row]) assert result == 1 - setattr(mocked_gnomad_variant_row, "joint.freq.all.ac", "1234") with patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION): result = link_gnomad_variants_to_mapped_variants(session, [mocked_gnomad_variant_row]) assert result == 1 @@ -221,8 +232,6 @@ def test_removes_existing_gnomad_variant_with_same_version( session.refresh(mapped_variant) edited_saved_gnomad_variant = TEST_GNOMAD_VARIANT.copy() - edited_saved_gnomad_variant["allele_count"] = 1234 - edited_saved_gnomad_variant["allele_frequency"] = float(1234 / int(TEST_GNOMAD_ALLELE_NUMBER)) edited_saved_gnomad_variant.pop("creation_date") edited_saved_gnomad_variant.pop("modification_date") @@ -230,6 +239,8 @@ def test_removes_existing_gnomad_variant_with_same_version( for attr in edited_saved_gnomad_variant: assert getattr(mapped_variant.gnomad_variants[0], attr) == edited_saved_gnomad_variant[attr] + _verify_annotation_status(session, [mapped_variant, mapped_variant], TEST_GNOMAD_DATA_VERSION) + def test_links_multiple_rows_and_variants(session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant): mapped_variant1 = setup_lib_db_with_mapped_variant @@ -256,11 +267,15 @@ def test_links_multiple_rows_and_variants(session, mocked_gnomad_variant_row, se for attr in gnomad_variant_comparator: assert getattr(mv.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + _verify_annotation_status(session, [mapped_variant1, mapped_variant2], TEST_GNOMAD_DATA_VERSION) + def test_returns_zero_when_no_mapped_variants(session, mocked_gnomad_variant_row): result = link_gnomad_variants_to_mapped_variants(session, [mocked_gnomad_variant_row]) assert result == 0 + _verify_annotation_status(session, [], TEST_GNOMAD_DATA_VERSION) + def test_only_current_flag_filters_variants(session, mocked_gnomad_variant_row, 
setup_lib_db_with_mapped_variant): mapped_variant1 = setup_lib_db_with_mapped_variant @@ -287,6 +302,8 @@ def test_only_current_flag_filters_variants(session, mocked_gnomad_variant_row, for attr in gnomad_variant_comparator: assert getattr(mapped_variant2.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + _verify_annotation_status(session, [mapped_variant2], TEST_GNOMAD_DATA_VERSION) + def test_only_current_flag_is_false_operates_on_all_variants( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant @@ -313,3 +330,5 @@ def test_only_current_flag_is_false_operates_on_all_variants( assert len(mv.gnomad_variants) == 1 for attr in gnomad_variant_comparator: assert getattr(mv.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + + _verify_annotation_status(session, [mapped_variant1, mapped_variant2], TEST_GNOMAD_DATA_VERSION) diff --git a/tests/lib/test_slack.py b/tests/lib/test_slack.py new file mode 100644 index 000000000..f39de007c --- /dev/null +++ b/tests/lib/test_slack.py @@ -0,0 +1,197 @@ +# ruff: noqa: E402 + +"""Tests for Slack notification utilities.""" + +from unittest.mock import patch + +import pytest + +pytest.importorskip("slack_sdk", reason="slack_sdk is required to test Slack notification utilities") + +from mavedb.lib.slack import _retry_status_text, send_slack_error, send_slack_job_error, send_slack_job_failure + + +@pytest.mark.unit +class TestSendSlackError: + """Tests for send_slack_error resilience.""" + + def test_send_slack_error_does_not_propagate_exceptions(self): + """send_slack_error should catch and log any internal exceptions rather than propagating them.""" + with ( + patch("mavedb.lib.slack.send_slack_message", side_effect=RuntimeError("Slack is down")), + patch("mavedb.lib.slack.logger") as mock_logger, + ): + # Should not raise + send_slack_error(ValueError("original error")) + + mock_logger.critical.assert_called_once_with("Failed to send Slack error notification", exc_info=True) + + def 
test_send_slack_error_calls_send_slack_message(self): + """send_slack_error should format and send the error via send_slack_message.""" + with patch("mavedb.lib.slack.send_slack_message") as mock_send: + send_slack_error(ValueError("test error")) + + mock_send.assert_called_once() + sent_text = mock_send.call_args[0][0] + assert "ValueError" in sent_text + assert "test error" in sent_text + + def test_send_slack_error_with_string_error(self): + """send_slack_error should handle non-exception inputs gracefully.""" + with patch("mavedb.lib.slack.send_slack_message") as mock_send: + send_slack_error("plain string error") + + mock_send.assert_called_once() + sent_text = mock_send.call_args[0][0] + assert "plain string error" in sent_text + + +@pytest.mark.unit +class TestRetryStatusText: + """Tests for _retry_status_text helper.""" + + def test_will_retry_first_attempt(self): + assert _retry_status_text(retry_count=0, max_retries=3, will_retry=True) == "Attempt 1 of 4 — will retry" + + def test_will_retry_second_attempt(self): + assert _retry_status_text(retry_count=1, max_retries=3, will_retry=True) == "Attempt 2 of 4 — will retry" + + def test_final_retry_exhausted(self): + assert ( + _retry_status_text(retry_count=3, max_retries=3, will_retry=False) + == "Attempt 4 of 4 — this job will not be retried" + ) + + def test_no_retries_configured(self): + assert ( + _retry_status_text(retry_count=0, max_retries=0, will_retry=False) + == "Attempt 1 of 1 — this job will not be retried" + ) + + +@pytest.mark.unit +class TestSendSlackJobFailure: + """Tests for send_slack_job_failure.""" + + def test_includes_retry_context_when_will_retry(self): + with patch("mavedb.lib.slack._send_slack_blocks") as mock_send: + send_slack_job_failure( + job_urn="urn:mavedb:00000001-a-1", + job_function="map_variants", + reason="timeout", + failure_category="TIMEOUT", + retry_count=0, + max_retries=3, + will_retry=True, + ) + + mock_send.assert_called_once() + fallback, blocks = 
mock_send.call_args[0] + assert "will retry" in fallback + fields = blocks[1]["fields"] + retry_field = next(f for f in fields if "*Retry*" in f["text"]) + assert "Attempt 1 of 4" in retry_field["text"] + assert "will retry" in retry_field["text"] + + def test_includes_retry_context_when_no_more_retries(self): + with patch("mavedb.lib.slack._send_slack_blocks") as mock_send: + send_slack_job_failure( + job_urn="urn:mavedb:00000001-a-1", + job_function="map_variants", + reason="timeout", + failure_category="TIMEOUT", + retry_count=3, + max_retries=3, + will_retry=False, + ) + + mock_send.assert_called_once() + fallback, blocks = mock_send.call_args[0] + assert "will not be retried" in fallback + fields = blocks[1]["fields"] + retry_field = next(f for f in fields if "*Retry*" in f["text"]) + assert "Attempt 4 of 4" in retry_field["text"] + assert "will not be retried" in retry_field["text"] + + def test_defaults_produce_no_retry_text(self): + """Default parameters (retry_count=0, max_retries=0, will_retry=False) show attempt 1 of 1.""" + with patch("mavedb.lib.slack._send_slack_blocks") as mock_send: + send_slack_job_failure( + job_urn="urn:mavedb:00000001-a-1", + job_function="map_variants", + reason="bad data", + failure_category="VALIDATION_ERROR", + ) + + mock_send.assert_called_once() + _, blocks = mock_send.call_args[0] + fields = blocks[1]["fields"] + retry_field = next(f for f in fields if "*Retry*" in f["text"]) + assert "Attempt 1 of 1" in retry_field["text"] + + def test_does_not_propagate_exceptions(self): + with ( + patch("mavedb.lib.slack._send_slack_blocks", side_effect=RuntimeError("Slack is down")), + patch("mavedb.lib.slack.logger") as mock_logger, + ): + send_slack_job_failure( + job_urn="urn:test", + job_function="fn", + reason="r", + failure_category="c", + ) + + mock_logger.critical.assert_called_once_with("Failed to send Slack job failure notification", exc_info=True) + + +@pytest.mark.unit +class TestSendSlackJobError: + """Tests for 
send_slack_job_error.""" + + def test_includes_retry_context_when_will_retry(self): + with patch("mavedb.lib.slack._send_slack_blocks") as mock_send: + send_slack_job_error( + job_urn="urn:mavedb:00000001-a-1", + job_function="create_variants", + err=RuntimeError("boom"), + failure_category="NETWORK_ERROR", + retry_count=1, + max_retries=3, + will_retry=True, + ) + + mock_send.assert_called_once() + fallback, blocks = mock_send.call_args[0] + assert "will retry" in fallback + fields = blocks[1]["fields"] + retry_field = next(f for f in fields if "*Retry*" in f["text"]) + assert "Attempt 2 of 4" in retry_field["text"] + assert "will retry" in retry_field["text"] + + def test_includes_retry_context_when_exhausted(self): + with patch("mavedb.lib.slack._send_slack_blocks") as mock_send: + send_slack_job_error( + job_urn="urn:mavedb:00000001-a-1", + job_function="create_variants", + err=RuntimeError("boom"), + failure_category="NETWORK_ERROR", + retry_count=3, + max_retries=3, + will_retry=False, + ) + + mock_send.assert_called_once() + _, blocks = mock_send.call_args[0] + fields = blocks[1]["fields"] + retry_field = next(f for f in fields if "*Retry*" in f["text"]) + assert "Attempt 4 of 4" in retry_field["text"] + assert "will not be retried" in retry_field["text"] + + def test_does_not_propagate_exceptions(self): + with ( + patch("mavedb.lib.slack._send_slack_blocks", side_effect=RuntimeError("Slack is down")), + patch("mavedb.lib.slack.logger") as mock_logger, + ): + send_slack_job_error(job_urn="urn:test", job_function="fn", err=ValueError("e")) + + mock_logger.critical.assert_called_once_with("Failed to send Slack job error notification", exc_info=True) diff --git a/tests/lib/workflow/conftest.py b/tests/lib/workflow/conftest.py new file mode 100644 index 000000000..0f9d9e507 --- /dev/null +++ b/tests/lib/workflow/conftest.py @@ -0,0 +1,111 @@ +from unittest.mock import patch + +import pytest + +from mavedb.models.enums.job_pipeline import DependencyType +from 
mavedb.models.job_run import JobRun +from mavedb.models.user import User +from tests.helpers.constants import TEST_USER + +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass + + +@pytest.fixture +def sample_job_definition(): + """Provides a sample job definition for testing.""" + return { + "key": "sample_job", + "type": "data_processing", + "function": "process_data", + "params": {"param1": "value1", "param2": "value2", "required_param": None}, + "dependencies": [], + } + + +@pytest.fixture +def sample_independent_pipeline_definition(sample_job_definition): + """Provides a sample pipeline definition for testing.""" + return { + "name": "sample_pipeline", + "description": "A sample pipeline for testing purposes.", + "job_definitions": [sample_job_definition], + } + + +@pytest.fixture +def sample_dependent_pipeline_definition(): + """Provides a sample pipeline definition with job dependencies for testing.""" + job_def_1 = { + "key": "job_1", + "type": "data_processing", + "function": "process_data_1", + "params": {"paramA": None}, + "dependencies": [], + } + job_def_2 = { + "key": "job_2", + "type": "data_processing", + "function": "process_data_2", + "params": {"paramB": None}, + "dependencies": [("job_1", DependencyType.SUCCESS_REQUIRED)], + } + return { + "name": "dependent_pipeline", + "description": "A sample pipeline with job dependencies for testing.", + "job_definitions": [job_def_1, job_def_2], + } + + +@pytest.fixture +def with_test_pipeline_definition_ctx(sample_dependent_pipeline_definition, sample_independent_pipeline_definition): + """Fixture to temporarily add a test pipeline definition.""" + test_pipeline_definitions = { + sample_dependent_pipeline_definition["name"]: sample_dependent_pipeline_definition, + sample_independent_pipeline_definition["name"]: sample_independent_pipeline_definition, + } + + with patch("mavedb.lib.workflow.pipeline_factory.PIPELINE_DEFINITIONS", test_pipeline_definitions): + yield + + 
+@pytest.fixture +def test_user(session): + """Fixture to create and provide a test user in the database.""" + db = session + user = User(**TEST_USER) + db.add(user) + db.commit() + yield user + + +@pytest.fixture +def test_workflow_parent_job_run(session, test_user): + """Fixture to create and provide a test parent job run for workflow tests.""" + parent_job_run = JobRun( + job_type="test_type", + job_function="test_function", + job_params={}, + correlation_id="test_correlation_id", + ) + session.add(parent_job_run) + session.commit() + + yield parent_job_run + + +@pytest.fixture +def test_workflow_child_job_run(session, test_user, test_workflow_parent_job_run): + """Fixture to create and provide a test child job run for workflow tests.""" + child_job_run = JobRun( + job_type="test_type", + job_function="test_function", + job_params={}, + correlation_id="test_correlation_id", + ) + session.add(child_job_run) + session.commit() + + yield child_job_run diff --git a/tests/lib/workflow/conftest_optional.py b/tests/lib/workflow/conftest_optional.py new file mode 100644 index 000000000..f165cc741 --- /dev/null +++ b/tests/lib/workflow/conftest_optional.py @@ -0,0 +1,16 @@ +import pytest + +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.lib.workflow.pipeline_factory import PipelineFactory + + +@pytest.fixture +def job_factory(session): + """Fixture to provide a mocked JobFactory instance.""" + yield JobFactory(session) + + +@pytest.fixture +def pipeline_factory(session): + """Fixture to provide a mocked PipelineFactory instance.""" + yield PipelineFactory(session) diff --git a/tests/lib/workflow/test_job_factory.py b/tests/lib/workflow/test_job_factory.py new file mode 100644 index 000000000..4ea1b5d0b --- /dev/null +++ b/tests/lib/workflow/test_job_factory.py @@ -0,0 +1,337 @@ +# ruff: noqa: E402 +import pytest + +from mavedb.models.job_dependency import JobDependency + +pytest.importorskip("fastapi") + +from unittest.mock import patch + +from 
mavedb.models.pipeline import Pipeline + + +@pytest.mark.unit +class TestJobFactoryCreateJobRunUnit: + """Unit tests for the JobFactory create_job_run method.""" + + def test_create_job_run_persists_preset_params_from_definition(self, job_factory, sample_job_definition): + existing_params = {"param1": "new_value1", "param2": "new_value2", "required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=existing_params, + pipeline_id=1, + ) + + assert job_run.job_params["param1"] == "value1" + assert job_run.job_params["param2"] == "value2" + + def test_create_job_run_raises_error_for_missing_params(self, job_factory, sample_job_definition): + incomplete_params = {"param1": "new_value1"} # Missing required_param + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=incomplete_params, + pipeline_id=1, + ) + + assert "Missing required param: required_param" in str(exc_info.value) + + def test_create_job_run_fills_in_required_params(self, job_factory, sample_job_definition): + pipeline_params = {"required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=pipeline_params, + pipeline_id=1, + ) + + assert job_run.job_params["param1"] == "value1" + assert job_run.job_params["param2"] == "value2" + assert job_run.job_params["required_param"] == "required_value" + + def test_create_job_run_persists_correlation_id(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.correlation_id == "test-correlation-id" + + def 
test_create_job_run_persists_mavedb_version(self, job_factory, sample_job_definition): + with patch("mavedb.lib.workflow.job_factory.mavedb_version", "1.2.3"): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.mavedb_version == "1.2.3" + + def test_create_job_run_persists_job_type_and_function(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.job_type == sample_job_definition["type"] + assert job_run.job_function == sample_job_definition["function"] + + def test_create_job_run_ignores_extra_pipeline_params(self, job_factory, sample_job_definition): + pipeline_params = { + "param1": "new_value1", + "param2": "new_value2", + "required_param": "required_value", + "extra_param": "should_be_ignored", + } + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=pipeline_params, + pipeline_id=1, + ) + + assert "extra_param" not in job_run.job_params + + def test_create_job_run_with_no_pipeline_id(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + ) + + assert job_run.pipeline_id is None + + def test_create_job_run_associates_with_pipeline(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": 
"required_value"}, + pipeline_id=42, + ) + + assert job_run.pipeline_id == 42 + + def test_create_job_run_adds_to_session(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run in job_factory.session.new + + def test_create_job_run_sets_retry_delay_seconds_from_definition(self, job_factory, sample_job_definition): + sample_job_definition["retry_delay_seconds"] = 30 + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.retry_delay_seconds == 30 + + def test_create_job_run_retry_delay_seconds_defaults_to_none_when_absent(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.retry_delay_seconds is None + + +@pytest.mark.integration +class TestJobFactoryCreateJobRunIntegration: + """Integration tests for the JobFactory create_job_run method within pipeline execution.""" + + def test_create_job_run_independent(self, job_factory, sample_job_definition): + pipeline_params = {"required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="integration-correlation-id", + pipeline_params=pipeline_params, + ) + job_factory.session.commit() + + retrieved_job_run = job_factory.session.get(type(job_run), job_run.id) + + assert retrieved_job_run is not None + assert retrieved_job_run.job_type == sample_job_definition["type"] + assert retrieved_job_run.job_function == sample_job_definition["function"] + assert 
retrieved_job_run.job_params["param1"] == "value1" + assert retrieved_job_run.job_params["param2"] == "value2" + assert retrieved_job_run.job_params["required_param"] == "required_value" + assert retrieved_job_run.correlation_id == "integration-correlation-id" + assert retrieved_job_run.pipeline_id is None + + def test_create_job_run_with_pipeline(self, job_factory, sample_job_definition): + pipeline = Pipeline( + name="Test Pipeline", + description="A pipeline for testing JobFactory integration.", + ) + job_factory.session.add(pipeline) + job_factory.session.flush() + + pipeline_params = {"required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="integration-correlation-id", + pipeline_params=pipeline_params, + pipeline_id=pipeline.id, + ) + job_factory.session.commit() + + retrieved_job_run = job_factory.session.get(type(job_run), job_run.id) + + assert retrieved_job_run is not None + assert retrieved_job_run.job_type == sample_job_definition["type"] + assert retrieved_job_run.job_function == sample_job_definition["function"] + assert retrieved_job_run.job_params["param1"] == "value1" + assert retrieved_job_run.job_params["param2"] == "value2" + assert retrieved_job_run.job_params["required_param"] == "required_value" + assert retrieved_job_run.correlation_id == "integration-correlation-id" + assert retrieved_job_run.pipeline_id == pipeline.id + + def test_create_job_run_missing_params_raises_error(self, job_factory, sample_job_definition): + incomplete_params = {"param1": "new_value1"} # Missing required_param + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="integration-correlation-id", + pipeline_params=incomplete_params, + pipeline_id=100, + ) + + assert "Missing required param: required_param" in str(exc_info.value) + + +@pytest.mark.unit +class TestJobFactoryCreateJobDependencyUnit: + """Unit tests for the 
JobFactory create_job_dependency method.""" + + def test_create_job_dependency_persists_fields( + self, job_factory, test_workflow_parent_job_run, test_workflow_child_job_run + ): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = test_workflow_child_job_run.id + dependency_type = "success_required" + + job_dependency = job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + dependency_type=dependency_type, + ) + + assert job_dependency.id == child_job_run_id + assert job_dependency.depends_on_job_id == parent_job_run_id + assert job_dependency.dependency_type == dependency_type + + def test_create_job_dependency_defaults_dependency_type( + self, job_factory, test_workflow_parent_job_run, test_workflow_child_job_run + ): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = test_workflow_child_job_run.id + + job_dependency = job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert job_dependency.id == child_job_run_id + assert job_dependency.depends_on_job_id == parent_job_run_id + assert job_dependency.dependency_type == "success_required" + + def test_create_job_dependency_raises_error_for_nonexistent_parent(self, job_factory, test_workflow_child_job_run): + parent_job_run_id = 9999 # Assuming this ID does not exist + child_job_run_id = test_workflow_child_job_run.id + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert f"Parent job run ID {parent_job_run_id} does not exist." 
in str(exc_info.value) + + def test_create_job_dependency_raises_error_for_nonexistent_child(self, job_factory, test_workflow_parent_job_run): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = 9999 # Assuming this ID does not exist + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert f"Child job run ID {child_job_run_id} does not exist." in str(exc_info.value) + + +@pytest.mark.integration +class TestJobFactoryCreateJobDependencyIntegration: + """Integration tests for the JobFactory create_job_dependency method within job execution.""" + + def test_create_job_dependency(self, job_factory, test_workflow_parent_job_run, test_workflow_child_job_run): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = test_workflow_child_job_run.id + dependency_type = "success_required" + + job_dependency = job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + dependency_type=dependency_type, + ) + job_factory.session.commit() + + retrieved_dependency = ( + job_factory.session.query(type(job_dependency)) + .filter( + type(job_dependency).id == child_job_run_id, + type(job_dependency).depends_on_job_id == parent_job_run_id, + ) + .first() + ) + + assert retrieved_dependency is not None + assert retrieved_dependency.id == child_job_run_id + assert retrieved_dependency.depends_on_job_id == parent_job_run_id + assert retrieved_dependency.dependency_type == dependency_type + + def test_create_job_dependency_missing_parent_raises_error(self, session, job_factory, test_workflow_child_job_run): + parent_job_run_id = 9999 # Assuming this ID does not exist + child_job_run_id = test_workflow_child_job_run.id + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) 
+ + assert f"Parent job run ID {parent_job_run_id} does not exist." in str(exc_info.value) + job_dependencies = session.query(JobDependency).all() + assert not job_dependencies + + def test_create_job_dependency_missing_child_raises_error(self, session, job_factory, test_workflow_parent_job_run): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = 9999 # Assuming this ID does not exist + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert f"Child job run ID {child_job_run_id} does not exist." in str(exc_info.value) + job_dependencies = session.query(JobDependency).all() + assert not job_dependencies diff --git a/tests/lib/workflow/test_pipeline_factory.py b/tests/lib/workflow/test_pipeline_factory.py new file mode 100644 index 000000000..b944e4695 --- /dev/null +++ b/tests/lib/workflow/test_pipeline_factory.py @@ -0,0 +1,242 @@ +# ruff: noqa: E402 +import pytest + +pytest.importorskip("fastapi") + +from sqlalchemy import select + +from mavedb.lib.workflow.pipeline_factory import PipelineFactory +from mavedb.models.job_run import JobRun + + +@pytest.mark.unit +class TestPipelineFactoryUnit: + """Unit tests for the PipelineFactory class.""" + + def test_create_pipeline_raises_if_pipeline_not_found(self, session, test_user): + """Test that creating a pipeline with an unknown name raises a KeyError.""" + pipeline_factory = PipelineFactory(session=session) + + with pytest.raises(KeyError) as exc_info: + pipeline_factory.create_pipeline( + pipeline_name="unknown_pipeline", + creating_user=test_user, + pipeline_params={}, + ) + + assert "unknown_pipeline" in str(exc_info.value) + + def test_create_pipeline_prioritizes_correlation_id_from_params( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that the correlation_id from 
pipeline_params is used when creating a pipeline.""" + pipeline_name = sample_independent_pipeline_definition["name"] + test_correlation_id = "test-correlation-id-123" + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"correlation_id": test_correlation_id, "required_param": "some_value"}, + ) + + assert job_run.correlation_id == test_correlation_id + + def test_create_pipeline_creates_start_pipeline_job( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline results in a JobRun of type 'start_pipeline'.""" + pipeline_name = sample_independent_pipeline_definition["name"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + stmt = select(JobRun).where(JobRun.pipeline_id == pipeline.id) + job_runs = session.execute(stmt).scalars().all() + + start_pipeline_jobs = [jr for jr in job_runs if jr.job_function == "start_pipeline"] + assert len(start_pipeline_jobs) == 1 + assert start_pipeline_jobs[0].id == job_run.id + + def test_create_pipeline_creates_job_runs( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline results in the correct number of JobRun instances.""" + pipeline_name = sample_independent_pipeline_definition["name"] + expected_job_count = len(sample_independent_pipeline_definition["job_definitions"]) + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + stmt = select(JobRun).where(JobRun.pipeline_id == pipeline.id) + job_runs = session.execute(stmt).scalars().all() + + # One additional job run for the 
start_pipeline job + assert len(job_runs) == expected_job_count + 1 + + def test_create_pipeline_creates_job_dependencies( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_dependent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline with job dependencies results in correct JobDependency records.""" + pipeline_name = sample_dependent_pipeline_definition["name"] + jobs = sample_dependent_pipeline_definition["job_definitions"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"paramA": "valueA", "paramB": "valueB", "required_param": "some_value"}, + ) + + stmt = select(JobRun).where(JobRun.pipeline_id == pipeline.id) + job_runs = session.execute(stmt).scalars().all() + job_run_dict = {jr.job_function: jr for jr in job_runs} + + # Verify dependencies + for job_def in jobs: + job_deps = job_def["dependencies"] + job_run = job_run_dict[job_def["function"]] + + # For each dependency, check that a JobDependency record exists + # and verify its properties + for dep_key, dependency_type in job_deps: + dep_job_run = job_run_dict[[jd for jd in jobs if jd["key"] == dep_key][0]["function"]] + + assert len(job_run.job_dependencies) == 1 + for jd in job_run.job_dependencies: + assert jd.depends_on_job_id == dep_job_run.id + assert jd.dependency_type == dependency_type + + def test_create_pipeline_creates_pipeline( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline results in a Pipeline record in the database.""" + pipeline_name = sample_independent_pipeline_definition["name"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + stmt = select(pipeline.__class__).where(pipeline.__class__.id == 
pipeline.id) + retrieved_pipeline = session.execute(stmt).scalars().first() + + assert retrieved_pipeline is not None + assert retrieved_pipeline.id == pipeline.id + + +@pytest.mark.integration +class TestPipelineFactoryIntegration: + """Integration tests for the PipelineFactory class.""" + + def test_create_pipeline_independent( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Integration test for creating an independent pipeline.""" + pipeline_name = sample_independent_pipeline_definition["name"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + assert pipeline.name == pipeline_name + assert job_run.job_function == "start_pipeline" + + for job_def in sample_independent_pipeline_definition["job_definitions"]: + stmt = select(JobRun).where( + JobRun.pipeline_id == pipeline.id, + JobRun.job_function == job_def["function"], + ) + job_run = session.execute(stmt).scalars().first() + assert job_run is not None + assert job_run.job_params["param1"] == "value1" + assert job_run.job_params["param2"] == "value2" + assert job_run.pipeline_id == pipeline.id + assert job_run.job_dependencies == [] + + def test_create_pipeline_dependent( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_dependent_pipeline_definition, + test_user, + ): + """Integration test for creating a dependent pipeline.""" + pipeline_name = sample_dependent_pipeline_definition["name"] + + passed_params = {"paramA": "valueA", "paramB": "valueB", "required_param": "some_value"} + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params=passed_params, + ) + + assert pipeline.name == pipeline_name + assert job_run.job_function == "start_pipeline" + + job_runs = {} + for job_def in 
sample_dependent_pipeline_definition["job_definitions"]: + stmt = select(JobRun).where( + JobRun.pipeline_id == pipeline.id, + JobRun.job_function == job_def["function"], + ) + jr = session.execute(stmt).scalars().first() + assert jr is not None + assert jr.pipeline_id == pipeline.id + for param_key, param_value in job_def["params"].items(): + if param_value is not None: + assert jr.job_params[param_key] == param_value + else: + assert jr.job_params[param_key] == passed_params[param_key] + + job_runs[job_def["key"]] = jr + + # Verify dependencies + for job_def in sample_dependent_pipeline_definition["job_definitions"]: + job_deps = job_def["dependencies"] + job_run = job_runs[job_def["key"]] + for dep_key, dependency_type in job_deps: + dep_job_run = job_runs[dep_key] + + assert len(job_run.job_dependencies) == 1 + for jd in job_run.job_dependencies: + assert jd.depends_on_job_id == dep_job_run.id + assert jd.dependency_type == dependency_type diff --git a/tests/routers/conftest.py b/tests/routers/conftest.py index d54b18d82..ba34c5489 100644 --- a/tests/routers/conftest.py +++ b/tests/routers/conftest.py @@ -4,32 +4,36 @@ import pytest from mavedb.models.clinical_control import ClinicalControl -from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.contributor import Contributor +from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.enums.user_role import UserRole -from mavedb.models.publication_identifier import PublicationIdentifier from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.license import License +from mavedb.models.publication_identifier import PublicationIdentifier from mavedb.models.role import Role from mavedb.models.taxonomy import Taxonomy from mavedb.models.user import User - from tests.helpers.constants import ( ADMIN_USER, - TEST_CLINVAR_CONTROL, - TEST_GENERIC_CLINICAL_CONTROL, - EXTRA_USER, EXTRA_CONTRIBUTOR, + EXTRA_LICENSE, + EXTRA_USER, + TEST_CLINVAR_CONTROL, 
TEST_DB_KEYWORDS, - TEST_LICENSE, + TEST_GENERIC_CLINICAL_CONTROL, + TEST_GNOMAD_VARIANT, TEST_INACTIVE_LICENSE, - EXTRA_LICENSE, + TEST_LICENSE, + TEST_PUBMED_PUBLICATION, TEST_SAVED_TAXONOMY, TEST_USER, - TEST_PUBMED_PUBLICATION, - TEST_GNOMAD_VARIANT, ) +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass + @pytest.fixture def setup_router_db(session): diff --git a/tests/routers/conftest_optional.py b/tests/routers/conftest_optional.py new file mode 100644 index 000000000..efbd119bd --- /dev/null +++ b/tests/routers/conftest_optional.py @@ -0,0 +1,14 @@ +from unittest import mock + +import pytest +from mypy_boto3_s3 import S3Client + + +@pytest.fixture +def mock_s3_client(): + """Mock S3 client for tests that interact with S3.""" + + with mock.patch("mavedb.routers.score_sets.s3_client") as mock_s3_client_func: + mock_s3 = mock.MagicMock(spec=S3Client) + mock_s3_client_func.return_value = mock_s3 + yield mock_s3 diff --git a/tests/routers/test_job_runs.py b/tests/routers/test_job_runs.py new file mode 100644 index 000000000..c2c430f6c --- /dev/null +++ b/tests/routers/test_job_runs.py @@ -0,0 +1,172 @@ +# ruff: noqa: E402 + +import pytest + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from tests.helpers.dependency_overrider import DependencyOverrider + + +def _make_pipeline(session, **overrides) -> Pipeline: + defaults = { + "name": "test_pipeline", + "status": PipelineStatus.RUNNING, + "correlation_id": "corr-1", + } + defaults.update(overrides) + pipeline = Pipeline(**defaults) + session.add(pipeline) + session.commit() + session.refresh(pipeline) + return pipeline + + +def _make_job_run(session, pipeline_id=None, **overrides) -> JobRun: + defaults = { + "job_type": "variant_mapping", + 
"job_function": "map_variants_for_score_set", + "status": JobStatus.PENDING, + "pipeline_id": pipeline_id, + "correlation_id": "corr-1", + "max_retries": 3, + "retry_count": 0, + } + defaults.update(overrides) + job_run = JobRun(**defaults) + session.add(job_run) + session.commit() + session.refresh(job_run) + return job_run + + +#################################################################################################### +# /api/v1/job-runs +#################################################################################################### + + +def test_cannot_list_job_runs_as_anonymous_user(client, setup_router_db, anonymous_app_overrides): + with DependencyOverrider(anonymous_app_overrides): + response = client.get("/api/v1/job-runs/") + + assert response.status_code == 401 + + +def test_cannot_list_job_runs_as_normal_user(client, setup_router_db): + response = client.get("/api/v1/job-runs/") + assert response.status_code == 403 + + +def test_can_list_job_runs_as_admin(client, session, setup_router_db, admin_app_overrides): + _make_job_run(session, status=JobStatus.PENDING) + _make_job_run(session, status=JobStatus.SUCCEEDED) + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/job-runs/") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 2 + + +def test_list_job_runs_filters_by_status(client, session, setup_router_db, admin_app_overrides): + _make_job_run(session, status=JobStatus.FAILED, error_message="boom") + _make_job_run(session, status=JobStatus.SUCCEEDED) + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/job-runs/?status=failed") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 1 + assert body[0]["status"] == "failed" + assert body[0]["errorMessage"] == "boom" + + +def test_list_job_runs_filters_by_job_type(client, session, setup_router_db, admin_app_overrides): + _make_job_run(session, 
job_type="variant_mapping", job_function="map_variants_for_score_set") + _make_job_run(session, job_type="variant_creation", job_function="create_variants_for_score_set") + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/job-runs/?job_type=variant_creation") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 1 + assert body[0]["jobType"] == "variant_creation" + + +def test_list_job_runs_filters_by_pipeline_id(client, session, setup_router_db, admin_app_overrides): + pipeline = _make_pipeline(session) + _make_job_run(session, pipeline_id=pipeline.id) + _make_job_run(session, pipeline_id=None) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/job-runs/?pipeline_id={pipeline.id}") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 1 + assert body[0]["pipelineId"] == pipeline.id + + +def test_list_job_runs_respects_limit(client, session, setup_router_db, admin_app_overrides): + for _ in range(4): + _make_job_run(session) + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/job-runs/?limit=2") + + assert response.status_code == 200 + assert len(response.json()) == 2 + + +def test_cannot_show_job_run_as_normal_user(client, session, setup_router_db): + job_run = _make_job_run(session) + response = client.get(f"/api/v1/job-runs/{job_run.urn}") + assert response.status_code == 403 + + +def test_show_job_run_returns_404_for_unknown_urn(client, setup_router_db, admin_app_overrides): + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/job-runs/urn:mavedb-job:does-not-exist") + + assert response.status_code == 404 + + +def test_show_job_run_returns_detail_with_traceback(client, session, setup_router_db, admin_app_overrides): + job_run = _make_job_run( + session, + status=JobStatus.FAILED, + error_message="kaboom", + error_traceback="Traceback (most recent call last):\n File 
'x.py'", + failure_category="system_error", + ) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/job-runs/{job_run.urn}") + + assert response.status_code == 200 + body = response.json() + assert body["urn"] == job_run.urn + assert body["status"] == "failed" + assert body["errorMessage"] == "kaboom" + # The detail response is the only place a full traceback is returned to operators. + assert body["errorTraceback"].startswith("Traceback") + assert body["failureCategory"] == "system_error" + + +def test_show_job_run_renders_metadata_key(client, session, setup_router_db, admin_app_overrides): + job_run = _make_job_run(session, metadata_={"k": "v"}) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/job-runs/{job_run.urn}") + + assert response.status_code == 200 + body = response.json() + # `metadata_` on the ORM model surfaces as JSON key `metadata`. + assert body["metadata"] == {"k": "v"} diff --git a/tests/routers/test_pipelines.py b/tests/routers/test_pipelines.py new file mode 100644 index 000000000..496569e58 --- /dev/null +++ b/tests/routers/test_pipelines.py @@ -0,0 +1,193 @@ +# ruff: noqa: E402 + +from datetime import datetime, timezone + +import pytest + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from tests.helpers.dependency_overrider import DependencyOverrider + + +def _make_pipeline(session, **overrides) -> Pipeline: + defaults = { + "name": "test_pipeline", + "description": "test pipeline description", + "status": PipelineStatus.RUNNING, + "correlation_id": "corr-1", + } + defaults.update(overrides) + pipeline = Pipeline(**defaults) + session.add(pipeline) + session.commit() + session.refresh(pipeline) + return pipeline + + +def _make_job_run(session, 
pipeline_id=None, **overrides) -> JobRun: + defaults = { + "job_type": "variant_mapping", + "job_function": "map_variants_for_score_set", + "status": JobStatus.PENDING, + "pipeline_id": pipeline_id, + "correlation_id": "corr-1", + "max_retries": 3, + "retry_count": 0, + } + defaults.update(overrides) + job_run = JobRun(**defaults) + session.add(job_run) + session.commit() + session.refresh(job_run) + return job_run + + +#################################################################################################### +# /api/v1/pipelines +#################################################################################################### + + +def test_cannot_list_pipelines_as_anonymous_user(client, setup_router_db, anonymous_app_overrides): + with DependencyOverrider(anonymous_app_overrides): + response = client.get("/api/v1/pipelines/") + + assert response.status_code == 401 + + +def test_cannot_list_pipelines_as_normal_user(client, setup_router_db): + response = client.get("/api/v1/pipelines/") + assert response.status_code == 403 + + +def test_can_list_pipelines_as_admin(client, session, setup_router_db, admin_app_overrides): + _make_pipeline(session, name="p1", status=PipelineStatus.RUNNING) + _make_pipeline(session, name="p2", status=PipelineStatus.SUCCEEDED) + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 2 + names = {row["name"] for row in body} + assert names == {"p1", "p2"} + + +def test_list_pipelines_filters_by_status(client, session, setup_router_db, admin_app_overrides): + _make_pipeline(session, name="p_running", status=PipelineStatus.RUNNING) + _make_pipeline(session, name="p_done", status=PipelineStatus.SUCCEEDED) + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/?status=running") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 1 + 
assert body[0]["name"] == "p_running" + + +def test_list_pipelines_filters_by_correlation_id(client, session, setup_router_db, admin_app_overrides): + _make_pipeline(session, name="p_a", correlation_id="corr-a") + _make_pipeline(session, name="p_b", correlation_id="corr-b") + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/?correlation_id=corr-a") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 1 + assert body[0]["name"] == "p_a" + + +def test_list_pipelines_respects_limit(client, session, setup_router_db, admin_app_overrides): + for i in range(5): + _make_pipeline(session, name=f"p{i}") + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/?limit=3") + + assert response.status_code == 200 + assert len(response.json()) == 3 + + +def test_cannot_show_pipeline_as_normal_user(client, session, setup_router_db): + pipeline = _make_pipeline(session) + response = client.get(f"/api/v1/pipelines/{pipeline.urn}") + assert response.status_code == 403 + + +def test_show_pipeline_returns_404_for_unknown_urn(client, setup_router_db, admin_app_overrides): + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/urn:mavedb-pipeline:does-not-exist") + + assert response.status_code == 404 + + +def test_show_pipeline_returns_progress(client, session, setup_router_db, admin_app_overrides): + pipeline = _make_pipeline(session) + _make_job_run(session, pipeline_id=pipeline.id, status=JobStatus.SUCCEEDED) + _make_job_run(session, pipeline_id=pipeline.id, status=JobStatus.FAILED) + _make_job_run(session, pipeline_id=pipeline.id, status=JobStatus.PENDING) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/pipelines/{pipeline.urn}") + + assert response.status_code == 200 + body = response.json() + assert body["urn"] == pipeline.urn + assert body["name"] == pipeline.name + # Progress aggregation is 
delegated to PipelineManager.get_pipeline_progress(). + progress = body["progress"] + assert progress["totalJobs"] == 3 + assert progress["successfulJobs"] == 1 + assert progress["failedJobs"] == 1 + assert progress["pendingJobs"] == 1 + # completion = succeeded + failed + skipped + cancelled = 2 / 3 + assert progress["completedJobs"] == 2 + assert 66.0 < progress["completionPercentage"] < 67.0 + + +def test_show_pipeline_renders_metadata_key(client, session, setup_router_db, admin_app_overrides): + pipeline = _make_pipeline(session, metadata_={"foo": "bar"}) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/pipelines/{pipeline.urn}") + + assert response.status_code == 200 + body = response.json() + # `metadata_` on the ORM model surfaces as JSON key `metadata`. + assert body["metadata"] == {"foo": "bar"} + + +def test_show_pipeline_with_no_jobs_reports_empty_progress(client, session, setup_router_db, admin_app_overrides): + pipeline = _make_pipeline(session) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/pipelines/{pipeline.urn}") + + assert response.status_code == 200 + progress = response.json()["progress"] + assert progress["totalJobs"] == 0 + assert progress["completionPercentage"] == 100.0 + + +def test_list_pipelines_orders_by_created_desc(client, session, setup_router_db, admin_app_overrides): + older = _make_pipeline(session, name="older") + # Force created_at ordering deterministically. 
+ older.created_at = datetime(2024, 1, 1, tzinfo=timezone.utc) + session.commit() + newer = _make_pipeline(session, name="newer") + newer.created_at = datetime(2025, 1, 1, tzinfo=timezone.utc) + session.commit() + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/") + + assert response.status_code == 200 + names = [row["name"] for row in response.json()] + assert names == ["newer", "older"] diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index c1476a65b..4b85fd8b5 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -489,7 +489,7 @@ def test_can_patch_score_set_data_before_publication( indirect=["mock_publication_fetch"], ) def test_can_patch_score_set_data_with_files_before_publication( - client, setup_router_db, form_field, filename, mime_type, data_files, mock_publication_fetch + client, setup_router_db, form_field, filename, mime_type, data_files, mock_publication_fetch, mock_s3_client ): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -501,7 +501,10 @@ def test_can_patch_score_set_data_with_files_before_publication( if form_field == "counts_file" or form_field == "scores_file": data_file_path = data_files / filename files = {form_field: (filename, open(data_file_path, "rb"), mime_type)} - with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + with ( + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), + ): response = client.patch(f"/api/v1/score-sets-with-variants/{score_set['urn']}", files=files) worker_queue.assert_called_once() assert response.status_code == 200 @@ -875,7 +878,9 @@ def test_show_score_sets_anonymous_can_fetch_public_score_sets( assert response_data[0]["urn"] == published_score_set["urn"] -def 
test_show_score_sets_anonymous_cannot_fetch_private_score_sets(session, client, setup_router_db, anonymous_app_overrides): +def test_show_score_sets_anonymous_cannot_fetch_private_score_sets( + session, client, setup_router_db, anonymous_app_overrides +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) # Score set is private (not published); change ownership so it belongs to another user @@ -927,7 +932,9 @@ def test_show_score_sets_mixed_public_and_private_returns_404( ): experiment = create_experiment(client) public_score_set = create_seq_score_set(client, experiment["urn"]) - public_score_set = mock_worker_variant_insertion(client, session, data_provider, public_score_set, data_files / "scores.csv") + public_score_set = mock_worker_variant_insertion( + client, session, data_provider, public_score_set, data_files / "scores.csv" + ) private_score_set = create_seq_score_set(client, experiment["urn"]) with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): published_score_set = publish_score_set(client, public_score_set["urn"]) @@ -1057,13 +1064,14 @@ def test_creating_user_can_view_all_score_calibrations_in_score_set(client, setu ######################################################################################################################## -def test_add_score_set_variants_scores_only_endpoint(client, setup_router_db, data_files): +def test_add_score_set_variants_scores_only_endpoint(client, setup_router_db, data_files, mock_s3_client): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores.csv" with ( open(scores_csv_path, "rb") as scores_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1081,7 +1089,9 @@ def 
test_add_score_set_variants_scores_only_endpoint(client, setup_router_db, da assert score_set == response_data -def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setup_router_db, data_files): +def test_add_score_set_variants_scores_and_counts_endpoint( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores.csv" @@ -1090,6 +1100,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setu open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1111,7 +1122,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setu def test_add_score_set_variants_scores_counts_and_column_metadata_endpoint( - session, client, setup_router_db, data_files + session, client, setup_router_db, data_files, mock_s3_client ): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -1125,6 +1136,7 @@ def test_add_score_set_variants_scores_counts_and_column_metadata_endpoint( open(score_columns_metadata_path, "rb") as score_columns_metadata_file, open(count_columns_metadata_path, "rb") as count_columns_metadata_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): score_columns_metadata = json.load(score_columns_metadata_file) count_columns_metadata = json.load(count_columns_metadata_file) @@ -1151,13 +1163,14 @@ def test_add_score_set_variants_scores_counts_and_column_metadata_endpoint( assert score_set == response_data -def 
test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_router_db, data_files): +def test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_router_db, data_files, mock_s3_client): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores_utf8_encoded.csv" with ( open(scores_csv_path, "rb") as scores_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1175,7 +1188,9 @@ def test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_ assert score_set == response_data -def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded(session, client, setup_router_db, data_files): +def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores_utf8_encoded.csv" @@ -1184,6 +1199,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded(session, open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1259,7 +1275,9 @@ def test_anonymous_cannot_add_scores_to_other_user_score_set( assert "Could not validate credentials" in response_data["detail"] -def test_contributor_can_add_scores_to_other_user_score_set(session, client, setup_router_db, data_files): +def test_contributor_can_add_scores_to_other_user_score_set( + session, client, setup_router_db, data_files, 
mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) change_ownership(session, score_set["urn"], ScoreSetDbModel) @@ -1276,6 +1294,7 @@ def test_contributor_can_add_scores_to_other_user_score_set(session, client, set with ( open(scores_csv_path, "rb") as scores_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1313,7 +1332,9 @@ def test_contributor_can_add_scores_to_other_user_score_set(session, client, set assert score_set == response_data -def test_contributor_can_add_scores_and_counts_to_other_user_score_set(session, client, setup_router_db, data_files): +def test_contributor_can_add_scores_and_counts_to_other_user_score_set( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) change_ownership(session, score_set["urn"], ScoreSetDbModel) @@ -1332,6 +1353,7 @@ def test_contributor_can_add_scores_and_counts_to_other_user_score_set(session, open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1373,7 +1395,7 @@ def test_contributor_can_add_scores_and_counts_to_other_user_score_set(session, def test_admin_can_add_scores_to_other_user_score_set( - session, client, setup_router_db, data_files, admin_app_overrides + session, client, setup_router_db, data_files, mock_s3_client, admin_app_overrides ): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -1383,6 +1405,7 @@ def 
test_admin_can_add_scores_to_other_user_score_set( open(scores_csv_path, "rb") as scores_file, DependencyOverrider(admin_app_overrides), patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1400,7 +1423,9 @@ def test_admin_can_add_scores_to_other_user_score_set( assert score_set == response_data -def test_admin_can_add_scores_and_counts_to_other_user_score_set(session, client, setup_router_db, data_files): +def test_admin_can_add_scores_and_counts_to_other_user_score_set( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores.csv" @@ -1409,6 +1434,7 @@ def test_admin_can_add_scores_and_counts_to_other_user_score_set(session, client open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1429,6 +1455,115 @@ def test_admin_can_add_scores_and_counts_to_other_user_score_set(session, client assert score_set == response_data +######################################################################################################################## +# Score set variant upload error handling +######################################################################################################################## + + +def test_upload_score_set_variant_data_returns_500_and_resets_processing_state_when_enqueue_job_fails( + session, client, setup_router_db, data_files, mock_s3_client +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + scores_csv_path = 
data_files / "scores.csv" + + with ( + open(scores_csv_path, "rb") as scores_file, + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception("queue failure")), + patch.object(mock_s3_client, "upload_fileobj", return_value=None), + ): + response = client.post( + f"/api/v1/score-sets/{score_set['urn']}/variants/data", + files={"scores_file": (scores_csv_path.name, scores_file, "text/csv")}, + ) + + assert response.status_code == 500 + + db_score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set["urn"])).one() + session.refresh(db_score_set) + assert db_score_set.processing_state == ProcessingState.failed + + +def test_upload_score_set_variant_data_deletes_s3_files_when_enqueue_job_fails( + client, setup_router_db, data_files, mock_s3_client +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + scores_csv_path = data_files / "scores.csv" + counts_csv_path = data_files / "counts.csv" + + with ( + open(scores_csv_path, "rb") as scores_file, + open(counts_csv_path, "rb") as counts_file, + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception("queue failure")), + patch.object(mock_s3_client, "upload_fileobj", return_value=None), + ): + response = client.post( + f"/api/v1/score-sets/{score_set['urn']}/variants/data", + files={ + "scores_file": (scores_csv_path.name, scores_file, "text/csv"), + "counts_file": (counts_csv_path.name, counts_file, "text/csv"), + }, + ) + + assert response.status_code == 500 + # Both uploaded S3 keys should be passed to delete_objects for cleanup. 
+ mock_s3_client.delete_objects.assert_called_once() + delete_call_kwargs = mock_s3_client.delete_objects.call_args.kwargs + deleted_keys = {obj["Key"] for obj in delete_call_kwargs["Delete"]["Objects"]} + assert len(deleted_keys) == 2 + assert all("scores.csv" in k or "counts.csv" in k for k in deleted_keys) + + +def test_upload_score_set_variant_data_deletes_s3_files_when_pipeline_creation_fails( + client, setup_router_db, data_files, mock_s3_client +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + scores_csv_path = data_files / "scores.csv" + + with ( + open(scores_csv_path, "rb") as scores_file, + patch("mavedb.routers.score_sets.PipelineFactory.create_pipeline", side_effect=Exception("pipeline failure")), + patch.object(mock_s3_client, "upload_fileobj", return_value=None), + ): + response = client.post( + f"/api/v1/score-sets/{score_set['urn']}/variants/data", + files={"scores_file": (scores_csv_path.name, scores_file, "text/csv")}, + ) + + assert response.status_code == 500 + mock_s3_client.delete_objects.assert_called_once() + delete_call_kwargs = mock_s3_client.delete_objects.call_args.kwargs + deleted_keys = {obj["Key"] for obj in delete_call_kwargs["Delete"]["Objects"]} + assert len(deleted_keys) == 1 + assert any("scores.csv" in k for k in deleted_keys) + + +def test_patch_score_set_with_variants_returns_500_and_resets_processing_state_when_enqueue_job_fails( + session, client, setup_router_db, data_files, mock_s3_client +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + scores_csv_path = data_files / "scores.csv" + + with ( + open(scores_csv_path, "rb") as scores_file, + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception("queue failure")), + patch.object(mock_s3_client, "upload_fileobj", return_value=None), + ): + response = client.patch( + f"/api/v1/score-sets-with-variants/{score_set['urn']}", + files={"scores_file": 
(scores_csv_path.name, scores_file, "text/csv")}, + ) + + assert response.status_code == 500 + + db_score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set["urn"])).one() + session.refresh(db_score_set) + assert db_score_set.processing_state == ProcessingState.failed + + ######################################################################################################################## # Score set publication ######################################################################################################################## diff --git a/tests/worker/conftest.py b/tests/worker/conftest.py index 49dad88f9..eaf613683 100644 --- a/tests/worker/conftest.py +++ b/tests/worker/conftest.py @@ -1,34 +1,286 @@ +""" +Test configuration and fixtures for worker lib tests. +""" + +from datetime import datetime from pathlib import Path from shutil import copytree from unittest.mock import Mock +import pandas as pd import pytest +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus +from mavedb.models.experiment import Experiment +from mavedb.models.experiment_set import ExperimentSet +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun from mavedb.models.license import License -from mavedb.models.taxonomy import Taxonomy +from mavedb.models.pipeline import Pipeline +from mavedb.models.score_set import ScoreSet +from mavedb.models.target_gene import TargetGene +from mavedb.models.target_sequence import TargetSequence from mavedb.models.user import User +from tests.helpers.constants import EXTRA_USER, TEST_LICENSE, TEST_USER + +# Attempt to import optional top level fixtures. If the modules they depend on are not installed, +# we won't have access to our full fixture suite and only a limited subset of tests can be run. 
+try: + from .conftest_optional import * # noqa: F401, F403 + +except ModuleNotFoundError: + pass + + +@pytest.fixture +def sample_job_run(sample_pipeline): + """Create a sample JobRun instance for testing.""" + return JobRun( + id=1, + urn="test:job:1", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=sample_pipeline.id, + progress_current=0, + progress_total=100, + progress_message="Ready to start", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_dependent_job_run(sample_pipeline): + """Create a sample dependent JobRun instance for testing.""" + return JobRun( + id=2, + urn="test:job:2", + job_type="dependent_job", + job_function="dependent_function", + status=JobStatus.PENDING, + pipeline_id=sample_pipeline.id, + progress_current=0, + progress_total=100, + progress_message="Waiting for dependency", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_independent_job_run(): + """Create a sample independent JobRun instance for testing.""" + return JobRun( + id=3, + urn="test:job:3", + job_type="independent_job", + job_function="independent_function", + status=JobStatus.PENDING, + pipeline_id=None, + progress_current=0, + progress_total=100, + progress_message="Ready to start", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_pipeline(): + """Create a sample Pipeline instance for testing.""" + return Pipeline( + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_empty_pipeline(): + """Create a sample Pipeline instance with no jobs for testing.""" + return Pipeline( + id=999, + urn="test:pipeline:999", + name="Empty Pipeline", + description="A pipeline with no jobs", + status=PipelineStatus.CREATED, + correlation_id="empty_correlation_456", + created_at=datetime.now(), + ) + + +@pytest.fixture +def 
sample_job_dependency(sample_dependent_job_run, sample_job_run): + """Create a sample JobDependency instance for testing.""" + return JobDependency( + id=sample_dependent_job_run.id, # dependent job + depends_on_job_id=sample_job_run.id, # depends on job 1 + dependency_type=DependencyType.SUCCESS_REQUIRED, + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_user(): + """Create a sample User instance for testing.""" + return User(**TEST_USER) -from tests.helpers.constants import ( - EXTRA_USER, - TEST_LICENSE, - TEST_INACTIVE_LICENSE, - TEST_SAVED_TAXONOMY, - TEST_USER, - TEST_MAVEDB_ATHENA_ROW, -) + +@pytest.fixture +def sample_extra_user(): + """Create an extra sample User instance for testing.""" + return User(**EXTRA_USER) + + +@pytest.fixture +def sample_license(): + """Create a sample License instance for testing.""" + return License(**TEST_LICENSE) + + +@pytest.fixture +def sample_experiment_set(sample_user): + """Create a sample ExperimentSet instance for testing.""" + return ExperimentSet( + extra_metadata={}, + created_by=sample_user, + ) + + +@pytest.fixture +def sample_experiment(sample_experiment_set, sample_user): + """Create a sample Experiment instance for testing.""" + return Experiment( + title="Sample Experiment", + short_description="A sample experiment for testing purposes", + abstract_text="This is an abstract for the sample experiment.", + method_text="This is a method description for the sample experiment.", + extra_metadata={}, + experiment_set=sample_experiment_set, + created_by=sample_user, + ) + + +@pytest.fixture +def sample_score_set(sample_experiment, sample_user, sample_license): + """Create a sample ScoreSet instance for testing.""" + return ScoreSet( + title="Sample Score Set", + short_description="A sample score set for testing purposes", + abstract_text="This is an abstract for the sample score set.", + method_text="This is a method description for the sample score set.", + extra_metadata={}, + 
experiment=sample_experiment, + created_by=sample_user, + license=sample_license, + target_genes=[ + TargetGene( + name="Sample Gene", + category="protein_coding", + target_sequence=TargetSequence(label="testsequence", sequence_type="dna", sequence="ATGCAT"), + ) + ], + ) @pytest.fixture -def setup_worker_db(session): +def with_populated_domain_data( + session, + sample_user, + sample_extra_user, + sample_experiment_set, + sample_experiment, + sample_score_set, + sample_license, +): db = session - db.add(User(**TEST_USER)) - db.add(User(**EXTRA_USER)) - db.add(Taxonomy(**TEST_SAVED_TAXONOMY)) - db.add(License(**TEST_LICENSE)) - db.add(License(**TEST_INACTIVE_LICENSE)) + db.add(sample_user) + db.add(sample_extra_user) + db.add(sample_experiment_set) + db.add(sample_experiment) + db.add(sample_score_set) + db.add(sample_license) db.commit() +@pytest.fixture +def with_populated_job_data( + session, + sample_job_run, + sample_pipeline, + sample_empty_pipeline, + sample_job_dependency, + sample_dependent_job_run, + sample_independent_job_run, +): + """Set up the database with sample data for worker tests.""" + session.add(sample_pipeline) + session.add(sample_empty_pipeline) + session.add(sample_job_run) + session.add(sample_dependent_job_run) + session.add(sample_independent_job_run) + session.add(sample_job_dependency) + session.commit() + + +@pytest.fixture +def mock_pipeline(): + """Create a mock Pipeline instance. By default, + properties are identical to a default new Pipeline entered into the db + with sensible defaults for non-nullable but unset fields. 
+ """ + return Mock( + spec=Pipeline, + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + metadata_={}, + created_at=datetime.now(), + started_at=None, + finished_at=None, + created_by_user_id=None, + mavedb_version=None, + ) + + +@pytest.fixture +def mock_job_run(mock_pipeline): + """Create a mock JobRun instance. By default, + properties are identical to a default new JobRun entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=JobRun, + id=123, + urn="test:job:123", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=mock_pipeline.id, + max_retries=3, + retry_count=0, + retry_delay_seconds=None, + scheduled_at=datetime.now(), + started_at=None, + finished_at=None, + created_at=datetime.now(), + error_message=None, + error_traceback=None, + failure_category=None, + progress_current=None, + progress_total=None, + progress_message=None, + correlation_id=None, + metadata_={}, + mavedb_version=None, + ) + + @pytest.fixture def data_files(tmp_path): copytree(Path(__file__).absolute().parent / "data", tmp_path / "data") @@ -36,10 +288,10 @@ def data_files(tmp_path): @pytest.fixture -def mocked_gnomad_variant_row(): - gnomad_variant = Mock() +def sample_score_dataframe(data_files): + return pd.read_csv(data_files / "scores.csv") - for key, value in TEST_MAVEDB_ATHENA_ROW.items(): - setattr(gnomad_variant, key, value) - return gnomad_variant +@pytest.fixture +def sample_count_dataframe(data_files): + return pd.read_csv(data_files / "counts.csv") diff --git a/tests/worker/conftest_optional.py b/tests/worker/conftest_optional.py new file mode 100644 index 000000000..0f1d2e95f --- /dev/null +++ b/tests/worker/conftest_optional.py @@ -0,0 +1,64 @@ +from concurrent.futures import ProcessPoolExecutor +from unittest.mock import Mock, patch + +import pytest +from arq 
import ArqRedis +from cdot.hgvs.dataproviders import RESTDataProvider +from sqlalchemy.orm import Session + +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + + +@pytest.fixture +def mock_job_manager(mock_job_run): + """Create a JobManager with mocked database and Redis dependencies.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + # Don't call the real constructor since it tries to load the job from DB + manager = object.__new__(JobManager) + manager.db = mock_db + manager.redis = mock_redis + manager.job_id = mock_job_run.id + manager.context = {} + + with patch.object(manager, "get_job", return_value=mock_job_run): + yield manager + + +@pytest.fixture +def mock_pipeline_manager(mock_job_manager, mock_pipeline): + """Create a PipelineManager with mocked database, Redis dependencies, and job manager.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + # Don't call the real constructor since it tries to validate the pipeline + manager = object.__new__(PipelineManager) + manager.db = mock_db + manager.redis = mock_redis + manager.pipeline_id = 123 + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.JobManager") as mock_job_manager_class, + patch.object(manager, "get_pipeline", return_value=mock_pipeline), + ): + mock_job_manager_class.return_value = mock_job_manager + yield manager + + +@pytest.fixture +def mock_worker_ctx(): + """Create a mock worker context dictionary for testing.""" + mock_redis = Mock(spec=ArqRedis) + mock_hdp = Mock(spec=RESTDataProvider) + mock_pool = Mock(spec=ProcessPoolExecutor) + + # Don't mock the session itself to allow real DB interactions in tests + # It's generally more pain than it's worth to mock out SQLAlchemy sessions, + # although it can sometimes be useful when raising specific exceptions. 
+ return { + "redis": mock_redis, + "hdp": mock_hdp, + "pool": mock_pool, + } diff --git a/tests/worker/data/counts.csv b/tests/worker/data/counts.csv index 0cc1e742a..4821232a3 100644 --- a/tests/worker/data/counts.csv +++ b/tests/worker/data/counts.csv @@ -1,4 +1,5 @@ -hgvs_nt,hgvs_pro,c_0,c_1 -c.1A>T,p.Thr1Ser,10,20 -c.2C>T,p.Thr1Met,8,8 -c.6T>A,p.Phe2Leu,90,2 +hgvs_nt,hgvs_splice,hgvs_pro,c_0,c_1 +c.1A>T,NA,p.Met1Leu,10,20 +c.2T>A,NA,p.Met1Lys,8,8 +c.3G>C,NA,p.Met1Ile,90,2 +c.4C>G,NA,p.His2Asp,12,1 diff --git a/tests/worker/data/scores.csv b/tests/worker/data/scores.csv index 11fce4988..bd8e3baed 100644 --- a/tests/worker/data/scores.csv +++ b/tests/worker/data/scores.csv @@ -1,4 +1,5 @@ -hgvs_nt,hgvs_pro,score,s_0,s_1 -c.1A>T,p.Thr1Ser,0.3,val1,val1 -c.2C>T,p.Thr1Met,0.0,val2,val2 -c.6T>A,p.Phe2Leu,-1.65,val3,val3 +hgvs_nt,hgvs_splice,hgvs_pro,score,s_0,s_1 +c.1A>T,NA,p.Met1Leu,0.3,val1,val1 +c.2T>A,NA,p.Met1Lys,0,val2,val2 +c.3G>C,NA,p.Met1Ile,-1.65,val3,val3 +c.4C>G,NA,p.His2Asp,NA,val5,val4 diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py new file mode 100644 index 000000000..109595d75 --- /dev/null +++ b/tests/worker/jobs/conftest.py @@ -0,0 +1,1249 @@ +import pytest + +from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.pipeline import Pipeline +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from tests.helpers.constants import VALID_CAID + +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass + + +## param fixtures for job runs ## + + +@pytest.fixture +def create_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): + """Provide sample parameters for create_variants_for_score_set job.""" + + return { + "scores_file_key": "sample_scores.csv", + 
"counts_file_key": "sample_counts.csv", + "correlation_id": "sample-correlation-id", + "updater_id": sample_user.id, + "score_set_id": sample_score_set.id, + "score_columns_metadata": {"s_0": {"description": "metadataS", "details": "detailsS"}}, + "count_columns_metadata": {"c_0": {"description": "metadataC", "details": "detailsC"}}, + } + + +@pytest.fixture +def map_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): + """Provide sample parameters for map_variants_for_score_set job.""" + + return { + "score_set_id": sample_score_set.id, + "correlation_id": "sample-mapping-correlation-id", + "updater_id": sample_user.id, + } + + +@pytest.fixture +def link_gnomad_variants_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for create_variants_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def submit_uniprot_mapping_jobs_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for submit_uniprot_mapping_jobs_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def poll_uniprot_mapping_jobs_sample_params( + submit_uniprot_mapping_jobs_sample_params, + with_dependent_polling_job_for_submission_run, +): + """Provide sample parameters for poll_uniprot_mapping_jobs_for_score_set job.""" + + return { + "correlation_id": submit_uniprot_mapping_jobs_sample_params["correlation_id"], + "score_set_id": submit_uniprot_mapping_jobs_sample_params["score_set_id"], + "mapping_jobs": {}, + } + + +@pytest.fixture +def submit_score_set_mappings_to_car_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for submit_score_set_mappings_to_car job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + 
+@pytest.fixture +def refresh_clinvar_controls_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for refresh_clinvar_controls job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +## Sample pipeline + + +@pytest.fixture +def sample_pipeline(): + """Create a sample Pipeline instance for testing.""" + + return Pipeline( + name="Sample Pipeline", + description="A sample pipeline for testing purposes", + ) + + +@pytest.fixture +def with_sample_pipeline(session, sample_pipeline): + """Fixture to ensure sample pipeline exists in the database.""" + session.add(sample_pipeline) + session.commit() + + +## Variant creation job fixtures + + +@pytest.fixture +def dummy_variant_creation_job_run(create_variants_sample_params): + """Create a dummy variant creation job run for testing.""" + + return JobRun( + urn="test:dummy_variant_creation_job", + job_type="dummy_variant_creation", + job_function="dummy_variant_creation_function", + max_retries=3, + retry_count=0, + job_params=create_variants_sample_params, + ) + + +@pytest.fixture +def dummy_variant_mapping_job_run(map_variants_sample_params): + """Create a dummy variant mapping job run for testing.""" + + return JobRun( + urn="test:dummy_variant_mapping_job", + job_type="dummy_variant_mapping", + job_function="dummy_variant_mapping_function", + max_retries=3, + retry_count=0, + job_params=map_variants_sample_params, + ) + + +@pytest.fixture +def with_dummy_setup_jobs( + session, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, +): + """Add dummy variant creation and mapping job runs to the session.""" + + session.add(dummy_variant_creation_job_run) + session.add(dummy_variant_mapping_job_run) + session.commit() + + +## Gnomad Linkage Job Fixtures ## + + +@pytest.fixture +def sample_link_gnomad_variants_pipeline(): + """Create a pipeline instance for link_gnomad_variants job.""" + + return Pipeline( + 
urn="test:link_gnomad_variants_pipeline", + name="Link gnomAD Variants Pipeline", + ) + + +@pytest.fixture +def sample_link_gnomad_variants_run(link_gnomad_variants_sample_params): + """Create a JobRun instance for link_gnomad_variants job.""" + + return JobRun( + urn="test:link_gnomad_variants", + job_type="link_gnomad_variants", + job_function="link_gnomad_variants", + max_retries=3, + retry_count=0, + job_params=link_gnomad_variants_sample_params, + ) + + +@pytest.fixture +def with_gnomad_linking_job(session, sample_link_gnomad_variants_run): + """Add a link_gnomad_variants job run to the session.""" + + session.add(sample_link_gnomad_variants_run) + session.commit() + + +@pytest.fixture +def with_gnomad_linking_pipeline(session, sample_link_gnomad_variants_pipeline): + """Add a link_gnomad_variants pipeline to the session.""" + + session.add(sample_link_gnomad_variants_pipeline) + session.commit() + + +@pytest.fixture +def sample_link_gnomad_variants_run_pipeline( + session, + with_gnomad_linking_job, + with_gnomad_linking_pipeline, + sample_link_gnomad_variants_run, + sample_link_gnomad_variants_pipeline, +): + """Provide a context with a link_gnomad_variants job run and pipeline.""" + + sample_link_gnomad_variants_run.pipeline_id = sample_link_gnomad_variants_pipeline.id + session.commit() + return sample_link_gnomad_variants_run + + +@pytest.fixture +def setup_sample_variants_with_caid( + session, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run +): + """Setup variants and mapped variants in the database for testing.""" + score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) + + # Add a variant and mapped variant to the database with a CAID + variant = Variant( + urn="urn:variant:test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, + ) + 
session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id=VALID_CAID, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + return variant, mapped_variant + + +## Uniprot Job Fixtures ## + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_pipeline(): + """Create a pipeline instance for submit_uniprot_mapping_jobs_for_score_set job.""" + + return Pipeline( + urn="test:submit_uniprot_mapping_jobs_pipeline", + name="Submit UniProt Mapping Jobs Pipeline", + ) + + +@pytest.fixture +def sample_poll_uniprot_mapping_jobs_pipeline(): + """Create a pipeline instance for poll_uniprot_mapping_jobs_for_score_set job.""" + + return Pipeline( + urn="test:poll_uniprot_mapping_jobs_pipeline", + name="Poll UniProt Mapping Jobs Pipeline", + ) + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_run(submit_uniprot_mapping_jobs_sample_params): + """Create a JobRun instance for submit_uniprot_mapping_jobs_for_score_set job.""" + + return JobRun( + urn="test:submit_uniprot_mapping_jobs", + job_type="submit_uniprot_mapping_jobs", + job_function="submit_uniprot_mapping_jobs_for_score_set", + max_retries=3, + retry_count=0, + job_params=submit_uniprot_mapping_jobs_sample_params, + ) + + +@pytest.fixture +def sample_dummy_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, +): + """Create a sample dummy dependent polling job for the submission run.""" + + dependent_job = JobRun( + urn="test:dummy_poll_uniprot_mapping_jobs", + job_type="dummy_poll_uniprot_mapping_jobs", + job_function="dummy_arq_function", + max_retries=3, + retry_count=0, + job_params={ + "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], + "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], + "mapping_jobs": {}, + }, + ) + + 
return dependent_job + + +@pytest.fixture +def sample_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, +): + """Create a sample dependent polling job for the submission run.""" + + dependent_job = JobRun( + urn="test:dependent_poll_uniprot_mapping_jobs", + job_type="dependent_poll_uniprot_mapping_jobs", + job_function="poll_uniprot_mapping_jobs_for_score_set", + max_retries=3, + retry_count=0, + job_params={ + "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], + "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], + "mapping_jobs": {}, + }, + ) + + return dependent_job + + +@pytest.fixture +def with_dummy_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, +): + """Create a sample dummy dependent polling job for the submission run.""" + session.add(sample_dummy_polling_job_for_submission_run) + session.commit() + + dependency = JobDependency( + id=sample_dummy_polling_job_for_submission_run.id, + depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + +@pytest.fixture +def with_dependent_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, + sample_polling_job_for_submission_run, +): + """Create a sample dependent polling job for the submission run.""" + session.add(sample_polling_job_for_submission_run) + session.commit() + + dependency = JobDependency( + id=sample_polling_job_for_submission_run.id, + depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + +@pytest.fixture +def with_independent_polling_job_for_submission_run( + session, 
+ sample_polling_job_for_submission_run, +): + """Create a sample dependent polling job for the submission run.""" + session.add(sample_polling_job_for_submission_run) + session.commit() + + +@pytest.fixture +def with_submit_uniprot_mapping_job(session, sample_submit_uniprot_mapping_jobs_run): + """Add a submit_uniprot_mapping_jobs job run to the session.""" + + session.add(sample_submit_uniprot_mapping_jobs_run) + session.commit() + + +@pytest.fixture +def with_poll_uniprot_mapping_job(session, sample_poll_uniprot_mapping_jobs_run): + """Add a poll_uniprot_mapping_jobs job run to the session.""" + + session.add(sample_poll_uniprot_mapping_jobs_run) + session.commit() + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_run_in_pipeline( + session, + with_submit_uniprot_mapping_job, + with_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run, + sample_submit_uniprot_mapping_jobs_pipeline, +): + """Provide a context with a submit_uniprot_mapping_jobs job run and pipeline.""" + + sample_submit_uniprot_mapping_jobs_run.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return sample_submit_uniprot_mapping_jobs_run + + +@pytest.fixture +def sample_poll_uniprot_mapping_jobs_run_in_pipeline( + session, + with_independent_polling_job_for_submission_run, + with_poll_uniprot_mapping_jobs_pipeline, + sample_polling_job_for_submission_run, + sample_poll_uniprot_mapping_jobs_pipeline, +): + """Provide a context with a poll_uniprot_mapping_jobs job run and pipeline.""" + + sample_polling_job_for_submission_run.pipeline_id = sample_poll_uniprot_mapping_jobs_pipeline.id + session.commit() + return sample_polling_job_for_submission_run + + +@pytest.fixture +def sample_dummy_polling_job_for_submission_run_in_pipeline( + session, + with_dummy_polling_job_for_submission_run, + with_submit_uniprot_mapping_jobs_pipeline, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_pipeline, + 
sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_dummy_polling_job_for_submission_run, +): + """Provide a context with a dependent polling job run in the pipeline.""" + + dependent_job = sample_dummy_polling_job_for_submission_run + dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return dependent_job + + +@pytest.fixture +def sample_polling_job_for_submission_run_in_pipeline( + session, + with_dependent_polling_job_for_submission_run, + with_submit_uniprot_mapping_jobs_pipeline, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_polling_job_for_submission_run, +): + """Provide a context with a dependent polling job run in the pipeline.""" + + dependent_job = sample_polling_job_for_submission_run + dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return dependent_job + + +@pytest.fixture +def with_submit_uniprot_mapping_jobs_pipeline( + session, + sample_submit_uniprot_mapping_jobs_pipeline, +): + """Add a submit_uniprot_mapping_jobs pipeline to the session.""" + + session.add(sample_submit_uniprot_mapping_jobs_pipeline) + session.commit() + + +@pytest.fixture +def with_poll_uniprot_mapping_jobs_pipeline( + session, + sample_poll_uniprot_mapping_jobs_pipeline, +): + """Add a poll_uniprot_mapping_jobs pipeline to the session.""" + session.add(sample_poll_uniprot_mapping_jobs_pipeline) + session.commit() + + +## Clingen Job Fixtures ## + + +@pytest.fixture +def submit_score_set_mappings_to_car_sample_pipeline(): + """Create a pipeline instance for submit_score_set_mappings_to_car job.""" + + return Pipeline( + urn="test:submit_score_set_mappings_to_car_pipeline", + name="Submit Score Set Mappings to ClinGen Allele Registry Pipeline", + ) + + +@pytest.fixture +def submit_score_set_mappings_to_ldh_sample_pipeline(): + """Create a pipeline instance for 
submit_score_set_mappings_to_ldh job.""" + + return Pipeline( + urn="test:submit_score_set_mappings_to_ldh_pipeline", + name="Submit Score Set Mappings to ClinGen Allele Registry Pipeline", + ) + + +@pytest.fixture +def submit_score_set_mappings_to_car_sample_job_run(submit_score_set_mappings_to_car_params): + """Create a JobRun instance for submit_score_set_mappings_to_car job.""" + + return JobRun( + urn="test:submit_score_set_mappings_to_car", + job_type="submit_score_set_mappings_to_car", + job_function="submit_score_set_mappings_to_car", + max_retries=3, + retry_count=0, + job_params=submit_score_set_mappings_to_car_params, + ) + + +@pytest.fixture +def submit_score_set_mappings_to_ldh_sample_job_run(submit_score_set_mappings_to_car_params): + """Create a JobRun instance for submit_score_set_mappings_to_car job.""" + + return JobRun( + urn="test:submit_score_set_mappings_to_car", + job_type="submit_score_set_mappings_to_car", + job_function="submit_score_set_mappings_to_car", + max_retries=3, + retry_count=0, + job_params=submit_score_set_mappings_to_car_params, + ) + + +@pytest.fixture +def submit_score_set_mappings_to_car_sample_job_run_in_pipeline( + session, + with_submit_score_set_mappings_to_car_pipeline, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_pipeline, + submit_score_set_mappings_to_car_sample_job_run, +): + """Provide a context with a submit_score_set_mappings_to_car job run and pipeline.""" + + submit_score_set_mappings_to_car_sample_job_run.pipeline_id = submit_score_set_mappings_to_car_sample_pipeline.id + session.commit() + return submit_score_set_mappings_to_car_sample_job_run + + +@pytest.fixture +def submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline( + session, + with_submit_score_set_mappings_to_ldh_pipeline, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_pipeline, + submit_score_set_mappings_to_ldh_sample_job_run, +): + """Provide a context with a 
submit_score_set_mappings_to_ldh job run and pipeline.""" + + submit_score_set_mappings_to_ldh_sample_job_run.pipeline_id = submit_score_set_mappings_to_ldh_sample_pipeline.id + session.commit() + return submit_score_set_mappings_to_ldh_sample_job_run + + +@pytest.fixture +def with_submit_score_set_mappings_to_car_job(session, submit_score_set_mappings_to_car_sample_job_run): + """Add a submit_score_set_mappings_to_car job run to the session.""" + + session.add(submit_score_set_mappings_to_car_sample_job_run) + session.commit() + + +@pytest.fixture +def with_submit_score_set_mappings_to_ldh_job(session, submit_score_set_mappings_to_ldh_sample_job_run): + """Add a submit_score_set_mappings_to_ldh job run to the session.""" + + session.add(submit_score_set_mappings_to_ldh_sample_job_run) + session.commit() + + +@pytest.fixture +def with_submit_score_set_mappings_to_car_pipeline( + session, + submit_score_set_mappings_to_car_sample_pipeline, +): + """Add a submit_score_set_mappings_to_car pipeline to the session.""" + + session.add(submit_score_set_mappings_to_car_sample_pipeline) + session.commit() + + +@pytest.fixture +def with_submit_score_set_mappings_to_ldh_pipeline( + session, + submit_score_set_mappings_to_ldh_sample_pipeline, +): + """Add a submit_score_set_mappings_to_ldh pipeline to the session.""" + + session.add(submit_score_set_mappings_to_ldh_sample_pipeline) + session.commit() + + +@pytest.fixture +def sample_independent_variant_creation_run(create_variants_sample_params): + """Create a JobRun instance for variant creation job.""" + + return JobRun( + urn="test:create_variants_for_score_set", + job_type="create_variants_for_score_set", + job_function="create_variants_for_score_set", + max_retries=3, + retry_count=0, + job_params=create_variants_sample_params, + ) + + +@pytest.fixture +def sample_independent_variant_mapping_run(map_variants_sample_params): + """Create a JobRun instance for variant mapping job.""" + + return JobRun( + 
urn="test:map_variants_for_score_set", + job_type="map_variants_for_score_set", + job_function="map_variants_for_score_set", + max_retries=3, + retry_count=0, + job_params=map_variants_sample_params, + ) + + +@pytest.fixture +def dummy_pipeline_step(): + """Create a dummy pipeline step function for testing.""" + + return JobRun( + urn="test:dummy_pipeline_step", + job_type="dummy_pipeline_step", + job_function="dummy_arq_function", + max_retries=3, + retry_count=0, + ) + + +@pytest.fixture +def sample_pipeline_variant_creation_run( + session, + with_variant_creation_pipeline, + sample_variant_creation_pipeline, + sample_independent_variant_creation_run, +): + """Create a JobRun instance for variant creation job.""" + + sample_independent_variant_creation_run.pipeline_id = sample_variant_creation_pipeline.id + session.add(sample_independent_variant_creation_run) + session.commit() + return sample_independent_variant_creation_run + + +@pytest.fixture +def sample_pipeline_variant_mapping_run( + session, + with_variant_mapping_pipeline, + sample_independent_variant_mapping_run, + sample_variant_mapping_pipeline, +): + """Create a JobRun instance for variant mapping job.""" + + sample_independent_variant_mapping_run.pipeline_id = sample_variant_mapping_pipeline.id + session.add(sample_independent_variant_mapping_run) + session.commit() + return sample_independent_variant_mapping_run + + +@pytest.fixture +def sample_variant_creation_pipeline(): + """Create a Pipeline instance.""" + + return Pipeline( + name="variant_creation_pipeline", + description="Pipeline for creating variants", + ) + + +@pytest.fixture +def sample_variant_mapping_pipeline(): + """Create a Pipeline instance.""" + + return Pipeline( + name="variant_mapping_pipeline", + description="Pipeline for mapping variants", + ) + + +@pytest.fixture +def with_independent_processing_runs( + session, + sample_independent_variant_creation_run, + sample_independent_variant_mapping_run, +): + """Fixture to ensure 
independent variant processing runs exist in the database.""" + + session.add(sample_independent_variant_creation_run) + session.add(sample_independent_variant_mapping_run) + session.commit() + + +@pytest.fixture +def with_variant_creation_pipeline(session, sample_variant_creation_pipeline): + """Fixture to ensure variant creation pipeline and its runs exist in the database.""" + session.add(sample_variant_creation_pipeline) + session.commit() + + +@pytest.fixture +def with_variant_creation_pipeline_runs( + session, + with_variant_creation_pipeline, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + dummy_pipeline_step, +): + """Fixture to ensure pipeline variant processing runs exist in the database.""" + session.add(sample_pipeline_variant_creation_run) + dummy_pipeline_step.pipeline_id = sample_variant_creation_pipeline.id + session.add(dummy_pipeline_step) + session.commit() + + +@pytest.fixture +def with_variant_mapping_pipeline(session, sample_variant_mapping_pipeline): + """Fixture to ensure variant mapping pipeline and its runs exist in the database.""" + session.add(sample_variant_mapping_pipeline) + session.commit() + + +@pytest.fixture +def with_variant_mapping_pipeline_runs( + session, + with_variant_mapping_pipeline, + sample_variant_mapping_pipeline, + sample_pipeline_variant_mapping_run, + dummy_pipeline_step, +): + """Fixture to ensure pipeline variant processing runs exist in the database.""" + session.add(sample_pipeline_variant_mapping_run) + dummy_pipeline_step.pipeline_id = sample_variant_mapping_pipeline.id + session.add(dummy_pipeline_step) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline(): + """Create a sample Pipeline instance for testing.""" + + return Pipeline( + name="Dummy Pipeline", + description="A dummy pipeline for testing purposes", + ) + + +@pytest.fixture +def with_dummy_pipeline(session, sample_dummy_pipeline): + """Fixture to ensure dummy pipeline exists in the database.""" + 
session.add(sample_dummy_pipeline) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline): + """Create a sample JobRun instance for starting the dummy pipeline.""" + start_job_run = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="start_pipeline", + job_function="start_pipeline", + ) + session.add(start_job_run) + session.commit() + + return start_job_run + + +@pytest.fixture +def with_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline_start): + """Fixture to ensure a start pipeline job run for the dummy pipeline exists in the database.""" + session.add(sample_dummy_pipeline_start) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline_step(session, sample_dummy_pipeline): + """Create a sample PipelineStep instance for the dummy pipeline.""" + step = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="dummy_step", + job_function="dummy_arq_function", + ) + session.add(step) + session.commit() + return step + + +@pytest.fixture +def with_full_dummy_pipeline(session, with_dummy_pipeline_start, sample_dummy_pipeline, sample_dummy_pipeline_step): + """Fixture to ensure dummy pipeline steps exist in the database.""" + session.add(sample_dummy_pipeline_step) + session.commit() + + +@pytest.fixture +def sample_refresh_clinvar_controls_job_run(refresh_clinvar_controls_sample_params): + """Create a JobRun instance for refresh_clinvar_controls job.""" + + return JobRun( + urn="test:refresh_clinvar_controls", + job_type="refresh_clinvar_controls", + job_function="refresh_clinvar_controls", + max_retries=3, + retry_count=0, + job_params=refresh_clinvar_controls_sample_params, + ) + + +@pytest.fixture +def with_refresh_clinvar_controls_job(session, sample_refresh_clinvar_controls_job_run): + """Add a refresh_clinvar_controls job run to the session.""" + + session.add(sample_refresh_clinvar_controls_job_run) + session.commit() + + +@pytest.fixture +def 
sample_refresh_clinvar_controls_pipeline(): + """Create a pipeline instance for refresh_clinvar_controls job.""" + + return Pipeline( + urn="test:refresh_clinvar_controls_pipeline", + name="Refresh ClinVar Controls Pipeline", + ) + + +@pytest.fixture +def with_refresh_clinvar_controls_pipeline( + session, + sample_refresh_clinvar_controls_pipeline, +): + """Add a refresh_clinvar_controls pipeline to the session.""" + + session.add(sample_refresh_clinvar_controls_pipeline) + session.commit() + + +@pytest.fixture +def sample_refresh_clinvar_controls_job_in_pipeline( + session, + with_refresh_clinvar_controls_job, + with_refresh_clinvar_controls_pipeline, + sample_refresh_clinvar_controls_job_run, + sample_refresh_clinvar_controls_pipeline, +): + """Provide a context with a refresh_clinvar_controls job run and pipeline.""" + + sample_refresh_clinvar_controls_job_run.pipeline_id = sample_refresh_clinvar_controls_pipeline.id + session.commit() + return sample_refresh_clinvar_controls_job_run + + +## Janitor job fixtures + + +@pytest.fixture +def sample_cleanup_job_run(): + """Create a JobRun instance for a cleanup job.""" + + return JobRun( + urn="test:cleanup_job", + job_type="cleanup_job", + job_function="cleanup_function", + max_retries=3, + retry_count=0, + ) + + +@pytest.fixture +def with_cleanup_job(session, sample_cleanup_job_run): + """Add a cleanup job run to the session.""" + + session.add(sample_cleanup_job_run) + session.commit() + + +## HGVS Population Job Fixtures ## + + +@pytest.fixture +def populate_hgvs_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for populate_hgvs_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def sample_populate_hgvs_pipeline(): + """Create a pipeline instance for populate_hgvs_for_score_set job.""" + + return Pipeline( + urn="test:populate_hgvs_pipeline", + name="Populate HGVS Pipeline", + ) 
+ + +@pytest.fixture +def sample_populate_hgvs_run(populate_hgvs_sample_params): + """Create a JobRun instance for populate_hgvs_for_score_set job.""" + + return JobRun( + urn="test:populate_hgvs_for_score_set", + job_type="populate_hgvs_for_score_set", + job_function="populate_hgvs_for_score_set", + max_retries=3, + retry_count=0, + job_params=populate_hgvs_sample_params, + ) + + +@pytest.fixture +def with_populate_hgvs_job(session, sample_populate_hgvs_run): + """Add a populate_hgvs_for_score_set job run to the session.""" + + session.add(sample_populate_hgvs_run) + session.commit() + + +@pytest.fixture +def with_populate_hgvs_pipeline(session, sample_populate_hgvs_pipeline): + """Add a populate_hgvs pipeline to the session.""" + + session.add(sample_populate_hgvs_pipeline) + session.commit() + + +@pytest.fixture +def sample_populate_hgvs_run_pipeline( + session, + with_populate_hgvs_job, + with_populate_hgvs_pipeline, + sample_populate_hgvs_run, + sample_populate_hgvs_pipeline, +): + """Provide a context with a populate_hgvs job run and pipeline.""" + + sample_populate_hgvs_run.pipeline_id = sample_populate_hgvs_pipeline.id + session.commit() + return sample_populate_hgvs_run + + +@pytest.fixture +def setup_sample_variants_with_caid_for_hgvs( + session, with_populated_domain_data, mock_worker_ctx, sample_populate_hgvs_run +): + """Setup variants and mapped variants in the database for HGVS population testing.""" + score_set = session.get(ScoreSet, sample_populate_hgvs_run.job_params["score_set_id"]) + + variant = Variant( + urn="urn:variant:test-variant-with-caid-hgvs", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id=VALID_CAID, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) 
+ session.add(mapped_variant) + session.commit() + return variant, mapped_variant + + +# --- Variant Translation Fixtures --- + + +@pytest.fixture +def populate_variant_translations_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for populate_variant_translations_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def sample_populate_variant_translations_pipeline(): + """Create a pipeline instance for populate_variant_translations_for_score_set job.""" + + return Pipeline( + urn="test:populate_variant_translations_pipeline", + name="Populate Variant Translations Pipeline", + ) + + +@pytest.fixture +def sample_populate_variant_translations_run(populate_variant_translations_sample_params): + """Create a JobRun instance for populate_variant_translations_for_score_set job.""" + + return JobRun( + urn="test:populate_variant_translations_for_score_set", + job_type="populate_variant_translations_for_score_set", + job_function="populate_variant_translations_for_score_set", + max_retries=3, + retry_count=0, + job_params=populate_variant_translations_sample_params, + ) + + +@pytest.fixture +def with_populate_variant_translations_job(session, sample_populate_variant_translations_run): + """Add a populate_variant_translations_for_score_set job run to the session.""" + + session.add(sample_populate_variant_translations_run) + session.commit() + + +@pytest.fixture +def with_populate_variant_translations_pipeline(session, sample_populate_variant_translations_pipeline): + """Add a populate_variant_translations pipeline to the session.""" + + session.add(sample_populate_variant_translations_pipeline) + session.commit() + + +@pytest.fixture +def sample_populate_variant_translations_run_pipeline( + session, + with_populate_variant_translations_job, + with_populate_variant_translations_pipeline, + sample_populate_variant_translations_run, + 
sample_populate_variant_translations_pipeline, +): + """Provide a context with a populate_variant_translations job run and pipeline.""" + + sample_populate_variant_translations_run.pipeline_id = sample_populate_variant_translations_pipeline.id + session.commit() + return sample_populate_variant_translations_run + + +@pytest.fixture +def setup_sample_variants_with_caid_for_translation( + session, with_populated_domain_data, mock_worker_ctx, sample_populate_variant_translations_run +): + """Setup variants and mapped variants in the database for variant translation testing.""" + score_set = session.get(ScoreSet, sample_populate_variant_translations_run.job_params["score_set_id"]) + + variant = Variant( + urn="urn:variant:test-variant-with-caid-translation", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id=VALID_CAID, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + return variant, mapped_variant + + +## ClinGen Cache Warming Job Fixtures ## + + +@pytest.fixture +def warm_clingen_cache_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for warm_clingen_cache job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def sample_warm_clingen_cache_job_run(warm_clingen_cache_sample_params): + """Create a JobRun instance for warm_clingen_cache job.""" + + return JobRun( + urn="test:warm_clingen_cache", + job_type="warm_clingen_cache", + job_function="warm_clingen_cache", + max_retries=3, + retry_count=0, + job_params=warm_clingen_cache_sample_params, + ) + + +@pytest.fixture +def with_warm_clingen_cache_job(session, 
sample_warm_clingen_cache_job_run): + """Add a warm_clingen_cache job run to the session.""" + + session.add(sample_warm_clingen_cache_job_run) + session.commit() + + +@pytest.fixture +def sample_warm_clingen_cache_pipeline(): + """Create a pipeline instance for warm_clingen_cache job.""" + + return Pipeline( + urn="test:warm_clingen_cache_pipeline", + name="Warm ClinGen Cache Pipeline", + ) + + +@pytest.fixture +def with_warm_clingen_cache_pipeline(session, sample_warm_clingen_cache_pipeline): + """Add a warm_clingen_cache pipeline to the session.""" + + session.add(sample_warm_clingen_cache_pipeline) + session.commit() + + +@pytest.fixture +def sample_warm_clingen_cache_job_in_pipeline( + session, + with_warm_clingen_cache_job, + with_warm_clingen_cache_pipeline, + sample_warm_clingen_cache_job_run, + sample_warm_clingen_cache_pipeline, +): + """Provide a context with a warm_clingen_cache job run and pipeline.""" + + sample_warm_clingen_cache_job_run.pipeline_id = sample_warm_clingen_cache_pipeline.id + session.commit() + return sample_warm_clingen_cache_job_run + + +## VEP Population Job Fixtures ## + + +@pytest.fixture +def populate_vep_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for populate_vep_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def sample_populate_vep_pipeline(): + """Create a pipeline instance for populate_vep_for_score_set job.""" + + return Pipeline( + urn="test:populate_vep_pipeline", + name="Populate VEP Pipeline", + ) + + +@pytest.fixture +def sample_populate_vep_run(populate_vep_sample_params): + """Create a JobRun instance for populate_vep_for_score_set job.""" + + return JobRun( + urn="test:populate_vep_for_score_set", + job_type="populate_vep_for_score_set", + job_function="populate_vep_for_score_set", + max_retries=3, + retry_count=0, + job_params=populate_vep_sample_params, + ) + + 
+@pytest.fixture +def with_populate_vep_job(session, sample_populate_vep_run): + """Add a populate_vep_for_score_set job run to the session.""" + + session.add(sample_populate_vep_run) + session.commit() + + +@pytest.fixture +def with_populate_vep_pipeline(session, sample_populate_vep_pipeline): + """Add a populate_vep pipeline to the session.""" + + session.add(sample_populate_vep_pipeline) + session.commit() + + +@pytest.fixture +def sample_populate_vep_run_pipeline( + session, + with_populate_vep_job, + with_populate_vep_pipeline, + sample_populate_vep_run, + sample_populate_vep_pipeline, +): + """Provide a context with a populate_vep job run and pipeline.""" + + sample_populate_vep_run.pipeline_id = sample_populate_vep_pipeline.id + session.commit() + return sample_populate_vep_run + + +@pytest.fixture +def setup_sample_variants_for_vep(session, with_populated_domain_data, mock_worker_ctx, sample_populate_vep_run): + """Setup a variant and mapped variant with hgvs_assay_level for VEP testing.""" + score_set = session.get(ScoreSet, sample_populate_vep_run.job_params["score_set_id"]) + + variant = Variant( + urn="urn:variant:test-variant-for-vep", + score_set_id=score_set.id, + hgvs_nt="NM_007294.4:c.5G>A", + hgvs_pro="NP_009225.1:p.Cys2Tyr", + data={"hgvs_c": "NM_007294.4:c.5G>A", "hgvs_p": "NP_009225.1:p.Cys2Tyr"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + post_mapped={"type": "Allele", "expressions": [{"value": "NM_007294.4:c.5G>A", "syntax": "hgvs.c"}]}, + hgvs_assay_level="NM_007294.4:c.5G>A", + ) + session.add(mapped_variant) + session.commit() + return variant, mapped_variant diff --git a/tests/worker/jobs/conftest_optional.py b/tests/worker/jobs/conftest_optional.py new file mode 100644 index 000000000..3ca408cba --- /dev/null +++ b/tests/worker/jobs/conftest_optional.py @@ -0,0 +1,14 @@ +from unittest 
import mock + +import pytest +from mypy_boto3_s3 import S3Client + + +@pytest.fixture +def mock_s3_client(): + """Mock S3 client for tests that interact with S3.""" + + with mock.patch("mavedb.worker.jobs.variant_processing.creation.s3_client") as mock_s3_client_func: + mock_s3 = mock.MagicMock(spec=S3Client) + mock_s3_client_func.return_value = mock_s3 + yield mock_s3 diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py new file mode 100644 index 000000000..0f41cb595 --- /dev/null +++ b/tests/worker/jobs/data_management/test_views.py @@ -0,0 +1,256 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +from unittest.mock import patch + +from sqlalchemy import select + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.models.published_variant import PublishedVariantsMV +from mavedb.worker.jobs.data_management.views import refresh_materialized_views, refresh_published_variants_view + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + +############################################################################################################################################ +# refresh_materialized_views +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestRefreshMaterializedViewsUnit: + """Unit tests for the refresh_materialized_views function.""" + + async def test_refresh_materialized_views_calls_refresh_function(self, mock_worker_ctx, mock_job_manager): + """Test that refresh_materialized_views calls the refresh function.""" + with patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views") as 
mock_refresh: + result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_refresh.assert_called_once_with(mock_job_manager.db) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshMaterializedViewsIntegration: + """Integration tests for the refresh_materialized_views function and decorator logic.""" + + async def test_refresh_materialized_views_integration(self, standalone_worker_context, session): + """Integration test that runs refresh_materialized_views end-to-end.""" + + result = await refresh_materialized_views(standalone_worker_context) + + job = session.execute( + select(JobRun).where(JobRun.job_function == "refresh_materialized_views") + ).scalar_one_or_none() + assert job is not None + assert job.status == JobStatus.SUCCEEDED + assert job.job_type == "cron_job" + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_refresh_materialized_views_handles_exceptions(self, standalone_worker_context, session): + """Integration test that ensures exceptions during refresh are handled properly.""" + + with ( + patch( + "mavedb.worker.jobs.data_management.views.refresh_all_mat_views", + side_effect=Exception("Test exception during refresh"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + result = await refresh_materialized_views(standalone_worker_context) + mock_send_slack_job_error.assert_called_once() + + job = session.execute( + select(JobRun).where(JobRun.job_function == "refresh_materialized_views") + ).scalar_one_or_none() + + assert job is not None + assert job.status == JobStatus.ERRORED + assert job.job_type == "cron_job" + assert job.error_message == "Test exception during refresh" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + 
assert isinstance(result.exception, Exception) + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshMaterializedViewsArqContext: + """Integration tests for refresh_materialized_views within an ARQ worker context.""" + + async def test_refresh_materialized_views_arq_integration( + self, arq_redis, arq_worker, standalone_worker_context, session + ): + """Integration test that runs refresh_materialized_views end-to-end using ARQ context.""" + await arq_redis.enqueue_job("refresh_materialized_views") + await arq_worker.async_run() + await arq_worker.run_check() + + job = session.execute( + select(JobRun).where(JobRun.job_function == "refresh_materialized_views") + ).scalar_one_or_none() + assert job is not None + assert job.status == JobStatus.SUCCEEDED + assert job.job_type == "cron_job" + + +############################################################################################################################################ +# refresh_published_variants_view +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestRefreshPublishedVariantsViewUnit: + """Unit tests for the refresh_published_variants_view function.""" + + async def test_refresh_published_variants_view_calls_refresh_function( + self, mock_worker_ctx, mock_job_manager, mock_job_run + ): + """Test that refresh_published_variants_view calls the refresh function.""" + mock_job_run.job_params = {"correlation_id": "test-corr-id"} + + with ( + patch.object(PublishedVariantsMV, "refresh") as mock_refresh, + patch("mavedb.worker.jobs.data_management.views.validate_job_params"), + ): + result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_refresh.assert_called_once_with(mock_job_manager.db) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + 
+@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshPublishedVariantsViewIntegration: + """Integration tests for the refresh_published_variants_view function and decorator logic.""" + + @pytest.fixture() + def setup_refresh_job_run(self, session): + """Add a refresh_published_variants_view job run to the DB before each test.""" + job_run = JobRun( + job_type="data_management", + job_function="refresh_published_variants_view", + status=JobStatus.PENDING, + job_params={"correlation_id": "test-corr-id"}, + ) + session.add(job_run) + session.commit() + return job_run + + async def test_refresh_published_variants_view_integration_standalone( + self, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that runs refresh_published_variants_view end-to-end.""" + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.SUCCEEDED + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_refresh_published_variants_view_integration_pipeline( + self, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that runs refresh_published_variants_view end-to-end.""" + # Create a pipeline for the job run and associate it + pipeline = Pipeline( + name="Test Pipeline for Published Variants View Refresh", + ) + session.add(pipeline) + session.commit() + session.refresh(pipeline) + setup_refresh_job_run.pipeline_id = pipeline.id + session.add(setup_refresh_job_run) + session.commit() + + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.SUCCEEDED + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + session.refresh(pipeline) + assert 
pipeline.status == PipelineStatus.SUCCEEDED + + async def test_refresh_published_variants_view_handles_exceptions( + self, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that ensures exceptions during refresh are handled properly.""" + with ( + patch.object( + PublishedVariantsMV, + "refresh", + side_effect=Exception("Test exception during published variants view refresh"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + mock_send_slack_job_error.assert_called_once() + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.ERRORED + assert setup_refresh_job_run.error_message == "Test exception during published variants view refresh" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + + async def test_refresh_published_variants_view_requires_params( + self, setup_refresh_job_run, standalone_worker_context, session + ): + """Integration test that ensures required job params are validated.""" + setup_refresh_job_run.job_params = {} # Clear required params + session.add(setup_refresh_job_run) + session.commit() + + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error: + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + mock_send_slack_job_error.assert_called_once() + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.ERRORED + assert "Job has no job_params defined" in setup_refresh_job_run.error_message + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + + +@pytest.mark.asyncio 
+@pytest.mark.integration +class TestRefreshPublishedVariantsViewArqContext: + """Integration tests for refresh_published_variants_view within an ARQ worker context.""" + + @pytest.fixture() + def setup_refresh_job_run(self, session): + """Add a refresh_published_variants_view job run to the DB before each test.""" + job_run = JobRun( + job_type="data_management", + job_function="refresh_published_variants_view", + status=JobStatus.PENDING, + job_params={"correlation_id": "test-corr-id"}, + ) + session.add(job_run) + session.commit() + return job_run + + async def test_refresh_published_variants_view_arq_integration( + self, arq_redis, arq_worker, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that runs refresh_published_variants_view end-to-end using ARQ context.""" + await arq_redis.enqueue_job("refresh_published_variants_view", setup_refresh_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/network/test_clingen.py b/tests/worker/jobs/external_services/network/test_clingen.py new file mode 100644 index 000000000..2bd8645a6 --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_clingen.py @@ -0,0 +1,141 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import patch + +from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.mapped_variant import MappedVariant +from tests.helpers.util.setup.worker import create_mappings_in_score_set + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +# XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. 
+@pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.") +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +class TestE2EClingenSubmitScoreSetMappingsToCar: + """End-to-end tests for ClinGen CAR submission jobs.""" + + async def test_clingen_car_submission_e2e( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + mock_s3_client, + sample_score_set, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_pipeline, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + """Test the end-to-end flow of submitting score set mappings to ClinGen CAR.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network", + ), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testuser"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the submission job was completed successfully + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded + 
session.refresh(submit_score_set_mappings_to_car_sample_pipeline)
+        assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.SUCCEEDED
+
+        # Verify that variants have CAIDs assigned
+        variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all()
+        assert len(variants) == 4
+        for variant in variants:
+            assert variant.clingen_allele_id is not None
+
+
+# XXX: Connect with ClinGen to resolve the invalid credentials issue on test site.
+@pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.")
+@pytest.mark.integration
+@pytest.mark.asyncio
+@pytest.mark.network
+class TestE2EClingenSubmitScoreSetMappingsToLdh:
+    """End-to-end tests for ClinGen LDH submission jobs."""
+
+    async def test_clingen_ldh_submission_e2e(
+        self,
+        session,
+        arq_redis,
+        arq_worker,
+        standalone_worker_context,
+        mock_s3_client,
+        sample_score_set,
+        with_submit_score_set_mappings_to_ldh_job,
+        submit_score_set_mappings_to_ldh_sample_pipeline,
+        submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline,
+        sample_score_dataframe,
+        sample_count_dataframe,
+        with_dummy_setup_jobs,
+        dummy_variant_creation_job_run,
+        dummy_variant_mapping_job_run,
+    ):
+        """Test the end-to-end flow of submitting score set mappings to ClinGen LDH."""
+        # Create mappings in the score set
+        await create_mappings_in_score_set(
+            session,
+            mock_s3_client,
+            standalone_worker_context,
+            sample_score_dataframe,
+            sample_count_dataframe,
+            dummy_variant_creation_job_run,
+            dummy_variant_mapping_job_run,
+        )
+
+        # Patch ClinGen LDH service configuration to target the staging environment
+        with (
+            patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"),
+            patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"),
+            patch("mavedb.lib.clingen.constants.LDH_ACCESS_ENDPOINT", "https://genboree.org/ldh-stg/srvc"),
+            patch("mavedb.lib.clingen.constants.CLIN_GEN_TENANT", "dev-clingen"),
+        ):
+            await
arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the submission job succeeded + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/network/test_clinvar.py b/tests/worker/jobs/external_services/network/test_clinvar.py new file mode 100644 index 000000000..54ae2fff3 --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_clinvar.py @@ -0,0 +1,48 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from sqlalchemy import select + +from mavedb.models.clinical_control import ClinicalControl +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus, JobStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +@pytest.mark.slow +class TestE2ERefreshClinvarControls: + async def test_refresh_clinvar_controls_e2e( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + setup_sample_variants_with_caid, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test the end-to-end flow of refreshing ClinVar clinical controls.""" + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that clinical controls were added successfully + clinical_controls = 
session.scalars(select(ClinicalControl)).all() + assert len(clinical_controls) == 1 + assert clinical_controls[0].db_identifier == "3045425" + + # Verify that annotation status was added + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == AnnotationStatus.SUCCESS + assert annotation_statuses[0].annotation_type == AnnotationType.CLINVAR_CONTROL + + # Verify that the job run was completed successfully + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/network/test_hgvs.py b/tests/worker/jobs/external_services/network/test_hgvs.py new file mode 100644 index 000000000..56f100e0b --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_hgvs.py @@ -0,0 +1,54 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from sqlalchemy import select + +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus, JobStatus, PipelineStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +@pytest.mark.slow +class TestE2EPopulateHgvsForScoreSet: + """End-to-end test for HGVS population against the real ClinGen API.""" + + async def test_populate_hgvs_e2e( + self, + session, + arq_redis, + arq_worker, + sample_populate_hgvs_run_pipeline, + sample_populate_hgvs_pipeline, + setup_sample_variants_with_caid_for_hgvs, + ): + """Enqueue the HGVS population job, run the worker, and verify HGVS fields are populated.""" + _, mapped_variant = setup_sample_variants_with_caid_for_hgvs + + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run_pipeline.id) + await 
arq_worker.async_run() + await arq_worker.run_check() + + session.refresh(sample_populate_hgvs_run_pipeline) + assert sample_populate_hgvs_run_pipeline.status == JobStatus.SUCCEEDED + + session.refresh(sample_populate_hgvs_pipeline) + assert sample_populate_hgvs_pipeline.status == PipelineStatus.SUCCEEDED + + session.refresh(mapped_variant) + annotation = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == mapped_variant.variant_id, + VariantAnnotationStatus.annotation_type == AnnotationType.MAPPED_HGVS, + VariantAnnotationStatus.current.is_(True), + ) + ).one_or_none() + assert annotation is not None + assert annotation.status in (AnnotationStatus.SUCCESS, AnnotationStatus.SKIPPED) diff --git a/tests/worker/jobs/external_services/network/test_uniprot.py b/tests/worker/jobs/external_services/network/test_uniprot.py new file mode 100644 index 000000000..506eb20f0 --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_uniprot.py @@ -0,0 +1,66 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from tests.helpers.constants import TEST_REFSEQ_IDENTIFIER + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +class TestE2EUniprotMappingJobs: + """End-to-end tests for UniProt mapping jobs.""" + + async def test_uniprot_mapping_jobs_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_polling_job_for_submission_run_in_pipeline, + ): + """Test the end-to-end flow of submitting and polling UniProt mapping jobs.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = 
{"protein": {"sequence_accessions": [TEST_REFSEQ_IDENTIFIER]}} + session.commit() + + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + submitted_jobs = sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"] + assert "1" in submitted_jobs + assert submitted_jobs["1"]["job_id"] is not None + assert submitted_jobs["1"]["accession"] == TEST_REFSEQ_IDENTIFIER + + # Verify that polling job params have been updated correctly + session.refresh(sample_polling_job_for_submission_run_in_pipeline) + assert sample_polling_job_for_submission_run_in_pipeline.job_params["mapping_jobs"] == { + "1": {"job_id": submitted_jobs["1"]["job_id"], "accession": TEST_REFSEQ_IDENTIFIER} + } + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job has run and is succeeded (pipeline ctx) + session.refresh(sample_polling_job_for_submission_run_in_pipeline) + assert sample_polling_job_for_submission_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is running + session.refresh(sample_submit_uniprot_mapping_jobs_pipeline) + assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/network/test_variant_translations.py b/tests/worker/jobs/external_services/network/test_variant_translations.py new file mode 100644 index 000000000..b45087dd9 --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_variant_translations.py @@ -0,0 +1,56 @@ +# ruff: noqa: E402 + +import 
pytest + +pytest.importorskip("arq") + +from sqlalchemy import select + +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus, JobStatus, PipelineStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +@pytest.mark.slow +class TestE2EPopulateVariantTranslationsForScoreSet: + """End-to-end test for variant translation population against the real ClinGen API.""" + + async def test_populate_variant_translations_e2e( + self, + session, + arq_redis, + arq_worker, + sample_populate_variant_translations_run_pipeline, + sample_populate_variant_translations_pipeline, + setup_sample_variants_with_caid_for_translation, + ): + """Enqueue the variant translation job, run the worker, and verify translations are created.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + + await arq_redis.enqueue_job( + "populate_variant_translations_for_score_set", + sample_populate_variant_translations_run_pipeline.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + session.refresh(sample_populate_variant_translations_run_pipeline) + assert sample_populate_variant_translations_run_pipeline.status == JobStatus.SUCCEEDED + + session.refresh(sample_populate_variant_translations_pipeline) + assert sample_populate_variant_translations_pipeline.status == PipelineStatus.SUCCEEDED + + annotation = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == mapped_variant.variant_id, + VariantAnnotationStatus.annotation_type == AnnotationType.VARIANT_TRANSLATION, + VariantAnnotationStatus.current.is_(True), + ) + ).one_or_none() + assert annotation is not None + assert annotation.status in (AnnotationStatus.SUCCESS, AnnotationStatus.SKIPPED) diff --git 
a/tests/worker/jobs/external_services/network/test_vep.py b/tests/worker/jobs/external_services/network/test_vep.py new file mode 100644 index 000000000..e53013c97 --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_vep.py @@ -0,0 +1,56 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from sqlalchemy import select + +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus, JobStatus, PipelineStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +@pytest.mark.slow +class TestE2EPopulateVepForScoreSet: + """End-to-end test for VEP functional consequence prediction against the real Ensembl API.""" + + async def test_populate_vep_e2e( + self, + session, + arq_redis, + arq_worker, + sample_populate_vep_run_pipeline, + sample_populate_vep_pipeline, + setup_sample_variants_for_vep, + ): + """Enqueue the VEP job, run the worker, and verify consequence and annotation are populated.""" + _, mapped_variant = setup_sample_variants_for_vep + + await arq_redis.enqueue_job("populate_vep_for_score_set", sample_populate_vep_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + session.refresh(sample_populate_vep_run_pipeline) + assert sample_populate_vep_run_pipeline.status == JobStatus.SUCCEEDED + + session.refresh(sample_populate_vep_pipeline) + assert sample_populate_vep_pipeline.status == PipelineStatus.SUCCEEDED + + session.refresh(mapped_variant) + assert mapped_variant.vep_functional_consequence is not None + assert mapped_variant.vep_access_date is not None + + annotation = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == mapped_variant.variant_id, + VariantAnnotationStatus.annotation_type == 
AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + VariantAnnotationStatus.current.is_(True), + ) + ).one() + assert annotation.status == AnnotationStatus.SUCCESS diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py new file mode 100644 index 000000000..57e2dcc57 --- /dev/null +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -0,0 +1,2467 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import patch + +from sqlalchemy import select + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.variant import Variant +from mavedb.models.variant_annotation_status import VariantAnnotationStatus +from mavedb.worker.jobs.external_services.clingen import ( + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, +) +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.constants import TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST +from tests.helpers.util.setup.worker import create_mappings_in_score_set + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToCarUnit: + """Tests for the Clingen submit_score_set_mappings_to_car function.""" + + async def test_submit_score_set_mappings_to_car_submission_disabled( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + ): + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", False), + ): + result = await submit_score_set_mappings_to_car( + 
mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SKIPPED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_no_mappings( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + ): + """Test submitting score set mappings to ClinGen when there are no mappings.""" + with ( + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + ): + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + 
submit_score_set_mappings_to_car_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_no_registered_alleles( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return no registered alleles + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=[], + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify annotation 
statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + assert ann.annotation_type == "clingen_allele_id" + + async def test_submit_score_set_mappings_to_car_no_linked_alleles( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles that do not match submitted HGVS + registered_alleles_mock = [ + {"@id": "CA123456", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>C"}]}, + {"@id": "CA234567", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>G"}]}, + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == 
JobStatus.FAILED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + assert ann.annotation_type == "clingen_allele_id" + + async def test_submit_score_set_mappings_to_car_repeated_hgvs( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles with repeated HGVS + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": "CA_DUPLICATE", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mapped_variants[0].post_mapped)}], + } + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + # Patch get_hgvs_from_post_mapped to return the same HGVS for all variants + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + return_value=get_hgvs_from_post_mapped(mapped_variants[0].post_mapped), + ), + 
patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 4 + for variant in variants: + assert variant.clingen_allele_id == "CA_DUPLICATE" + + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + assert ann.annotation_type == "clingen_allele_id" + + async def test_submit_score_set_mappings_to_car_partial_failure( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + """Test that partial CAR failures (some matched, some not) result in a succeeded outcome with failure annotations.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Get mapped variants; return a CAR response that only matches the first variant + mapped_variants = 
session.scalars(select(MappedVariant)).all() + assert len(mapped_variants) == 4 + + first_hgvs = get_hgvs_from_post_mapped(mapped_variants[0].post_mapped) + registered_alleles_mock = [ + { + "@id": f"CA{mapped_variants[0].id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": first_hgvs}], + } + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["matched_count"] == 1 + assert result.data["failed_count"] == 3 + + # Verify only the first variant got a CAID + variants_with_caid = session.scalars( + select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None)) + ).all() + assert len(variants_with_caid) == 1 + assert variants_with_caid[0].clingen_allele_id == f"CA{mapped_variants[0].id}" + + # Verify annotation statuses: 1 success, 3 failed + success_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.status == "success", + ) + ).all() + failed_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.status == "failed", + ) + ).all() + assert len(success_annotations) == 1 + assert len(failed_annotations) == 3 + + async def test_submit_score_set_mappings_to_car_hgvs_not_found( + self, + 
mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Get the mapped variants from score set before submission + mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where(Variant.score_set_id == submit_score_set_mappings_to_car_sample_job_run.job_params["score_set_id"]) + ).all() + + # Patch ClinGenAlleleRegistryService to return registered alleles + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + # Patch get_hgvs_from_post_mapped to not find any HGVS in registered alleles + patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + # Verify no variants have CAIDs assigned + variants = 
session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + assert ann.annotation_type == "clingen_allele_id" + + async def test_submit_score_set_mappings_to_car_propagates_exception( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service error"), + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + pytest.raises(Exception) as exc_info, + ): + await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), + ) + + assert str(exc_info.value) == "ClinGen service error" + + async def test_submit_score_set_mappings_to_car_success( + self, + mock_worker_ctx, + session, + 
with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + sample_score_set, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Get the mapped variants from score set before submission + mapped_variants = session.scalars( + select(MappedVariant).join(Variant).where(Variant.score_set_id == sample_score_set.id) + ).all() + assert len(mapped_variants) == 4 + + # Patch ClinGenAlleleRegistryService to return registered alleles + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 4 + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + # Verify 
annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + assert ann.annotation_type == "clingen_allele_id" + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToCarIntegration: + """Integration tests for the Clingen submit_score_set_mappings_to_car function.""" + + async def test_submit_score_set_mappings_to_car_independent_ctx( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, 
submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == len(mapped_variants) + for ann in annotation_statuses: + assert ann.status == "success" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_car_pipeline_ctx( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + submit_score_set_mappings_to_car_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants 
+ ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == len(mapped_variants) + for ann in annotation_statuses: + assert ann.status == "success" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_pipeline) + assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_car_submission_disabled( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, 
+ dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", False), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SKIPPED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify no annotation statuses were created + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 0 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SKIPPED + + async def test_submit_score_set_mappings_to_car_no_submission_endpoint( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + mock_send_slack_job_failure.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert 
result.status == JobStatus.FAILED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify no annotation statuses were created + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 0 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + + async def test_submit_score_set_mappings_to_car_no_mappings( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + ): + """Test submitting score set mappings to ClinGen when there are no mappings.""" + with ( + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify no annotation statuses were created + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 0 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_car_no_registered_alleles( + self, + standalone_worker_context, + 
session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return no registered alleles + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=[], + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + mock_send_slack_job_failure.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + + async def 
test_submit_score_set_mappings_to_car_no_linked_alleles( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles that do not match submitted HGVS + registered_alleles_mock = [ + {"@id": "CA123456", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>C"}]}, + {"@id": "CA234567", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>G"}]}, + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + mock_send_slack_job_failure.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify annotation statuses were rendered as failed + annotation_statuses = 
session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + + async def test_submit_score_set_mappings_to_car_partial_failure( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + """Test that partial CAR failures result in SUCCEEDED status with per-variant failure annotations committed.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Return a CAR response that only matches the first variant's HGVS + mapped_variants = session.scalars(select(MappedVariant)).all() + first_hgvs = get_hgvs_from_post_mapped(mapped_variants[0].post_mapped) + registered_alleles_mock = [ + { + "@id": f"CA{mapped_variants[0].id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": first_hgvs}], + } + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): + result = await submit_score_set_mappings_to_car( 
+ standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + mock_send_slack_error.assert_not_called() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["matched_count"] == 1 + assert result.data["failed_count"] == 3 + + # Verify the successfully matched variant got a CAID + variants_with_caid = session.scalars( + select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None)) + ).all() + assert len(variants_with_caid) == 1 + assert variants_with_caid[0].clingen_allele_id == f"CA{mapped_variants[0].id}" + + # Verify annotation statuses: 1 success, 3 failed + success_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.status == "success", + ) + ).all() + failed_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.status == "failed", + ) + ).all() + assert len(success_annotations) == 1 + assert len(failed_annotations) == 3 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_car_car_error_details_stored_in_annotation_metadata( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + """Test that explicit CAR error details (errorType, hgvs, message) are stored in annotation_metadata.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + 
standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Return a CAR response where: first variant succeeds, second has explicit CAR error, rest are silent failures + mapped_variants = session.scalars(select(MappedVariant)).all() + first_hgvs = get_hgvs_from_post_mapped(mapped_variants[0].post_mapped) + second_hgvs = get_hgvs_from_post_mapped(mapped_variants[1].post_mapped) + registered_alleles_mock = [ + { + "@id": f"CA{mapped_variants[0].id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": first_hgvs}], + }, + { + "errorType": "InvalidHGVS", + "hgvs": second_hgvs, + "message": "The HGVS string is invalid.", + "description": "error", + "inputLine": second_hgvs, + "position": "0", + }, + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error"), + ): + await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + # Verify the variant whose HGVS returned an explicit CAR error has error details in annotation_metadata. + # Only 1 annotation should have EXTERNAL_SERVICE_REJECTED since only one CAR error was in the response. 
+ car_rejected_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.failure_category == "external_service_rejected", + ) + ).all() + assert len(car_rejected_annotations) == 1 + rejected = car_rejected_annotations[0] + assert rejected.annotation_metadata["submitted_hgvs"] == second_hgvs + assert rejected.annotation_metadata["car_error_type"] == "InvalidHGVS" + assert rejected.annotation_metadata["car_error_message"] == "The HGVS string is invalid." + + # The remaining 2 failures (variants 3 and 4) got no CAR response — silent failures get EXTERNAL_API_ERROR. + silent_failure_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.failure_category == "external_api_error", + ) + ).all() + assert len(silent_failure_annotations) == 2 + for ann in silent_failure_annotations: + assert ann.annotation_metadata["submitted_hgvs"] is not None + assert "car_error_type" not in ann.annotation_metadata + + async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorator( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service 
error"), + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + mock_send_slack_job_error.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + assert str(result.exception) == "ClinGen service error" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.ERRORED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToCarArqContext: + """Tests for the Clingen submit_score_set_mappings_to_car function with ARQ context.""" + + async def test_submit_score_set_mappings_to_car_with_arq_context_independent( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": 
"nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + + async def test_submit_score_set_mappings_to_car_with_arq_context_pipeline( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + submit_score_set_mappings_to_car_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score 
set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_pipeline) + assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + 
select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + + async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handling_independent( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service error"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.ERRORED + assert submit_score_set_mappings_to_car_sample_job_run.error_message == "ClinGen service error" + + 
# Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 0 + + async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handling_pipeline( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + submit_score_set_mappings_to_car_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service error"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify the job status is 
updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.ERRORED + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.error_message == "ClinGen service error" + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_pipeline) + assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.FAILED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 0 + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToLdhUnit: + """Unit tests for the Clingen submit_score_set_mappings_to_ldh function.""" + + async def test_submit_score_set_mappings_to_ldh_no_variants( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) 
+ assert result.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_submission_failure(*args, **kwargs): + return ([], [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 4) + + # Patch ClinGenLdhService to simulate all submissions failing + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_failure(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + 
mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise HGVS not found exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_propagates_exception( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + pytest.raises(Exception) as exc_info, + ): + await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + 
submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), + ) + + assert str(exc_info.value) == "LDH service error" + + async def test_submit_score_set_mappings_to_ldh_partial_submission( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + variants = session.scalars(select(Variant)).all() + + async def dummy_partial_submission(*args, **kwargs): + return ( + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants[2:], start=1) + ], + [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 2, + ) + + # Patch ClinGenLdhService to simulate partial submission success + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_partial_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + 
async def test_submit_score_set_mappings_to_ldh_all_successful_submission( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + variants = session.scalars(select(Variant)).all() + + async def dummy_successful_submission(*args, **kwargs): + return ( + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], + [], + ) + + # Patch ClinGenLdhService to simulate all submissions succeeding + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_successful_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToLdhIntegration: + """Integration tests for the Clingen submit_score_set_mappings_to_ldh function.""" + + async def test_submit_score_set_mappings_to_ldh_independent( + 
self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + variants = session.scalars(select(Variant)).all() + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def 
test_submit_score_set_mappings_to_ldh_pipeline_ctx( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline, + submit_score_set_mappings_to_ldh_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + variants = session.scalars(select(Variant)).all() + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + + # Verify the job status is updated in the database + 
session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_propagates_exception_to_decorator( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + mock_send_slack_job_error.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + assert str(result.exception) == "LDH service error" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert 
submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.ERRORED + + async def test_submit_score_set_mappings_to_ldh_no_linked_alleles( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_no_linked_alleles_submission(*args, **kwargs): + return ([], []) + + # Patch ClinGenLdhService to simulate no linked alleles found + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_no_linked_alleles_submission(), + ), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify annotation statuses were created with failures + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( + self, + standalone_worker_context, + session, + 
with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise HGVS not found exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 0 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + 
standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_submission_failure(*args, **kwargs): + return ([], [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 4) + + # Patch ClinGenLdhService to simulate all submissions failing + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_failure(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + mock_send_slack_job_failure.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + # Verify annotation statuses were created with failures + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + + # Verify the job status is updated in the database + # The decorator records the FAILED status directly on the job run + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + + async def test_submit_score_set_mappings_to_ldh_partial_submission( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await 
create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + variants = session.scalars(select(Variant)).all() + + async def dummy_partial_submission(*args, **kwargs): + return ( + [ + { + "data": { + "entId": variants[0].urn, + "ldhId": f"LDH123400{1}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{1}", + }, + "status": {"code": 200, "name": "OK"}, + } + ], + [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 3, + ) + + # Patch ClinGenLdhService to simulate partial submission success + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_partial_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + success_count = 0 + failure_count = 0 + for ann in annotation_statuses: + if ann.status == "success": + success_count += 1 + elif ann.status == "failed": + failure_count += 1 + + assert success_count == 1 + assert failure_count == 3 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_all_successful_submission( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + 
submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + variants = session.scalars(select(Variant)).all() + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], + [], + ) + + # Patch ClinGenLdhService to simulate all submissions succeeding + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToLdhArqIntegration: 
+ """ARQ Integration tests for the Clingen submit_score_set_mappings_to_ldh function.""" + + async def test_submit_score_set_mappings_to_ldh_independent( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + variants = session.scalars(select(Variant)).all() + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + + # Verify the job status is updated in the database + 
session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_with_arq_context_in_pipeline( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline, + submit_score_set_mappings_to_ldh_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + variants = session.scalars(select(Variant)).all() + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in 
annotation_statuses: + assert ann.status == "success" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handling( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == 
"ldh_submission") + ).all() + assert len(annotation_statuses) == 0 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.ERRORED + assert submit_score_set_mappings_to_ldh_sample_job_run.error_message == "LDH service error" + + async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handling_pipeline_ctx( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline, + submit_score_set_mappings_to_ldh_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert 
len(annotation_statuses) == 0 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.ERRORED + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.error_message == "LDH service error" + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_clingen_cache.py b/tests/worker/jobs/external_services/test_clingen_cache.py new file mode 100644 index 000000000..a55eb6b88 --- /dev/null +++ b/tests/worker/jobs/external_services/test_clingen_cache.py @@ -0,0 +1,240 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import AsyncMock, patch + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.external_services.clingen_cache import warm_clingen_cache +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestWarmClingenCacheUnit: + """Tests for the warm_clingen_cache job function.""" + + async def test_no_mapped_variants_succeeds( + self, + mock_worker_ctx, + session, + with_warm_clingen_cache_job, + sample_warm_clingen_cache_job_run, + ): + """Job completes successfully when there are no mapped variants.""" + result = await warm_clingen_cache( + mock_worker_ctx, + sample_warm_clingen_cache_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_warm_clingen_cache_job_run.id), + ) + 
+ assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_warms_cache_for_variants_with_caids( + self, + mock_worker_ctx, + session, + with_warm_clingen_cache_job, + sample_warm_clingen_cache_job_run, + ): + """Job calls get_clingen_allele_data for each distinct allele ID.""" + score_set = session.get(ScoreSet, sample_warm_clingen_cache_job_run.job_params["score_set_id"]) + + # Create two variants with the same CAID — should only warm once (distinct) + for i, caid in enumerate(["CA111111", "CA222222", "CA111111"]): + variant = Variant( + urn=f"urn:variant:warm-test-{i}", + score_set_id=score_set.id, + hgvs_nt=f"NM_000000.1:c.{i + 1}A>G", + hgvs_pro=f"NP_000000.1:p.Met{i + 1}Val", + data={}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id=caid, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + mock_get_allele_data = AsyncMock(return_value={"some": "data"}) + + with patch( + "mavedb.worker.jobs.external_services.clingen_cache.get_clingen_allele_data", + mock_get_allele_data, + ): + result = await warm_clingen_cache( + mock_worker_ctx, + sample_warm_clingen_cache_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_warm_clingen_cache_job_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + # Should be called exactly 2 times (CA111111 and CA222222, deduplicated) + assert mock_get_allele_data.call_count == 2 + called_ids = {call.args[0] for call in mock_get_allele_data.call_args_list} + assert called_ids == {"CA111111", "CA222222"} + + async def test_skips_null_and_multi_variant_caids( + self, + mock_worker_ctx, + session, + with_warm_clingen_cache_job, + sample_warm_clingen_cache_job_run, + ): + """Job ignores variants with null or multi-variant (comma-separated) ClinGen IDs.""" + score_set = session.get(ScoreSet, 
sample_warm_clingen_cache_job_run.job_params["score_set_id"]) + + caids = ["CA333333", None, "CA-MULTI-001,CA-MULTI-002"] + for i, caid in enumerate(caids): + variant = Variant( + urn=f"urn:variant:warm-filter-{i}", + score_set_id=score_set.id, + hgvs_nt=f"NM_000000.1:c.{i + 10}A>G", + hgvs_pro=f"NP_000000.1:p.Met{i + 10}Val", + data={}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id=caid, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + mock_get_allele_data = AsyncMock(return_value={"some": "data"}) + + with patch( + "mavedb.worker.jobs.external_services.clingen_cache.get_clingen_allele_data", + mock_get_allele_data, + ): + result = await warm_clingen_cache( + mock_worker_ctx, + sample_warm_clingen_cache_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_warm_clingen_cache_job_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + # Only CA333333 should be warmed; null and multi-variant IDs are excluded + assert mock_get_allele_data.call_count == 1 + mock_get_allele_data.assert_called_once_with("CA333333") + + async def test_continues_on_individual_fetch_failure( + self, + mock_worker_ctx, + session, + with_warm_clingen_cache_job, + sample_warm_clingen_cache_job_run, + ): + """Job continues warming remaining alleles when one fetch fails.""" + score_set = session.get(ScoreSet, sample_warm_clingen_cache_job_run.job_params["score_set_id"]) + + for i, caid in enumerate(["CA444444", "CA555555"]): + variant = Variant( + urn=f"urn:variant:warm-fail-{i}", + score_set_id=score_set.id, + hgvs_nt=f"NM_000000.1:c.{i + 20}A>G", + hgvs_pro=f"NP_000000.1:p.Met{i + 20}Val", + data={}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id=caid, + current=True, + mapped_date="2024-01-01T00:00:00Z", + 
mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # First call raises, second succeeds + mock_get_allele_data = AsyncMock( + side_effect=[Exception("ClinGen API timeout"), {"some": "data"}], + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen_cache.get_clingen_allele_data", + mock_get_allele_data, + ): + result = await warm_clingen_cache( + mock_worker_ctx, + sample_warm_clingen_cache_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_warm_clingen_cache_job_run.id), + ) + + # Job should still succeed — individual failures are non-fatal + assert result.status == JobStatus.SUCCEEDED + assert mock_get_allele_data.call_count == 2 + + async def test_only_warms_current_mapped_variants( + self, + mock_worker_ctx, + session, + with_warm_clingen_cache_job, + sample_warm_clingen_cache_job_run, + ): + """Job only fetches allele IDs from current (not superseded) mapped variants.""" + score_set = session.get(ScoreSet, sample_warm_clingen_cache_job_run.job_params["score_set_id"]) + + variant = Variant( + urn="urn:variant:warm-current-test", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.30A>G", + hgvs_pro="NP_000000.1:p.Met30Val", + data={}, + ) + session.add(variant) + session.commit() + + # Non-current mapped variant should be ignored + old_mv = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA666666", + current=False, + mapped_date="2023-01-01T00:00:00Z", + mapping_api_version="0.9.0", + ) + # Current mapped variant should be included + current_mv = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA777777", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add_all([old_mv, current_mv]) + session.commit() + + mock_get_allele_data = AsyncMock(return_value={"some": "data"}) + + with patch( + "mavedb.worker.jobs.external_services.clingen_cache.get_clingen_allele_data", + mock_get_allele_data, + ): + result = await 
warm_clingen_cache( + mock_worker_ctx, + sample_warm_clingen_cache_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_warm_clingen_cache_job_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + mock_get_allele_data.assert_called_once_with("CA777777") diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py new file mode 100644 index 000000000..d43891d9e --- /dev/null +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -0,0 +1,1449 @@ +# ruff: noqa: E402 + +import pytest +import requests + +from mavedb.models.clinical_control import ClinicalControl +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus, FailureCategory, JobStatus, PipelineStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + +pytest.importorskip("arq") + +from unittest.mock import patch + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.external_services.clinvar import refresh_clinvar_controls +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + +MOCK_CLINVAR_DATA = { + "VCV000000123": { + "GeneSymbol": "TEST", + "ClinicalSignificance": "benign", + "ReviewStatus": "reviewed by expert panel", + }, +} + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestRefreshClinvarControlsUnit: + """Tests for the refresh_clinvar_controls job function.""" + + @pytest.fixture(autouse=True) + def _mock_clinvar_versions(self): + """Mock generate_clinvar_versions to return a single version for testing.""" + with patch( + "mavedb.worker.jobs.external_services.clinvar.generate_clinvar_versions", + return_value=[(2026, 1)], + ): + yield + + async def 
test_refresh_clinvar_controls_skips_version_on_fetch_failure( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test that a fetch failure for a version is logged and skipped, not propagated.""" + + async def awaitable_exception(*args, **kwargs): + raise Exception("Network error") + + with patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + side_effect=awaitable_exception, + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["versions_completed"] == 0 + + async def test_refresh_clinvar_controls_no_mapped_variants( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test that the job completes successfully when there are no mapped variants.""" + + with patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value={}, + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_no_variants_have_caids( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test that the job completes successfully when no variants have CAIDs.""" + # Add a variant without a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + 
urn="urn:variant:test-variant-no-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.2G>A", + hgvs_pro="NP_000000.1:p.Val2Ile", + data={"hgvs_c": "NM_000000.1:c.2G>A", "hgvs_p": "NP_000000.1:p.Val2Ile"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the variant without a CAID + variant_no_caid = ( + session.query(VariantAnnotationStatus).filter(VariantAnnotationStatus.variant_id == variant.id).one() + ) + assert variant_no_caid.status == AnnotationStatus.SKIPPED + assert variant_no_caid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert variant_no_caid.error_message == "Mapped variant does not have an associated ClinGen allele ID." 
+ + async def test_refresh_clinvar_controls_variants_are_multivariants( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job completes successfully when all variants are multi-variant CAIDs.""" + # Update the mapped variant to have a multi-variant CAID + mapped_variant = session.query(MappedVariant).first() + mapped_variant.clingen_allele_id = "CA-MULTI-001,CA-MULTI-002" + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the multi-variant CAID + variant_with_multicid = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_with_multicid.status == AnnotationStatus.SKIPPED + assert variant_with_multicid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert ( + variant_with_multicid.error_message + == "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data." 
+ ) + + async def test_refresh_clinvar_controls_clingen_api_failure( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles ClinGen API failures gracefully.""" + + # Mock the get_associated_clinvar_allele_id function to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=requests.exceptions.RequestException("ClinGen API error"), + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DEPENDENCY_FAILURE + + # Verify an annotation status was created for the variant due to ClinGen API failure + mapped_variant = session.query(MappedVariant).first() + variant_with_api_failure = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_with_api_failure.status == AnnotationStatus.FAILED + assert variant_with_api_failure.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "Failed to retrieve ClinVar allele ID from ClinGen API" in variant_with_api_failure.error_message + + async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles no associated ClinVar Allele ID gracefully.""" + + # Mock the get_associated_clinvar_allele_id function to return 
None + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value=None, + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the variant due to no associated ClinVar Allele ID + mapped_variant = session.query(MappedVariant).first() + variant_no_clinvar_allele = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_allele.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_allele.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar allele ID found for ClinGen allele ID" in variant_no_clinvar_allele.error_message + + async def test_refresh_clinvar_controls_no_clinvar_data_found( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles no ClinVar data found for the associated ClinVar Allele ID.""" + + # TSV data with a different allele ID than the one being looked up + non_matching_clinvar_data = { + "VCV000000001": { + "GeneSymbol": "TEST", + "ClinicalSignificance": "benign", + "ReviewStatus": "reviewed by expert panel", + }, + } + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch( + 
"mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=non_matching_clinvar_data, + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the variant due to no ClinVar data found + mapped_variant = session.query(MappedVariant).first() + variant_no_clinvar_data = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_data.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_data.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar data found for ClinVar allele ID" in variant_no_clinvar_data.error_message + + async def test_refresh_clinvar_controls_successful_annotation_existing_control( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job successfully annotates a variant with ClinVar control data.""" + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status 
was created for the variant with successful annotation + mapped_variant = session.query(MappedVariant).first() + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + async def test_refresh_clinvar_controls_successful_annotation_new_control( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test that the job successfully annotates a variant with ClinVar control data when no prior status exists.""" + # Add a variant and mapped variant to the database with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:test-variant-with-caid-2", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.3C>T", + hgvs_pro="NP_000000.1:p.Ala3Val", + data={"hgvs_c": "NM_000000.1:c.3C>T", "hgvs_p": "NP_000000.1:p.Ala3Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA124", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], 
sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + async def test_refresh_clinvar_controls_idempotent_run( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that running the job multiple times does not create duplicate annotation statuses.""" + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + side_effect=[MOCK_CLINVAR_DATA, MOCK_CLINVAR_DATA], + ), + ): + # First run + result1 = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + session.commit() + + # Second run + result2 = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result1, JobExecutionOutcome) + assert result1.status == JobStatus.SUCCEEDED + assert isinstance(result2, JobExecutionOutcome) + assert result2.status == JobStatus.SUCCEEDED + + # Verify only one clinical control annotation exists for the variant + 
clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 1 + + # Verify two annotated variants exist but both reflect the same successful annotation, and only + # one is current + annotated_variants = session.query(VariantAnnotationStatus).all() + assert len(annotated_variants) == 2 + statuses = [av.status for av in annotated_variants] + assert statuses.count(AnnotationStatus.SUCCESS) == 2 + current_statuses = [av for av in annotated_variants if av.current] + assert len(current_statuses) == 1 + + async def test_refresh_clinvar_controls_partial_failure( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles partial failures gracefully.""" + + variant1, mapped_variant1 = setup_sample_variants_with_caid + + # Add an additional mapped variant to the database with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant2 = Variant( + urn="urn:variant:test-variant-with-caid-2", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.4G>C", + hgvs_pro="NP_000000.1:p.Gly4Ala", + data={"hgvs_c": "NM_000000.1:c.4G>C", "hgvs_p": "NP_000000.1:p.Gly4Ala"}, + ) + session.add(variant2) + session.commit() + mapped_variant2 = MappedVariant( + variant_id=variant2.id, + clingen_allele_id="CA125", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant2) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to raise an exception for the first call + def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): + if clingen_allele_id == "CA125": + raise requests.exceptions.RequestException("ClinGen API error") + return "VCV000000123" + + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + 
side_effect=side_effect_get_associated_clinvar_allele_id, + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify annotation statuses for both variants + variant_with_api_failure = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant2.variant_id) + .one() + ) + assert variant_with_api_failure.status == AnnotationStatus.FAILED + assert variant_with_api_failure.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "Failed to retrieve ClinVar allele ID from ClinGen API" in variant_with_api_failure.error_message + + annotated_variant2 = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant1.variant_id) + .one() + ) + assert annotated_variant2.status == AnnotationStatus.SUCCESS + assert annotated_variant2.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant2.error_message is None + + async def test_total_api_failure_returns_failed( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job returns FAILED when all ClinVar lookups fail.""" + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=requests.exceptions.RequestException("ClinGen API error"), + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + 
sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DEPENDENCY_FAILURE + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestRefreshClinvarControlsIntegration: + """Integration tests for the refresh_clinvar_controls job function.""" + + @pytest.fixture(autouse=True) + def _mock_clinvar_versions(self): + """Mock generate_clinvar_versions to return a single version for testing.""" + with patch( + "mavedb.worker.jobs.external_services.clinvar.generate_clinvar_versions", + return_value=[(2026, 1)], + ): + yield + + async def test_refresh_clinvar_controls_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job completes successfully when there are no mapped variants.""" + + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify no controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_no_variants_with_caid( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + 
mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job completes successfully when no variants have CAIDs.""" + # Add a variant without a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-no-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.5T>A", + hgvs_pro="NP_000000.1:p.Leu5Gln", + data={"hgvs_c": "NM_000000.1:c.5T>A", "hgvs_p": "NP_000000.1:p.Leu5Gln"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the variant without a CAID + variant_no_caid = ( + session.query(VariantAnnotationStatus).filter(VariantAnnotationStatus.variant_id == variant.id).one() + ) + assert variant_no_caid.status == AnnotationStatus.SKIPPED + assert variant_no_caid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert variant_no_caid.error_message == "Mapped variant does not have an associated ClinGen allele ID." 
+ + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_variants_are_multivariants( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job completes successfully when all variants are multi-variant CAIDs.""" + # Add a variant with a multi-variant CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-multicid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.6A>G", + hgvs_pro="NP_000000.1:p.Thr6Ala", + data={"hgvs_c": "NM_000000.1:c.6A>G", "hgvs_p": "NP_000000.1:p.Thr6Ala"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA-MULTI-003,CA-MULTI-004", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the multi-variant CAID + variant_with_multicid = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_with_multicid.status == AnnotationStatus.SKIPPED + assert 
variant_with_multicid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert ( + variant_with_multicid.error_message + == "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data." + ) + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job handles no associated ClinVar Allele ID gracefully.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.7C>A", + hgvs_pro="NP_000000.1:p.Ser7Tyr", + data={"hgvs_c": "NM_000000.1:c.7C>A", "hgvs_p": "NP_000000.1:p.Ser7Tyr"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA126", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return None + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value=None, + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert 
result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the variant due to no associated ClinVar Allele ID + variant_no_clinvar_allele = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_allele.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_allele.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar allele ID found for ClinGen allele ID" in variant_no_clinvar_allele.error_message + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_no_clinvar_data( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job handles no ClinVar data found for the associated ClinVar Allele ID.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.8G>T", + hgvs_pro="NP_000000.1:p.Val8Phe", + data={"hgvs_c": "NM_000000.1:c.8G>T", "hgvs_p": "NP_000000.1:p.Val8Phe"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA127", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + 
"mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000001", + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the variant due to no ClinVar data found + variant_no_clinvar_data = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_data.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_data.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar data found for ClinVar allele ID" in variant_no_clinvar_data.error_message + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_successful_annotation_existing_control( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job successfully annotates a variant with ClinVar control data.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.9A>C", + hgvs_pro="NP_000000.1:p.Lys9Thr", + data={"hgvs_c": "NM_000000.1:c.9A>C", "hgvs_p": "NP_000000.1:p.Lys9Thr"}, + ) + 
session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA128", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + clinical_control = ClinicalControl( + db_name="ClinVar", + db_identifier="VCV000000123", + clinical_significance="likely pathogenic", + gene_symbol="TEST", + clinical_review_status="criteria provided, single submitter", + db_version="01_2026", + ) + session.add(clinical_control) + session.commit() + + mapped_variant.clinical_controls.append(clinical_control) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + # Verify the clinical control was updated + session.refresh(clinical_control) + assert clinical_control.clinical_significance == "benign" + assert clinical_control.clinical_review_status == "reviewed by expert panel" + assert mapped_variant in clinical_control.mapped_variants + + # Verify job run status is marked as completed + 
session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_successful_annotation_new_control( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job successfully annotates a variant with ClinVar control data when no prior status exists.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.10C>G", + hgvs_pro="NP_000000.1:p.Pro10Arg", + data={"hgvs_c": "NM_000000.1:c.10C>G", "hgvs_p": "NP_000000.1:p.Pro10Arg"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA129", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == 
AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + # Verify the clinical control was added + clinical_control = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant)).one() + ) + assert clinical_control.db_identifier == "VCV000000123" + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_successful_annotation_pipeline_context( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_pipeline, + sample_refresh_clinvar_controls_job_in_pipeline, + ): + """Integration test: job successfully annotates a variant with ClinVar control data in a pipeline context.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_in_pipeline.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.12G>A", + hgvs_pro="NP_000000.1:p.Met12Ile", + data={"hgvs_c": "NM_000000.1:c.12G>A", "hgvs_p": "NP_000000.1:p.Met12Ile"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA130", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + 
result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_in_pipeline.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + # Verify the clinical control was added + clinical_control = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant)).one() + ) + assert clinical_control.db_identifier == "VCV000000123" + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_in_pipeline) + assert sample_refresh_clinvar_controls_job_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline is marked as completed + session.refresh(sample_refresh_clinvar_controls_pipeline) + assert sample_refresh_clinvar_controls_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_idempotent_run( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: running the job multiple times does not create duplicate annotation statuses.""" + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + side_effect=[MOCK_CLINVAR_DATA, MOCK_CLINVAR_DATA], + ), + ): + # 
First run + result1 = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + session.commit() + # reset the job run status to pending for the second run + sample_refresh_clinvar_controls_job_run.status = JobStatus.PENDING + session.commit() + + # Second run + result2 = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert isinstance(result1, JobExecutionOutcome) + assert result1.status == JobStatus.SUCCEEDED + assert isinstance(result2, JobExecutionOutcome) + assert result2.status == JobStatus.SUCCEEDED + + # Verify only one clinical control annotation exists for the variant + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 1 + + # Verify two annotated variants exist but both reflect the same successful annotation, and only + # one is current + annotated_variants = session.query(VariantAnnotationStatus).all() + assert len(annotated_variants) == 2 + statuses = [av.status for av in annotated_variants] + assert statuses.count(AnnotationStatus.SUCCESS) == 2 + current_statuses = [av for av in annotated_variants if av.current] + assert len(current_statuses) == 1 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_partial_failure( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job handles partial failures gracefully.""" + + variant1, mapped_variant1 = setup_sample_variants_with_caid + # Add an additional mapped variant to the database with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant2 = Variant( + 
urn="urn:variant:integration-test-variant-with-caid-2", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.11G>C", + hgvs_pro="NP_000000.1:p.Gly11Ala", + data={"hgvs_c": "NM_000000.1:c.11G>C", "hgvs_p": "NP_000000.1:p.Gly11Ala"}, + ) + session.add(variant2) + session.commit() + mapped_variant2 = MappedVariant( + variant_id=variant2.id, + clingen_allele_id="CA130", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant2) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to raise an exception for the first call + def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): + if clingen_allele_id == "CA130": + raise requests.exceptions.RequestException("ClinGen API error") + return "VCV000000123" + + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=side_effect_get_associated_clinvar_allele_id, + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify annotation statuses for both variants + variant_with_api_failure = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant2.variant_id) + .one() + ) + assert variant_with_api_failure.status == AnnotationStatus.FAILED + assert variant_with_api_failure.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "Failed to retrieve ClinVar allele ID from ClinGen API" in variant_with_api_failure.error_message + + annotated_variant2 = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant1.variant_id) + .one() + ) + assert annotated_variant2.status == AnnotationStatus.SUCCESS + 
assert annotated_variant2.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant2.error_message is None + + # Verify a clinical control was added for the successfully annotated variant and not the unsuccessful one + clinical_control1 = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant1)).one() + ) + assert clinical_control1.db_identifier == "VCV000000123" + + clinical_control2 = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant2)).all() + ) + assert len(clinical_control2) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_propagates_exceptions_to_decorator( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that unexpected exceptions are propagated.""" + + # Mock the get_associated_clinvar_allele_id function to raise an unexpected exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=ValueError("Unexpected error"), + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify no clinical controls were added + clinical_controls = 
session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as errored (unhandled exception caught by decorator) + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.ERRORED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshClinvarControlsArqContext: + """Tests for running the refresh_clinvar_controls job function within an ARQ worker context.""" + + @pytest.fixture(autouse=True) + def _mock_clinvar_versions(self): + """Mock generate_clinvar_versions to return a single version for testing.""" + with patch( + "mavedb.worker.jobs.external_services.clinvar.generate_clinvar_versions", + return_value=[(2026, 1)], + ): + yield + + async def test_refresh_clinvar_controls_with_arq_context_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job completes successfully within an ARQ worker context.""" + + # Patch external service calls + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) > 0 + + # Verify annotation status was created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == AnnotationStatus.SUCCESS + assert 
annotation_statuses[0].annotation_type == AnnotationType.CLINVAR_CONTROL + + # Verify that the job completed successfully + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_with_arq_context_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job completes successfully within an ARQ worker context in a pipeline context.""" + + # Patch external service calls + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) > 0 + + # Verify annotation status was created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == AnnotationStatus.SUCCESS + assert annotation_statuses[0].annotation_type == AnnotationType.CLINVAR_CONTROL + + # Verify that the job completed successfully + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + # Verify the pipeline is marked as completed + pass + + async def test_refresh_clinvar_controls_with_arq_context_exception_handling_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + 
sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job handles exceptions properly within an ARQ worker context.""" + # Patch external service calls to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=ValueError("Unexpected error"), + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as errored (unhandled exception caught by decorator) + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.ERRORED + + async def test_refresh_clinvar_controls_with_arq_context_exception_handling_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job handles exceptions properly within an ARQ worker context in a pipeline context.""" + # Patch external service calls to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=ValueError("Unexpected error"), + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", 
sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as errored (unhandled exception caught by decorator) + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.ERRORED + + # Verify the pipeline is marked as failed + pass diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py new file mode 100644 index 000000000..fc8e211c0 --- /dev/null +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -0,0 +1,461 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import MagicMock, patch + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.gnomad_variant import GnomADVariant +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.variant_annotation_status import VariantAnnotationStatus +from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestLinkGnomadVariantsUnit: + """Unit tests for the link_gnomad_variants job.""" + + async def test_link_gnomad_variants_no_variants_with_caids( + self, + session, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + ): + """Test linking gnomAD variants when no mapped variants have CAIDs.""" + result = 
await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_no_gnomad_matches( + self, + session, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test linking gnomAD variants when no gnomAD variants match the CAIDs.""" + + with ( + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value={}, + ), + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + ): + result = await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_call_linking_method( + self, + session, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test that the linking method is called when gnomAD variants match CAIDs.""" + + with ( + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value=[MagicMock()], + ), + patch( + "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", + return_value=1, + ) as mock_linking_method, + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + ): + result = await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + 
mock_linking_method.assert_called_once() + + async def test_link_gnomad_variants_propagates_exceptions( + self, + session, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test that exceptions during the linking process are propagated.""" + with ( + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + ): + with pytest.raises(Exception) as exc_info: + await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert str(exc_info.value) == "Test exception" + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestLinkGnomadVariantsIntegration: + """Integration tests for the link_gnomad_variants job.""" + + async def test_link_gnomad_variants_no_variants_with_caids( + self, + session, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + ): + """Test the end-to-end functionality of the link_gnomad_variants job when no variants have CAIDs.""" + + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify that no gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify no annotations were rendered (since there were no variants with CAIDs) + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def 
test_link_gnomad_variants_no_matching_caids( + self, + session, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test the end-to-end functionality of the link_gnomad_variants job when no matching CAIDs are found.""" + # Update the created mapped variant to have a CAID that won't match any gnomAD data + mapped_variant = session.query(MappedVariant).first() + mapped_variant.clingen_allele_id = "NON_MATCHING_CAID" + session.commit() + + # Patch the athena engine to use the mock athena_engine fixture + with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify that no gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify a skipped annotation status was rendered (since there were variants with CAIDs) + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "skipped" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_successful_linking_independent( + self, + session, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test the end-to-end functionality of the link_gnomad_variants job with successful linking.""" + + # Patch the athena engine to use the mock athena_engine fixture + with 
patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify that gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_successful_linking_pipeline( + self, + session, + with_populated_domain_data, + mock_worker_ctx, + sample_link_gnomad_variants_run_pipeline, + sample_link_gnomad_variants_pipeline, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test the end-to-end functionality of the link_gnomad_variants job with successful linking in a pipeline.""" + + # Patch the athena engine to use the mock athena_engine fixture + with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run_pipeline.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify that gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == 
"gnomad_allele_frequency" + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run_pipeline) + assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.SUCCEEDED + + # Verify pipeline status updates + session.refresh(sample_link_gnomad_variants_pipeline) + assert sample_link_gnomad_variants_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_link_gnomad_variants_exceptions_handled_by_decorators( + self, + session, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test that exceptions during the linking process are handled by decorators.""" + + # Patch the athena engine to use the mock athena_engine fixture + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + result = await link_gnomad_variants( + mock_worker_ctx, + sample_link_gnomad_variants_run.id, + ) + + mock_send_slack_job_error.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.ERRORED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestLinkGnomadVariantsArqContext: + """Tests for link_gnomad_variants job using the ARQ context fixture.""" + + async def test_link_gnomad_variants_with_arq_context_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_gnomad_linking_job, + athena_engine, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + 
): + """Test that the link_gnomad_variants job works with the ARQ context fixture.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + + # Verify that the job completed successfully + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_with_arq_context_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + athena_engine, + sample_link_gnomad_variants_run_pipeline, + sample_link_gnomad_variants_pipeline, + setup_sample_variants_with_caid, + ): + """Test that the link_gnomad_variants job works with the ARQ context fixture in a pipeline.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + + # Verify 
that the job completed successfully + session.refresh(sample_link_gnomad_variants_run_pipeline) + assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.SUCCEEDED + + # Verify pipeline status updates + session.refresh(sample_link_gnomad_variants_pipeline) + assert sample_link_gnomad_variants_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_link_gnomad_variants_with_arq_context_exception_handling_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_gnomad_linking_job, + athena_engine, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test that exceptions in the link_gnomad_variants job are handled with the ARQ context fixture.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify that no gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify no annotations were rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify that the job errored + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.ERRORED + + async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + athena_engine, + sample_link_gnomad_variants_pipeline, + sample_link_gnomad_variants_run_pipeline, + 
setup_sample_variants_with_caid, + ): + """Test that exceptions in the link_gnomad_variants job are handled with the ARQ context fixture.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify that no gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify no annotations were rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify that the job errored + session.refresh(sample_link_gnomad_variants_run_pipeline) + assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.ERRORED + + # Verify that the pipeline failed + session.refresh(sample_link_gnomad_variants_pipeline) + assert sample_link_gnomad_variants_pipeline.status == PipelineStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_hgvs.py b/tests/worker/jobs/external_services/test_hgvs.py new file mode 100644 index 000000000..946724cc5 --- /dev/null +++ b/tests/worker/jobs/external_services/test_hgvs.py @@ -0,0 +1,544 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import patch + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.variant_annotation_status import VariantAnnotationStatus +from mavedb.worker.jobs.external_services.hgvs import 
populate_hgvs_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + +SAMPLE_CA_ALLELE_DATA = { + "genomicAlleles": [ + { + "referenceGenome": "GRCh38", + "hgvs": ["NC_000001.11:g.12345A>G"], + } + ], + "transcriptAlleles": [ + { + "hgvs": ["NM_000000.1:c.1A>G"], + "proteinEffect": {"hgvs": "NP_000000.1:p.Met1Val"}, + "MANE": { + "nucleotide": {"RefSeq": {"hgvs": "NM_000000.1:c.1A>G"}}, + "protein": {"RefSeq": {"hgvs": "NP_000000.1:p.Met1Val"}}, + }, + } + ], +} + +SAMPLE_PA_ALLELE_DATA = { + "aminoAcidAlleles": [ + { + "hgvs": ["NP_000000.1:p.Met1Val"], + } + ], +} + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestPopulateHgvsForScoreSetUnit: + """Unit tests for the populate_hgvs_for_score_set job.""" + + async def test_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + ): + """Test populating HGVS when no mapped variants exist.""" + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_variant_without_caid_skipped( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that a variant without a CAID gets a skipped annotation.""" + _, mapped_variant = setup_sample_variants_with_caid_for_hgvs + mapped_variant.clingen_allele_id = None + session.commit() + + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["skipped_count"] == 1 + + 
async def test_variant_with_multi_caid_skipped( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that a variant with a multi-variant CAID gets a skipped annotation.""" + _, mapped_variant = setup_sample_variants_with_caid_for_hgvs + mapped_variant.clingen_allele_id = "CA123,CA456" + session.commit() + + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["skipped_count"] == 1 + + async def test_successful_ca_allele_hgvs_population( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test successful HGVS population for a CA allele.""" + with ( + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=SAMPLE_CA_ALLELE_DATA, + ), + ): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["populated_count"] == 1 + + _, mapped_variant = setup_sample_variants_with_caid_for_hgvs + session.refresh(mapped_variant) + assert mapped_variant.hgvs_g == "NC_000001.11:g.12345A>G" + + async def test_clingen_api_error_recorded_as_failed( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that ClinGen API errors are recorded as failed annotations.""" + import requests + + with ( + patch( + 
"mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + side_effect=requests.exceptions.ConnectionError("Connection refused"), + ), + ): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["failed_count"] == 1 + + async def test_total_api_failure_sends_slack_alert( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that a Slack alert is sent when all variants fail HGVS population.""" + import requests + + with ( + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + side_effect=requests.exceptions.ConnectionError("Connection refused"), + ), + patch("mavedb.worker.jobs.external_services.hgvs.log_and_send_slack_message") as mock_slack, + ): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["failed_count"] == 1 + assert result.data["populated_count"] == 0 + mock_slack.assert_called_once() + + async def test_clingen_allele_not_found_skipped( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that a 404 from ClinGen results in a skipped annotation.""" + with ( + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=None, + ), + ): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status 
== JobStatus.SUCCEEDED + assert result.data["skipped_count"] == 1 + + async def test_propagates_exceptions( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that unexpected exceptions are propagated.""" + with patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + side_effect=Exception("Test exception"), + ): + with pytest.raises(Exception) as exc_info: + await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert str(exc_info.value) == "Test exception" + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPopulateHgvsForScoreSetIntegration: + """Integration tests for the populate_hgvs_for_score_set job.""" + + async def test_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + ): + """Test end-to-end when no mapped variants exist.""" + result = await populate_hgvs_for_score_set(mock_worker_ctx, sample_populate_hgvs_run.id) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.SUCCEEDED + + async def test_successful_hgvs_population( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test end-to-end successful HGVS population.""" + with patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=SAMPLE_CA_ALLELE_DATA, + ): + result = await populate_hgvs_for_score_set(mock_worker_ctx, sample_populate_hgvs_run.id) + + assert 
isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify mapped variant was updated with HGVS + mapped_variant = session.query(MappedVariant).first() + assert mapped_variant.hgvs_g == "NC_000001.11:g.12345A>G" + + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "mapped_hgvs" + + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.SUCCEEDED + + async def test_successful_hgvs_population_pipeline( + self, + session, + with_populated_domain_data, + mock_worker_ctx, + sample_populate_hgvs_run_pipeline, + sample_populate_hgvs_pipeline, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test end-to-end HGVS population in a pipeline.""" + with patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=SAMPLE_CA_ALLELE_DATA, + ): + result = await populate_hgvs_for_score_set(mock_worker_ctx, sample_populate_hgvs_run_pipeline.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify mapped variant was updated + mapped_variant = session.query(MappedVariant).first() + assert mapped_variant.hgvs_g == "NC_000001.11:g.12345A>G" + + # Verify annotation status + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "mapped_hgvs" + + # Verify job and pipeline status + session.refresh(sample_populate_hgvs_run_pipeline) + assert sample_populate_hgvs_run_pipeline.status == JobStatus.SUCCEEDED + + session.refresh(sample_populate_hgvs_pipeline) + assert sample_populate_hgvs_pipeline.status == PipelineStatus.SUCCEEDED + + async def 
test_variant_without_caid_creates_skipped_annotation( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that variants without CAIDs get a skipped annotation status.""" + _, mapped_variant = setup_sample_variants_with_caid_for_hgvs + mapped_variant.clingen_allele_id = None + session.commit() + + result = await populate_hgvs_for_score_set(mock_worker_ctx, sample_populate_hgvs_run.id) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "skipped" + assert annotation_statuses[0].annotation_type == "mapped_hgvs" + + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.SUCCEEDED + + async def test_exceptions_handled_by_decorators( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that unexpected exceptions are handled by decorators.""" + with ( + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + sample_populate_hgvs_run.id, + ) + + mock_send_slack_job_error.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.ERRORED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPopulateHgvsForScoreSetArqContext: + """Tests 
for populate_hgvs_for_score_set job using the ARQ context fixture.""" + + async def test_with_arq_context_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_populate_hgvs_job, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that the job works with the ARQ context fixture.""" + with patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=SAMPLE_CA_ALLELE_DATA, + ): + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify mapped variant was updated + mapped_variant = session.query(MappedVariant).first() + assert mapped_variant.hgvs_g == "NC_000001.11:g.12345A>G" + + # Verify annotation status + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "mapped_hgvs" + + # Verify job completed + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.SUCCEEDED + + async def test_with_arq_context_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + sample_populate_hgvs_run_pipeline, + sample_populate_hgvs_pipeline, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that the job works with the ARQ context fixture in a pipeline.""" + with patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=SAMPLE_CA_ALLELE_DATA, + ): + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify mapped variant was updated + mapped_variant = session.query(MappedVariant).first() + assert mapped_variant.hgvs_g == "NC_000001.11:g.12345A>G" + + # Verify annotation status + 
annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "mapped_hgvs" + + # Verify job and pipeline status + session.refresh(sample_populate_hgvs_run_pipeline) + assert sample_populate_hgvs_run_pipeline.status == JobStatus.SUCCEEDED + + session.refresh(sample_populate_hgvs_pipeline) + assert sample_populate_hgvs_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_with_arq_context_exception_handling_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_populate_hgvs_job, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that exceptions are handled with the ARQ context fixture.""" + with ( + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + + # Verify no annotations were rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify job errored + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.ERRORED + + async def test_with_arq_context_exception_handling_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + sample_populate_hgvs_pipeline, + sample_populate_hgvs_run_pipeline, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that exceptions in pipeline context are handled.""" + with ( + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + 
side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + + # Verify no annotations were rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify job errored + session.refresh(sample_populate_hgvs_run_pipeline) + assert sample_populate_hgvs_run_pipeline.status == JobStatus.ERRORED + + # Verify pipeline failed + session.refresh(sample_populate_hgvs_pipeline) + assert sample_populate_hgvs_pipeline.status == PipelineStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py new file mode 100644 index 000000000..8f1bf1304 --- /dev/null +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -0,0 +1,1927 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import patch + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus +from mavedb.models.target_gene import TargetGene +from mavedb.models.target_sequence import TargetSequence +from mavedb.worker.jobs.external_services.uniprot import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.constants import ( + TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + TEST_UNIPROT_SWISS_PROT_TYPE, + VALID_NT_ACCESSION, + VALID_UNIPROT_ACCESSION, +) + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestSubmitUniprotMappingJobsForScoreSetUnit: + """Unit tests 
for submit_uniprot_mapping_jobs_for_score_set function.""" + + async def test_submit_uniprot_mapping_jobs_no_targets( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when no target genes are present.""" + + # Ensure the sample score set has no target genes + sample_score_set.target_genes = [] + session.commit() + + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when no ACs are present in post mapped metadata.""" + + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( + self, + 
session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when too many ACs are present in post mapped metadata.""" + + # Arrange the post mapped metadata to have multiple ACs + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION, "P67890"]}} + session.commit() + + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when no jobs are submitted.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value=None, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert 
isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": None, "accession": VALID_NT_ACCESSION} + } + + async def test_submit_uniprot_mapping_jobs_api_failure_raises( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test handling of UniProt API failure during job submission.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + pytest.raises(Exception, match="UniProt API failure"), + ): + await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test handling when dependent polling job is not available.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + 
target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ): + result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + # Verify that the job metadata contains the submitted jobs (which were submitted before the error) + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + + async def test_submit_uniprot_mapping_jobs_successful_submission( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Test successful submission of UniProt mapping jobs.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + expected_submitted_jobs = {"1": {"job_id": "job_12345", 
"accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs + + async def test_submit_uniprot_mapping_jobs_partial_submission( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Test partial submission of UniProt mapping jobs.""" + + # Add another target gene to the score set to simulate multiple submissions + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + session.add(new_target_gene) + session.commit() + + # Arrange the post mapped metadata to have a single AC for both target genes + target_gene_1 = sample_score_set.target_genes[0] + target_gene_1.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + target_gene_2 = new_target_gene + target_gene_2.post_mapped_metadata = {"protein": {"sequence_accessions": ["NM_000546"]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=["job_12345", None], + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert 
job_result.status == JobStatus.SUCCEEDED + + expected_submitted_jobs = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}, + "2": {"job_id": None, "accession": "NM_000546"}, + } + + # Verify that the job metadata contains both submitted and failed jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestSubmitUniprotMappingJobsForScoreSetIntegration: + """Integration tests for submit_uniprot_mapping_jobs_for_score_set function.""" + + async def test_submit_uniprot_mapping_jobs_success_independent_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_called_once() + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + expected_submitted_jobs = {"1": {"job_id": "job_12345", 
"accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run) + sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending (non-pipeline ctx) + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + + async def test_submit_uniprot_mapping_jobs_success_pipeline_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_jobs_pipeline, + with_dummy_polling_job_for_submission_run, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_dummy_polling_job_for_submission_run_in_pipeline, + sample_score_set, + ): + """Integration test for submitting UniProt mapping jobs in a pipeline context.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run_in_pipeline.id + ) + + mock_submit_id_mapping.assert_called_once() + 
assert isinstance(job_result, JobExecutionOutcome)
+        assert job_result.status == JobStatus.SUCCEEDED
+
+        expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}}
+
+        # Verify that the job metadata contains the submitted job
+        session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline)
+        # (fix) bare comparison had no effect — assert it so the check is enforced
+        assert (
+            sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"]
+            == expected_submitted_jobs
+        )
+
+        # Verify that polling job params have been updated correctly
+        session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline)
+        assert (
+            sample_dummy_polling_job_for_submission_run_in_pipeline.job_params["mapping_jobs"]
+            == expected_submitted_jobs
+        )
+
+        # Verify that the submission job was completed successfully
+        session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline)
+        assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED
+
+        # Verify that the dependent polling job is now queued (pipeline ctx)
+        session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline)
+        assert sample_dummy_polling_job_for_submission_run_in_pipeline.status == JobStatus.QUEUED
+
+        # Verify that the pipeline run status is running
+        session.refresh(sample_submit_uniprot_mapping_jobs_pipeline)
+        assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.RUNNING
+
+    async def test_submit_uniprot_mapping_jobs_no_targets(
+        self,
+        session,
+        mock_worker_ctx,
+        with_populated_domain_data,
+        with_submit_uniprot_mapping_job,
+        with_dummy_polling_job_for_submission_run,
+        sample_score_set,
+        sample_submit_uniprot_mapping_jobs_run,
+        sample_dummy_polling_job_for_submission_run,
+    ):
+        """Integration test for submitting UniProt mapping jobs when no target genes are present."""
+
+        # Ensure the sample score set has no target genes
+        sample_score_set.target_genes = []
+        session.commit()
+
+        with (
+            patch(
+
"mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_not_called() + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs when no ACs are present in post mapped metadata.""" + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_not_called() + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + 
# Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs when too many ACs are present in post mapped metadata.""" + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_not_called() + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the 
dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_propagates_exceptions( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test to ensure exceptions during UniProt mapping job submission are propagated to decorators.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_send_slack_job_error.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None + + # Verify that the submission job errored + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.ERRORED + + # Verify that the dependent polling job is still pending and no param changes were made + assert 
sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs when no jobs are submitted.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value=None, + ), + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": None, "accession": VALID_NT_ACCESSION} + } + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_partial_submission( + self, + session, + 
mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for partial submission of UniProt mapping jobs.""" + + # Add another target gene to the score set to simulate multiple submissions + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + session.add(new_target_gene) + session.commit() + + # Add accessions to both target genes' post mapped metadata + for idx, tg in enumerate(sample_score_set.target_genes): + tg.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION + f"{idx:05d}"]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=["job_12345", None], + ), + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + expected_submitted_jobs = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION + "00000"}, + "2": {"job_id": None, "accession": VALID_NT_ACCESSION + "00001"}, + } + + # Verify that the job metadata contains both submitted and failed jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and params were updated correctly + assert 
sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == expected_submitted_jobs + + async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Integration test to ensure error is raised to the decorator when dependent polling job is not available.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, + ): + result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_send_slack_job_failure.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + # Verify that the job metadata contains the job we submitted before the error + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + + # Verify that the submission job failed + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + + # nothing to verify for dependent polling job since it does not exist + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestSubmitUniprotMappingJobsArqContext: + """Integration tests for 
submit_uniprot_mapping_jobs_for_score_set function in ARQ context."""
+
+    async def test_submit_uniprot_mapping_jobs_with_arq_context_independent(
+        self,
+        session,
+        arq_redis,
+        arq_worker,
+        athena_engine,
+        with_populated_domain_data,
+        with_submit_uniprot_mapping_job,
+        with_dummy_polling_job_for_submission_run,
+        sample_score_set,
+        sample_submit_uniprot_mapping_jobs_run,
+        sample_dummy_polling_job_for_submission_run,
+    ):
+        # Add an accession to the target gene's post mapped metadata
+        target_gene = sample_score_set.target_genes[0]
+        target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}}
+        session.commit()
+
+        with (
+            patch(
+                "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping",
+                return_value="job_12345",
+            ),
+        ):
+            await arq_redis.enqueue_job(
+                "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run.id
+            )
+            await arq_worker.async_run()
+            await arq_worker.run_check()
+
+        expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}}
+
+        # Verify that the job metadata contains the submitted job
+        session.refresh(sample_submit_uniprot_mapping_jobs_run)
+        # (fix) bare comparison had no effect — assert it so the check is enforced
+        assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs
+
+        # Verify that polling job params have been updated correctly
+        session.refresh(sample_dummy_polling_job_for_submission_run)
+        assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs
+
+        # Verify that the submission job was completed successfully
+        session.refresh(sample_submit_uniprot_mapping_jobs_run)
+        assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED
+
+        # Verify that the dependent polling job is still pending (non-pipeline ctx)
+        session.refresh(sample_dummy_polling_job_for_submission_run)
+        assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING
+
+    async def
test_submit_uniprot_mapping_jobs_with_arq_context_pipeline(
+        self,
+        session,
+        arq_redis,
+        arq_worker,
+        athena_engine,
+        with_populated_domain_data,
+        with_submit_uniprot_mapping_jobs_pipeline,
+        with_dummy_polling_job_for_submission_run,
+        sample_submit_uniprot_mapping_jobs_run_in_pipeline,
+        sample_submit_uniprot_mapping_jobs_pipeline,
+        sample_dummy_polling_job_for_submission_run_in_pipeline,
+        sample_score_set,
+    ):
+        # Add an accession to the target gene's post mapped metadata
+        target_gene = sample_score_set.target_genes[0]
+        target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}}
+        session.commit()
+
+        with (
+            patch(
+                "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping",
+                return_value="job_12345",
+            ),
+        ):
+            await arq_redis.enqueue_job(
+                "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id
+            )
+            await arq_worker.async_run()
+            await arq_worker.run_check()
+
+        expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}}
+
+        # Verify that the job metadata contains the submitted job
+        session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline)
+        # (fix) bare comparison had no effect — assert it so the check is enforced
+        assert (
+            sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"]
+            == expected_submitted_jobs
+        )
+
+        # Verify that polling job params have been updated correctly
+        session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline)
+        assert (
+            sample_dummy_polling_job_for_submission_run_in_pipeline.job_params["mapping_jobs"]
+            == expected_submitted_jobs
+        )
+
+        # Verify that the submission job was completed successfully
+        session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline)
+        assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED
+
+        # Verify that the dependent polling job is now queued (pipeline ctx)
+        session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline)
+        assert
sample_dummy_polling_job_for_submission_run_in_pipeline.status == JobStatus.QUEUED + + # Verify that the pipeline run status is running + session.refresh(sample_submit_uniprot_mapping_jobs_pipeline) + assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.RUNNING + + async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_independent( + self, + session, + arq_redis, + arq_worker, + athena_engine, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test to ensure exceptions during UniProt mapping job submission are propagated to decorators.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None + + # Verify that the submission job errored + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.ERRORED + + # Verify that the dependent polling job is still pending and no param changes were made + 
assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_pipeline( + self, + session, + arq_redis, + arq_worker, + athena_engine, + with_populated_domain_data, + with_submit_uniprot_mapping_jobs_pipeline, + with_dummy_polling_job_for_submission_run, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_dummy_polling_job_for_submission_run_in_pipeline, + sample_score_set, + ): + """Integration test to ensure exceptions during UniProt mapping job submission are propagated to decorators.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_.get("submitted_jobs") is None + + # Verify that the submission job errored + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.ERRORED + + # Verify that the dependent polling job is now cancelled and no 
param changes were made + assert sample_dummy_polling_job_for_submission_run_in_pipeline.status == JobStatus.SKIPPED + assert sample_dummy_polling_job_for_submission_run_in_pipeline.job_params.get("mapping_jobs") == {} + + # Verify that the pipeline run status is failed + session.refresh(sample_submit_uniprot_mapping_jobs_pipeline) + assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.FAILED + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestPollUniprotMappingJobsForScoreSetUnit: + """Unit tests for poll_uniprot_mapping_jobs_for_score_set function.""" + + async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( + self, + session, + mock_worker_ctx, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Ensure there are no mapping jobs in the polling job params + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = {} + session.commit() + + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_results_not_ready( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with patch( + 
"mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=False, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.SERVICE_UNAVAILABLE + assert "1" in job_result.data["pending_target_genes"] + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_no_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={"results": []}, # minimal response with no results + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.DATA_ERROR + assert "1" in job_result.data["failed_genes"] + + # Verify the target gene uniprot id 
remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_ambiguous_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={ + "results": [ + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": f"{VALID_UNIPROT_ACCESSION}", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": "P67890", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + ] + }, + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.DATA_ERROR + assert "1" in job_result.data["failed_genes"] + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_nonexistent_target( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + 
sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job with a non-existent target gene ID + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "999": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.DATA_ERROR + assert "999" in job_result.data["failed_genes"] + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_successful_update( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + 
return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + async def test_poll_uniprot_mapping_jobs_partial_success( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have two mapping jobs + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}, + "2": {"job_id": "job_67890", "accession": "NONEXISTENT_AC"}, + } + session.commit() + + # Add another target gene to the score set to correspond to the second mapping job + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + session.add(new_target_gene) + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=[True, False], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + side_effect=[ + TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, # Successful result for the first mapping job + {"results": []}, # No results for the second mapping job + ], + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + 
redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.SERVICE_UNAVAILABLE + assert str(new_target_gene.id) in job_result.data["pending_target_genes"] + + # Verify the target gene uniprot id has been updated for the successful mapping and + # remains None for the failed mapping + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + assert sample_score_set.target_genes[1].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_propagates_exceptions( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + pytest.raises(Exception) as exc_info, + ): + await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert str(exc_info.value) == "UniProt API failure" + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestPollUniprotMappingJobsForScoreSetIntegration: + """Integration tests for poll_uniprot_mapping_jobs_for_score_set function.""" + + async def test_poll_uniprot_mapping_jobs_success_independent_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + 
with_independent_polling_job_for_submission_run, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_success_pipeline_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_poll_uniprot_mapping_jobs_pipeline, + sample_score_set, + sample_poll_uniprot_mapping_jobs_run_in_pipeline, + sample_poll_uniprot_mapping_jobs_pipeline, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = 
{"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_poll_uniprot_mapping_jobs_run_in_pipeline.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_poll_uniprot_mapping_jobs_run_in_pipeline.id + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) + assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded (this is the only job in the test pipeline) + session.refresh(sample_poll_uniprot_mapping_jobs_pipeline) + assert sample_poll_uniprot_mapping_jobs_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Ensure there are no mapping jobs in the polling job params + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = {} + session.commit() + + job_result = await poll_uniprot_mapping_jobs_for_score_set( + 
mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job succeeded + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_partial_mapping_jobs( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have two mapping jobs + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}, + "2": {"job_id": None, "accession": "NONEXISTENT_AC"}, + } + session.commit() + + # Add another target gene to the score set to correspond to the second mapping job + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + session.add(new_target_gene) + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=[True], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + side_effect=[TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE], + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED + + # Verify the target gene uniprot id has been updated for the 
successful mapping and + # remains None for the mapping with no job id + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + assert sample_score_set.target_genes[1].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job succeeded + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_results_not_ready( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=False, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.SERVICE_UNAVAILABLE + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # The decorator detects SERVICE_UNAVAILABLE as retryable and resets the job to PENDING + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.PENDING + + async def test_poll_uniprot_mapping_jobs_no_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + 
sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={"results": []}, # minimal response with no results + ), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DATA_ERROR + assert "1" in result.data["failed_genes"] + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + async def test_poll_uniprot_mapping_jobs_ambiguous_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={ 
+ "results": [ + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": f"{VALID_UNIPROT_ACCESSION}", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": "P67890", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + ] + }, + ), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DATA_ERROR + assert "1" in result.data["failed_genes"] + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + async def test_poll_uniprot_mapping_jobs_nonexistent_target( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job with a non-existent target gene ID + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "999": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert isinstance(result, JobExecutionOutcome) + assert 
result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DATA_ERROR + assert "999" in result.data["failed_genes"] + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + mock_send_slack_job_error.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job errored + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED + + +@pytest.mark.integration +@pytest.mark.asyncio +class 
TestPollUniprotMappingJobsForScoreSetArqContext: + """Integration tests for poll_uniprot_mapping_jobs_for_score_set function with ARQ context.""" + + async def test_poll_uniprot_mapping_jobs_with_arq_context_independent( + self, + session, + arq_worker, + arq_redis, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", sample_polling_job_for_submission_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_with_arq_context_pipeline( + self, + session, + arq_worker, + arq_redis, + with_populated_domain_data, + with_poll_uniprot_mapping_jobs_pipeline, + 
sample_score_set, + sample_poll_uniprot_mapping_jobs_run_in_pipeline, + sample_poll_uniprot_mapping_jobs_pipeline, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_poll_uniprot_mapping_jobs_run_in_pipeline.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", + sample_poll_uniprot_mapping_jobs_run_in_pipeline.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) + assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded (this is the only job in the test pipeline) + session.refresh(sample_poll_uniprot_mapping_jobs_pipeline) + assert sample_poll_uniprot_mapping_jobs_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_independent( + self, + session, + arq_worker, + arq_redis, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + 
sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", sample_polling_job_for_submission_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify that the polling job errored + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_pipeline( + self, + session, + arq_worker, + arq_redis, + mock_worker_ctx, + with_populated_domain_data, + with_poll_uniprot_mapping_jobs_pipeline, + sample_score_set, + sample_poll_uniprot_mapping_jobs_run_in_pipeline, + sample_poll_uniprot_mapping_jobs_pipeline, + ): + # Arrange the polling job params to have a single mapping job + sample_poll_uniprot_mapping_jobs_run_in_pipeline.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as 
mock_send_slack_job_error, + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", + sample_poll_uniprot_mapping_jobs_run_in_pipeline.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify that the polling job errored + session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) + assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.ERRORED + + # Verify that the pipeline run status is failed + session.refresh(sample_poll_uniprot_mapping_jobs_pipeline) + assert sample_poll_uniprot_mapping_jobs_pipeline.status == PipelineStatus.FAILED + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None diff --git a/tests/worker/jobs/external_services/test_variant_translation.py b/tests/worker/jobs/external_services/test_variant_translation.py new file mode 100644 index 000000000..0b1677df5 --- /dev/null +++ b/tests/worker/jobs/external_services/test_variant_translation.py @@ -0,0 +1,702 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import patch + +from sqlalchemy import select + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus +from mavedb.models.variant_translation import VariantTranslation +from mavedb.worker.jobs.external_services.variant_translation import populate_variant_translations_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +# --- Unit Tests --- + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestPopulateVariantTranslationsUnit: + """Unit tests for the populate_variant_translations_for_score_set 
job.""" + + async def test_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + ): + """Test that the job succeeds with zero translations when no mapped variants exist.""" + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["translations_created"] == 0 + + async def test_variant_without_caid_no_translations( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a variant without a CAID results in no translations.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = None + session.commit() + + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["translations_created"] == 0 + + async def test_ca_allele_creates_translations( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a CA allele creates translations via PA lookup.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00001"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + 
return_value=["CA11111", "CA22222"], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + # 1 for PA00001->CA9765210 (the original CA), 2 for PA00001->CA11111 and PA00001->CA22222 + assert result.data["translations_created"] == 3 + + translations = session.scalars(select(VariantTranslation)).all() + assert len(translations) == 3 + + annotation = session.scalars(select(VariantAnnotationStatus)).one() + assert annotation is not None + + async def test_pa_allele_creates_translations( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a PA allele creates translations via CA lookup.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = "PA99999" + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=["CA33333", "CA44444"], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["translations_created"] == 2 + + translations = session.scalars(select(VariantTranslation)).all() + assert len(translations) == 2 + aa_ids = {t.aa_clingen_id for t in translations} + assert aa_ids == {"PA99999"} + + async def test_multi_variant_caid_expanded( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that 
comma-separated CAIDs are expanded and each processed independently.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = "CA55555,CA66666" + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00002"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=[], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + # PA00002->CA55555 and PA00002->CA66666 + assert result.data["translations_created"] == 2 + + async def test_ca_allele_no_pa_ids_skipped( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a CA allele with no canonical PA IDs results in a skip.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=[], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["alleles_skipped"] == 1 + assert result.data["translations_created"] == 0 + + annotation = session.scalars(select(VariantAnnotationStatus)).one() + assert annotation.status == "skipped" + + async def test_pa_allele_no_ca_ids_skipped( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a PA allele with 
no registered CA IDs results in a skip.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = "PA88888" + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=[], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["alleles_skipped"] == 1 + assert result.data["translations_created"] == 0 + + async def test_ca_allele_api_failure_records_failed_annotation( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a ClinGen API failure for CA allele records a failed annotation.""" + import requests + + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=requests.exceptions.ConnectionError("Connection failed"), + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DEPENDENCY_FAILURE + assert result.data["alleles_failed"] == 1 + + annotation = session.scalars(select(VariantAnnotationStatus)).one() + assert annotation.status == "failed" + + async def test_unrecognized_allele_format_skipped( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that an unrecognized allele ID format is skipped.""" + _, 
mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = "XX12345" + session.commit() + + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["alleles_skipped"] == 1 + + async def test_duplicate_translations_not_created( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that duplicate translations are not created on re-run.""" + # Pre-populate a translation + session.add(VariantTranslation(aa_clingen_id="PA00003", nt_clingen_id="CA9765210")) + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00003"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=[], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["translations_created"] == 0 + + translations = session.scalars( + select(VariantTranslation).where(VariantTranslation.aa_clingen_id == "PA00003") + ).all() + assert len(translations) == 1 + + async def test_propagates_exceptions( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that unexpected exceptions are propagated.""" + with patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + 
side_effect=Exception("Test exception"), + ): + with pytest.raises(Exception) as exc_info: + await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert str(exc_info.value) == "Test exception" + + async def test_total_api_failure_returns_failed( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that the job returns FAILED when all variant translation lookups fail.""" + import requests + + with patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=requests.exceptions.ConnectionError("Connection failed"), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DEPENDENCY_FAILURE + assert result.data["alleles_failed"] == 1 + assert result.data["translations_created"] == 0 + + +# --- Integration Tests --- + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPopulateVariantTranslationsIntegration: + """Integration tests that exercise the full decorator stack.""" + + async def test_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + ): + """Test end-to-end when no mapped variants exist.""" + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, sample_populate_variant_translations_run.id + ) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + annotation_statuses = 
session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.SUCCEEDED + + async def test_successful_job_updates_status( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a successful job run updates the job status to SUCCEEDED.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00004"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=["CA77777"], + ), + ): + await populate_variant_translations_for_score_set( + mock_worker_ctx, + sample_populate_variant_translations_run.id, + ) + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.SUCCEEDED + + translations = session.scalars(select(VariantTranslation)).all() + assert len(translations) == 2 # PA00004->CA9765210 and PA00004->CA77777 + + async def test_job_with_pipeline_updates_pipeline_status( + self, + session, + with_populated_domain_data, + mock_worker_ctx, + sample_populate_variant_translations_run_pipeline, + sample_populate_variant_translations_pipeline, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a job in a pipeline updates the pipeline status on success.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00005"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=[], + ), + ): + await populate_variant_translations_for_score_set( + mock_worker_ctx, + 
sample_populate_variant_translations_run_pipeline.id, + ) + + session.refresh(sample_populate_variant_translations_run_pipeline) + session.refresh(sample_populate_variant_translations_pipeline) + assert sample_populate_variant_translations_run_pipeline.status == JobStatus.SUCCEEDED + assert sample_populate_variant_translations_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_variant_without_caid_creates_skipped_annotation( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that variants without CAIDs produce no annotations (filtered before processing).""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = None + session.commit() + + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, sample_populate_variant_translations_run.id + ) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["translations_created"] == 0 + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.SUCCEEDED + + async def test_unrecognized_allele_creates_skipped_annotation( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that unrecognized allele formats create skipped annotations through the full stack.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = "XX12345" + session.commit() + + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, 
sample_populate_variant_translations_run.id + ) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "skipped" + assert annotation_statuses[0].annotation_type == "variant_translation" + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.SUCCEEDED + + async def test_exceptions_handled_by_decorators( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that unexpected exceptions are handled by decorators.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + sample_populate_variant_translations_run.id, + ) + + mock_send_slack_job_error.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.ERRORED + + +# --- ARQ Context Tests --- + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPopulateVariantTranslationsArqContext: + """Tests for populate_variant_translations_for_score_set job using the ARQ context fixture.""" + + async def test_with_arq_context_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + 
sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that the job works with the ARQ context fixture.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00006"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=["CA88888"], + ), + ): + await arq_redis.enqueue_job( + "populate_variant_translations_for_score_set", + sample_populate_variant_translations_run.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.SUCCEEDED + + translations = session.scalars(select(VariantTranslation)).all() + assert len(translations) == 2 # PA00006->CA9765210 and PA00006->CA88888 + + async def test_with_arq_context_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + sample_populate_variant_translations_run_pipeline, + sample_populate_variant_translations_pipeline, + setup_sample_variants_with_caid_for_translation, + ): + """Test that the job works with the ARQ context fixture in a pipeline.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00007"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=[], + ), + ): + await arq_redis.enqueue_job( + "populate_variant_translations_for_score_set", + sample_populate_variant_translations_run_pipeline.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "variant_translation" + + 
session.refresh(sample_populate_variant_translations_run_pipeline) + assert sample_populate_variant_translations_run_pipeline.status == JobStatus.SUCCEEDED + + session.refresh(sample_populate_variant_translations_pipeline) + assert sample_populate_variant_translations_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_with_arq_context_exception_handling_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that exceptions are handled with the ARQ context fixture.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job( + "populate_variant_translations_for_score_set", + sample_populate_variant_translations_run.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.ERRORED + + async def test_with_arq_context_exception_handling_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + sample_populate_variant_translations_pipeline, + sample_populate_variant_translations_run_pipeline, + setup_sample_variants_with_caid_for_translation, + ): + """Test that exceptions in pipeline context are handled.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=Exception("Test exception"), + ), + 
patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job( + "populate_variant_translations_for_score_set", + sample_populate_variant_translations_run_pipeline.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + session.refresh(sample_populate_variant_translations_run_pipeline) + assert sample_populate_variant_translations_run_pipeline.status == JobStatus.ERRORED + + session.refresh(sample_populate_variant_translations_pipeline) + assert sample_populate_variant_translations_pipeline.status == PipelineStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_vep.py b/tests/worker/jobs/external_services/test_vep.py new file mode 100644 index 000000000..a9c14b05d --- /dev/null +++ b/tests/worker/jobs/external_services/test_vep.py @@ -0,0 +1,370 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import patch + +from sqlalchemy import select + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus, JobStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus +from mavedb.worker.jobs.external_services.vep import populate_vep_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestPopulateVepForScoreSetUnit: + """Unit tests for populate_vep_for_score_set.""" + + async def test_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_populate_vep_job, + mock_worker_ctx, + sample_populate_vep_run, + ): + """Job succeeds with 
zero counts when no mapped variants exist.""" + result = await populate_vep_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_vep_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["variants_processed"] == 0 + assert result.data["variants_with_consequences"] == 0 + assert result.data["variants_recoder_failed"] == 0 + + async def test_variant_without_hgvs_assay_level_skipped( + self, + session, + with_populated_domain_data, + with_populate_vep_job, + mock_worker_ctx, + sample_populate_vep_run, + setup_sample_variants_for_vep, + ): + """A mapped variant with no hgvs_assay_level gets a SKIPPED annotation.""" + _, mapped_variant = setup_sample_variants_for_vep + mapped_variant.hgvs_assay_level = None + session.commit() + + result = await populate_vep_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_vep_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["variants_processed"] == 0 + + annotation = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == mapped_variant.variant_id, + VariantAnnotationStatus.annotation_type == AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + ) + ).one() + assert annotation.status == AnnotationStatus.SKIPPED + assert annotation.failure_category == AnnotationFailureCategory.MISSING_IDENTIFIER + + async def test_vep_api_success_sets_consequence_and_annotation( + self, + session, + with_populated_domain_data, + with_populate_vep_job, + mock_worker_ctx, + sample_populate_vep_run, + setup_sample_variants_for_vep, + ): + """VEP returns a consequence: mapped variant and SUCCESS annotation are updated.""" + _, mapped_variant = setup_sample_variants_for_vep + hgvs = mapped_variant.hgvs_assay_level + + with patch( + 
"mavedb.worker.jobs.external_services.vep.get_functional_consequence", + return_value={hgvs: "missense_variant"}, + ): + result = await populate_vep_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_vep_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["variants_processed"] == 1 + assert result.data["variants_with_consequences"] == 1 + assert result.data["variants_recoder_failed"] == 0 + + session.refresh(mapped_variant) + assert mapped_variant.vep_functional_consequence == "missense_variant" + assert mapped_variant.vep_access_date is not None + + annotation = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == mapped_variant.variant_id, + VariantAnnotationStatus.annotation_type == AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + ) + ).one() + assert annotation.status == AnnotationStatus.SUCCESS + + async def test_vep_missing_triggers_variant_recoder_fallback( + self, + session, + with_populated_domain_data, + with_populate_vep_job, + mock_worker_ctx, + sample_populate_vep_run, + setup_sample_variants_for_vep, + ): + """When VEP misses a variant, Variant Recoder is called and its result fed back to VEP.""" + _, mapped_variant = setup_sample_variants_for_vep + hgvs = mapped_variant.hgvs_assay_level + genomic_hgvs = "NC_000017.11:g.43094692C>T" + + with ( + patch( + "mavedb.worker.jobs.external_services.vep.get_functional_consequence", + side_effect=[ + {}, # initial VEP pass returns nothing + {genomic_hgvs: "missense_variant"}, # second VEP pass on recoded HGVS + ], + ), + patch( + "mavedb.worker.jobs.external_services.vep.run_variant_recoder", + return_value={hgvs: [genomic_hgvs]}, + ), + ): + result = await populate_vep_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_vep_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + 
assert result.status == JobStatus.SUCCEEDED + assert result.data["variants_with_consequences"] == 1 + assert result.data["variants_recoder_failed"] == 0 + + session.refresh(mapped_variant) + assert mapped_variant.vep_functional_consequence == "missense_variant" + + async def test_variant_recoder_failure_annotated_as_failed( + self, + session, + with_populated_domain_data, + with_populate_vep_job, + mock_worker_ctx, + sample_populate_vep_run, + setup_sample_variants_for_vep, + ): + """Variant Recoder returning no result for an HGVS produces a FAILED annotation.""" + _, mapped_variant = setup_sample_variants_for_vep + + with ( + patch( + "mavedb.worker.jobs.external_services.vep.get_functional_consequence", + return_value={}, + ), + patch( + "mavedb.worker.jobs.external_services.vep.run_variant_recoder", + return_value={}, # recoder has no result + ), + ): + result = await populate_vep_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_vep_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["variants_without_consequences"] == 0 + assert result.data["variants_recoder_failed"] == 1 + + annotation = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == mapped_variant.variant_id, + VariantAnnotationStatus.annotation_type == AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + ) + ).one() + assert annotation.status == AnnotationStatus.FAILED + assert annotation.failure_category == AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND + + async def test_vep_failure_after_recoder_annotated_as_failed( + self, + session, + with_populated_domain_data, + with_populate_vep_job, + mock_worker_ctx, + sample_populate_vep_run, + setup_sample_variants_for_vep, + ): + """VEP returning no consequence even after Variant Recoder produces a FAILED annotation.""" + _, mapped_variant = setup_sample_variants_for_vep + hgvs = 
mapped_variant.hgvs_assay_level + genomic_hgvs = "NC_000017.11:g.43094692C>T" + + with ( + patch( + "mavedb.worker.jobs.external_services.vep.get_functional_consequence", + return_value={}, # VEP returns nothing in both passes + ), + patch( + "mavedb.worker.jobs.external_services.vep.run_variant_recoder", + return_value={hgvs: [genomic_hgvs]}, + ), + ): + result = await populate_vep_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_vep_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["variants_without_consequences"] == 1 + assert result.data["variants_recoder_failed"] == 0 + + annotation = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == mapped_variant.variant_id, + VariantAnnotationStatus.annotation_type == AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + ) + ).one() + assert annotation.status == AnnotationStatus.FAILED + assert annotation.failure_category == AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND + + async def test_vep_batch_api_exception_returns_errored( + self, + session, + with_populated_domain_data, + with_populate_vep_job, + mock_worker_ctx, + sample_populate_vep_run, + setup_sample_variants_for_vep, + ): + """An unexpected exception from the VEP API returns an ERRORED outcome.""" + with patch( + "mavedb.worker.jobs.external_services.vep.get_functional_consequence", + side_effect=RuntimeError("VEP API unreachable"), + ): + result = await populate_vep_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_vep_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + + async def test_variant_recoder_api_exception_returns_errored( + self, + session, + with_populated_domain_data, + with_populate_vep_job, + mock_worker_ctx, + sample_populate_vep_run, + 
setup_sample_variants_for_vep, + ): + """An unexpected exception from the Variant Recoder API returns an ERRORED outcome.""" + with ( + patch( + "mavedb.worker.jobs.external_services.vep.get_functional_consequence", + return_value={}, + ), + patch( + "mavedb.worker.jobs.external_services.vep.run_variant_recoder", + side_effect=RuntimeError("Recoder API unreachable"), + ), + ): + result = await populate_vep_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_vep_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPopulateVepForScoreSetIntegration: + """Integration tests for populate_vep_for_score_set run through an ARQ worker context.""" + + async def test_populate_vep_with_arq_context( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_populate_vep_job, + sample_populate_vep_run, + setup_sample_variants_for_vep, + ): + """Job completes successfully within an ARQ worker context.""" + _, mapped_variant = setup_sample_variants_for_vep + hgvs = mapped_variant.hgvs_assay_level + + with patch( + "mavedb.worker.jobs.external_services.vep.get_functional_consequence", + return_value={hgvs: "missense_variant"}, + ): + await arq_redis.enqueue_job("populate_vep_for_score_set", sample_populate_vep_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + session.refresh(sample_populate_vep_run) + assert sample_populate_vep_run.status == JobStatus.SUCCEEDED + + session.refresh(mapped_variant) + assert mapped_variant.vep_functional_consequence == "missense_variant" + + annotation = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == mapped_variant.variant_id, + VariantAnnotationStatus.annotation_type == AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + ) + ).one() + assert annotation.status == AnnotationStatus.SUCCESS + + 
async def test_populate_vep_in_pipeline_context( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + sample_populate_vep_run_pipeline, + sample_populate_vep_pipeline, + setup_sample_variants_for_vep, + ): + """Job completes and advances the pipeline when run in a pipeline context.""" + from mavedb.models.enums.job_pipeline import PipelineStatus + + _, mapped_variant = setup_sample_variants_for_vep + hgvs = mapped_variant.hgvs_assay_level + + with patch( + "mavedb.worker.jobs.external_services.vep.get_functional_consequence", + return_value={hgvs: "synonymous_variant"}, + ): + await arq_redis.enqueue_job("populate_vep_for_score_set", sample_populate_vep_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + session.refresh(sample_populate_vep_run_pipeline) + assert sample_populate_vep_run_pipeline.status == JobStatus.SUCCEEDED + + session.refresh(sample_populate_vep_pipeline) + assert sample_populate_vep_pipeline.status == PipelineStatus.SUCCEEDED diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py new file mode 100644 index 000000000..e43f07522 --- /dev/null +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -0,0 +1,288 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import patch + +from sqlalchemy import select + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.jobs.pipeline_management.start_pipeline import start_pipeline +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestStartPipelineUnit: + """Unit tests for starting 
pipelines.""" + + @pytest.fixture(autouse=True) + def setup_start_pipeline_job_run(self, session, with_dummy_pipeline, sample_dummy_pipeline): + """Fixture to ensure a start pipeline job run exists in the database.""" + job_run = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="start_pipeline", + job_function="start_pipeline", + ) + session.add(job_run) + session.commit() + + return job_run + + async def test_start_pipeline_raises_exception_when_no_pipeline_associated_with_job( + self, + session, + mock_worker_ctx, + setup_start_pipeline_job_run, + ): + """Test that starting a pipeline raises an exception when no pipeline is associated with the job.""" + + # Remove pipeline association from job run + setup_start_pipeline_job_run.pipeline_id = None + session.commit() + + result = await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert result.error == "No pipeline associated with this job." 
+ assert result.exception is None + + async def test_start_pipeline_starts_pipeline_successfully( + self, + session, + mock_worker_ctx, + mock_pipeline_manager, + setup_start_pipeline_job_run, + ): + """Test that starting a pipeline completes successfully.""" + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.PipelineManager") as mock_pipeline_manager_class, + patch.object(PipelineManager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + result = await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + mock_coordinate_pipeline.assert_called_once() + + async def test_start_pipeline_raises_exception( + self, + session, + mock_worker_ctx, + mock_pipeline_manager, + setup_start_pipeline_job_run, + ): + """Test that starting a pipeline raises an exception.""" + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.PipelineManager") as mock_pipeline_manager_class, + patch.object( + PipelineManager, + "coordinate_pipeline", + side_effect=Exception("Simulated pipeline start failure"), + ), + pytest.raises(Exception, match="Simulated pipeline start failure"), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestStartPipelineIntegration: + """Integration tests for starting pipelines.""" + + async def test_start_pipeline_on_job_without_pipeline_fails( + self, + session, + mock_worker_ctx, + with_full_dummy_pipeline, + sample_dummy_pipeline_start, + ): + """Test that starting a pipeline on a job without an associated 
pipeline fails.""" + + sample_dummy_pipeline_start.pipeline_id = None + session.commit() + + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure: + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + mock_send_slack_job_failure.assert_called_once() + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.FAILED + + async def test_start_pipeline_on_valid_job_succeeds_and_coordinates_pipeline( + self, session, mock_worker_ctx, with_full_dummy_pipeline, sample_dummy_pipeline_start, sample_dummy_pipeline + ): + """Test that starting a pipeline on a valid job succeeds and coordinates the pipeline.""" + + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.SUCCEEDED + + # Verify that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.RUNNING + + async def test_start_pipeline_handles_exceptions_gracefully( + self, + session, + mock_worker_ctx, + with_full_dummy_pipeline, + sample_dummy_pipeline, + sample_dummy_pipeline_start, + ): + """Test that starting a pipeline handles exceptions gracefully.""" + # Mock a coordination failure during pipeline start. Realistically if this failed in pipeline start + # it would likely also fail during the final coordination attempt in the exception handler, but for testing purposes + # we only mock the initial failure here. 
In a real-world scenario, we'd likely have to rely on our alerting here and + # intervene manually or via a separate recovery job to fix the pipeline state. + real_coordinate_pipeline = PipelineManager.coordinate_pipeline + call_count = {"n": 0} + + async def custom_side_effect(*args, **kwargs): + if call_count["n"] == 0: + call_count["n"] += 1 + raise Exception("Simulated pipeline start failure") + return await real_coordinate_pipeline( + PipelineManager(session, session, sample_dummy_pipeline.id), *args, **kwargs + ) # Allow the final coordination attempt to proceed 'normally' + + with ( + patch( + "mavedb.worker.lib.managers.pipeline_manager.PipelineManager.coordinate_pipeline", + side_effect=custom_side_effect, + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + mock_send_slack_job_error.assert_called_once() + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.ERRORED + + # Verify that the pipeline state is updated to FAILED + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.FAILED + + async def test_start_pipeline_no_jobs_in_pipeline( + self, + session, + mock_worker_ctx, + with_dummy_pipeline, + sample_dummy_pipeline_start, + sample_dummy_pipeline, + ): + """Test starting a pipeline that has no jobs defined.""" + + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify that a JobRun was created for the start_pipeline job and it succeeded + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.SUCCEEDED + + # Verify 
that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.SUCCEEDED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestStartPipelineArqContext: + """Test starting pipelines using an ARQ worker context.""" + + async def test_start_pipeline_with_arq_context( + self, + session, + arq_redis, + arq_worker, + with_full_dummy_pipeline, + sample_dummy_pipeline_start, + sample_dummy_pipeline, + ): + """Test starting a pipeline using an ARQ worker context.""" + + await arq_redis.enqueue_job("start_pipeline", sample_dummy_pipeline_start.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.SUCCEEDED + + # Verify that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.RUNNING + + # Verify that other pipeline steps have been queued + pipeline_steps = ( + session.execute( + select(JobRun).where( + JobRun.pipeline_id == sample_dummy_pipeline.id, JobRun.id != sample_dummy_pipeline_start.id + ) + ) + .scalars() + .all() + ) + assert len(pipeline_steps) == 1 + assert pipeline_steps[0].job_type == "dummy_step" + assert pipeline_steps[0].status == JobStatus.QUEUED + + async def test_start_pipeline_with_arq_context_no_jobs_in_pipeline( + self, + session, + arq_redis, + arq_worker, + with_dummy_pipeline, + sample_dummy_pipeline_start, + sample_dummy_pipeline, + ): + """Test starting a pipeline with no jobs using an ARQ worker context.""" + + await arq_redis.enqueue_job("start_pipeline", sample_dummy_pipeline_start.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that a JobRun was created for the start_pipeline job and it succeeded + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status 
== JobStatus.SUCCEEDED + + # Verify that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.SUCCEEDED diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py new file mode 100644 index 000000000..927679741 --- /dev/null +++ b/tests/worker/jobs/system/test_cleanup.py @@ -0,0 +1,2526 @@ +# ruff: noqa: E402 +"""Comprehensive tests for the cleanup_stalled_jobs worker function. + +Tests cover: +- Unit tests: Mock database queries and verify cleanup logic +- Integration tests: Use real database and verify end-to-end behavior +- ARQ integration tests: Verify full worker integration +- Edge cases: Empty results, multiple jobs, different states +""" + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +from datetime import datetime, timedelta, timezone +from unittest.mock import AsyncMock, patch + +from arq.constants import result_key_prefix +from arq.jobs import JobStatus as ArqJobStatus +from arq.jobs import job_key_prefix +from sqlalchemy import select + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums import DependencyType +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.jobs.system.cleanup import ( + PENDING_TIMEOUT_MINUTES, + PIPELINE_STUCK_TIMEOUT_MINUTES, + RUNNING_TIMEOUT_MINUTES, + cleanup_stalled_jobs, +) +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.utils import arq_job_id +from tests.helpers.transaction_spy import TransactionSpy + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.fixture +def mock_arq_job_not_found(): + """Mock ArqJob.status() to return not_found, simulating a 
crashed-enqueue QUEUED job.""" + with patch("mavedb.worker.jobs.system.cleanup.ArqJob") as mock_arq_job: + mock_arq_job.return_value.status = AsyncMock(return_value=ArqJobStatus.not_found) + yield mock_arq_job + + +@pytest.fixture +def mock_arq_job_in_redis(): + """Mock ArqJob.status() to return queued, simulating a legitimately queued job in ARQ Redis.""" + with patch("mavedb.worker.jobs.system.cleanup.ArqJob") as mock_arq_job: + mock_arq_job.return_value.status = AsyncMock(return_value=ArqJobStatus.queued) + yield mock_arq_job + + +############################################################################################################################################ +# Unit Tests +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestCleanupStalledJobsUnit: + """Unit tests for the cleanup_stalled_jobs function.""" + + async def test_cleanup_with_no_stalled_jobs( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup when no stalled jobs are found.""" + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 + assert result.data["queued_jobs"] == [] + assert result.data["running_jobs"] == [] + assert result.data["pending_jobs"] == [] + + async def test_cleanup_stalled_queued_job_with_retries_remaining( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found + ): + """Test cleanup of a stalled QUEUED job with retries remaining.""" + # Create a stalled QUEUED job in the database + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + 
created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["queued_jobs"] + + # Verify job state was updated correctly + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # job was re-enqueued but not yet started, so it remains QUEUED + assert stalled_job.retry_count == 1 + assert stalled_job.started_at is None + assert stalled_job.finished_at is None + + async def test_cleanup_stalled_queued_job_max_retries_reached( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found + ): + """Test cleanup of a stalled QUEUED job with max retries reached.""" + # Create a stalled QUEUED job with max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["queued_jobs"] + + # Verify job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert 
stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_stalled_running_job_with_retries( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled RUNNING job with retries remaining.""" + # Create a stalled RUNNING job in the database + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=1, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["running_jobs"] + + # Verify job state was updated correctly + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Moved back to QUEUED for retry + assert stalled_job.retry_count == 2 # Incremented from 1 + assert stalled_job.started_at is None # Cleared for retry + assert stalled_job.finished_at is None + + async def test_cleanup_stalled_running_job_max_retries_reached( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled RUNNING job with max retries reached.""" + # Create a stalled RUNNING job with max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - 
timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["running_jobs"] + + # Verify job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_stalled_running_job_missing_started_at( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup handles RUNNING job with missing started_at timestamp.""" + # Add session to worker context for real DB operations + mock_worker_ctx["db"] = session + + # Create a RUNNING job without started_at (data inconsistency) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=None, # Missing timestamp + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.send_slack_error") as mock_slack: + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + # Job should be skipped (not cleaned up) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 + + # Slack error should have been sent + mock_slack.assert_called_once() + + # Job should remain unchanged + 
session.refresh(stalled_job) + assert stalled_job.status == JobStatus.RUNNING + assert stalled_job.retry_count == 0 + + async def test_cleanup_stalled_pending_job_with_retries( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled PENDING job with retries remaining.""" + # Create a stalled PENDING job in the database + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["pending_jobs"] + + # Verify job state was updated correctly + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Moved back to QUEUED for retry + assert stalled_job.retry_count == 1 # Incremented from 0 + assert stalled_job.started_at is None + assert stalled_job.finished_at is None + + async def test_cleanup_stalled_pending_job_max_retries_reached( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled PENDING job with max retries reached.""" + # Create a stalled PENDING job with max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=3, # Already at max 
+ job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["pending_jobs"] + + # Verify job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_stalled_pending_job_enqueue_failure( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled PENDING job is marked FAILED if ARQ enqueue fails.""" + # Create a stalled PENDING job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Mock redis.enqueue_job to raise an exception + mock_worker_ctx["redis"].enqueue_job = AsyncMock(side_effect=Exception("Redis connection failed")) + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify job was marked as FAILED due to enqueue failure + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "Failed to enqueue after stall recovery" in stalled_job.error_message + + async def 
test_cleanup_multiple_stalled_jobs_mixed_states( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found + ): + """Test cleanup of multiple stalled jobs in different states.""" + # Create a pipeline and stalled jobs in all three states + test_pipeline = Pipeline( + urn="test:pipeline:multi", + name="Test Pipeline Multi", + description="Pipeline for multi-job test", + status=PipelineStatus.CREATED, + correlation_id="test_multi", + ) + session.add(test_pipeline) + session.flush() + + stalled_queued = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + stalled_running = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 1), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + stalled_pending = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 1), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([stalled_queued, stalled_running, stalled_pending]) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 3 + assert stalled_queued.urn in result.data["queued_jobs"] + assert stalled_running.urn in result.data["running_jobs"] + assert stalled_pending.urn in result.data["pending_jobs"] + + # Verify all jobs 
were updated correctly + session.refresh(stalled_queued) + session.refresh(stalled_running) + session.refresh(stalled_pending) + # All jobs should be QUEUED after successful retry and enqueue + assert stalled_queued.status == JobStatus.QUEUED + assert stalled_queued.retry_count == 1 + assert stalled_running.status == JobStatus.QUEUED + assert stalled_running.retry_count == 1 + assert stalled_pending.status == JobStatus.QUEUED + assert stalled_pending.retry_count == 1 + + async def test_cleanup_stalled_queued_standalone_job_enqueue_failure( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found + ): + """Test that stalled standalone QUEUED job is marked FAILED if ARQ enqueue fails.""" + + # Create a stalled QUEUED job WITHOUT pipeline_id (standalone) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=None, # Standalone job + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Mock redis.enqueue_job to raise an exception + mock_worker_ctx["redis"].enqueue_job = AsyncMock(side_effect=Exception("Redis connection failed")) + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify job was marked as FAILED due to enqueue failure + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "Failed to enqueue after stall recovery" in stalled_job.error_message + + async def test_cleanup_stalled_running_standalone_job_enqueue_failure( + self, session, mock_worker_ctx, sample_cleanup_job_run, 
with_cleanup_job + ): + """Test that stalled standalone RUNNING job is marked FAILED if ARQ enqueue fails.""" + + # Create a stalled RUNNING job WITHOUT pipeline_id (standalone) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=None, # Standalone job + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Mock redis.enqueue_job to raise an exception + mock_worker_ctx["redis"].enqueue_job = AsyncMock(side_effect=Exception("Redis connection failed")) + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify job was marked as FAILED due to enqueue failure + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "Failed to enqueue after stall recovery" in stalled_job.error_message + + async def test_cleanup_stalled_queued_pipeline_job_dependencies_satisfied( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found + ): + """Test that stalled pipeline QUEUED job with satisfied dependencies is enqueued.""" + # Create a pipeline with all dependencies satisfied + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_ok", + name="Test Pipeline Queued Deps OK", + description="Pipeline for queued job with satisfied dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_ok", + ) + session.add(test_pipeline) + session.flush() + + # Create a stalled QUEUED job WITH pipeline_id + 
stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, # Part of pipeline + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify job was enqueued (dependencies were satisfied) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_running_pipeline_job_dependencies_satisfied( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline RUNNING job with satisfied dependencies is enqueued.""" + # Create a pipeline with all dependencies satisfied + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_ok", + name="Test Pipeline Running Deps OK", + description="Pipeline for running job with satisfied dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_ok", + ) + session.add(test_pipeline) + session.flush() + + # Create a stalled RUNNING job WITH pipeline_id + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, # Part of pipeline + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + 
mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify job was enqueued (dependencies were satisfied) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_queued_pipeline_job_dependencies_failed( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found + ): + """Test that stalled pipeline QUEUED job with failed dependencies is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_failed", + name="Test Pipeline Queued Deps Failed", + description="Pipeline for queued job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the failed job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await 
cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify the job was immediately skipped (no retry bookkeeping for unfulfillable deps) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 + + async def test_cleanup_stalled_queued_pipeline_job_dependencies_not_ready( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found + ): + """Test that stalled pipeline QUEUED job with unmet dependencies stays PENDING.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_not_ready", + name="Test Pipeline Queued Deps Not Ready", + description="Pipeline for queued job with unmet dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that's still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the running job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = 
await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies not ready) + # Job should remain in PENDING state waiting for dependencies + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_running_pipeline_job_dependencies_failed( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline RUNNING job with failed dependencies is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_failed", + name="Test Pipeline Running Deps Failed", + description="Pipeline for running job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the failed job + # Use recent created_at to avoid being detected as stalled PENDING after reset from RUNNING + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = 
JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify the job was immediately skipped (no retry bookkeeping for unfulfillable deps) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 + + async def test_cleanup_stalled_pending_pipeline_job_dependencies_failed( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline PENDING job with failed dependencies is cleaned up.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_failed", + name="Test Pipeline Pending Deps Failed", + description="Pipeline for pending job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the failed job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = 
JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify the job was immediately skipped (no retry bookkeeping for unfulfillable deps) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 + + async def test_cleanup_stalled_running_pipeline_job_dependencies_not_ready( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline RUNNING job with dependencies not ready is reset to PENDING.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_not_ready", + name="Test Pipeline Running Deps Not Ready", + description="Pipeline for running job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job - use recent created_at to avoid double cleanup + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + 
session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies not ready) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_pending_pipeline_job_dependencies_not_ready( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that blocked pipeline PENDING job with dependencies not ready is not treated as stalled.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_not_ready", + name="Test Pipeline Pending Deps Not Ready", + description="Pipeline for pending job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + 
session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 + + # Verify job was left untouched because dependencies are not satisfied + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 0 + + async def test_cleanup_stalled_pending_pipeline_completion_required_dependency_cancelled( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline PENDING job with cancelled completion-required dependency is cleaned up.""" + test_pipeline = Pipeline( + urn="test:pipeline:pending_completion_cancelled", + name="Test Pipeline Pending Completion Cancelled", + description="Pipeline for pending job with cancelled completion-required dependency", + status=PipelineStatus.CREATED, + correlation_id="test_pending_completion_cancelled", + ) + session.add(test_pipeline) + session.flush() + + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.CANCELLED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + dependency = JobDependency( 
+ id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.COMPLETION_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 + + async def test_cleanup_jobs_does_not_alter_jobs_in_valid_states( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_in_redis + ): + """Test that cleanup does not alter jobs that are not stalled.""" + # Create a non-stalled RUNNING job + valid_running_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=30), + started_at=datetime.now(timezone.utc) - timedelta(minutes=25), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + # Create a non-stalled PENDING job in a pipeline (well within timeout) + test_pipeline = Pipeline( + urn="test:pipeline:valid", + name="Test Pipeline Valid", + description="Pipeline for valid job test", + status=PipelineStatus.CREATED, + correlation_id="test_valid", + ) + session.add(test_pipeline) + session.flush() + valid_pending_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), # 5 min before timeout + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + # Create a non-stalled QUEUED job (well within timeout) + valid_queued_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + 
created_at=datetime.now(timezone.utc) + - timedelta(minutes=5), # legitimately present in ARQ Redis (mock_arq_job_in_redis) + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([valid_running_job, valid_pending_job, valid_queued_job]) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 + + # Verify the valid job was not altered + session.refresh(valid_running_job) + assert valid_running_job.status == JobStatus.RUNNING + session.refresh(valid_pending_job) + assert valid_pending_job.status == JobStatus.PENDING + session.refresh(valid_queued_job) + assert valid_queued_job.status == JobStatus.QUEUED + + @pytest.mark.parametrize( + "pipeline_status", + [PipelineStatus.RUNNING, PipelineStatus.CREATED], + ) + async def test_cleanup_calls_coordinate_pipeline_for_stuck_pipeline( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, pipeline_status + ): + """Unit test: coordinate_pipeline() is called for each non-terminal pipeline with no active jobs.""" + test_pipeline = Pipeline( + urn=f"test:pipeline:stuck:{pipeline_status.value}", + name="Test Stuck Pipeline", + description="Stuck pipeline for unit test", + status=pipeline_status, + correlation_id=f"unit_test_stuck_{pipeline_status.value}", + created_at=datetime.now(timezone.utc) - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES + 5), + ) + session.add(test_pipeline) + # Add a terminal job so query filter passes + terminal_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.SUCCEEDED, + pipeline_id=None, # set after flush + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(test_pipeline) + session.flush() + terminal_job.pipeline_id = 
test_pipeline.id + session.add(terminal_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.PipelineManager") as mock_pm_class: + mock_pm = AsyncMock() + mock_pm.coordinate_pipeline = AsyncMock() + mock_pm_class.return_value = mock_pm + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result.status == JobStatus.SUCCEEDED + assert test_pipeline.urn in result.data["fixed_pipelines"] + mock_pm.coordinate_pipeline.assert_awaited_once() + + async def test_cleanup_coordinate_pipeline_exception_is_caught_and_reported( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Unit test: if coordinate_pipeline() raises, the error is caught, logged, and Slack-reported.""" + test_pipeline = Pipeline( + urn="test:pipeline:error", + name="Test Error Pipeline", + description="Pipeline that will raise on coordinate", + status=PipelineStatus.RUNNING, + correlation_id="unit_test_error", + created_at=datetime.now(timezone.utc) - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES + 5), + ) + session.add(test_pipeline) + session.flush() + terminal_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.SUCCEEDED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(terminal_job) + session.commit() + + with ( + patch("mavedb.worker.jobs.system.cleanup.PipelineManager") as mock_pm_class, + patch("mavedb.worker.jobs.system.cleanup.send_slack_error") as mock_slack, + ): + mock_pm = AsyncMock() + mock_pm.coordinate_pipeline = AsyncMock(side_effect=RuntimeError("coordinate failed")) + mock_pm_class.return_value = mock_pm + + # Should not raise — exception is caught inside the loop + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result.status == 
JobStatus.SUCCEEDED + assert test_pipeline.urn not in result.data["fixed_pipelines"] + mock_slack.assert_called_once() + + async def test_cleanup_sends_slack_when_max_retries_reached_queued_job( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found + ): + """Test that send_slack_job_failure is called when sweeper permanently fails a stalled QUEUED job.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.send_slack_job_failure") as mock_slack: + await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert call_kwargs["job_urn"] == stalled_job.urn + assert call_kwargs["retry_count"] == 3 + assert call_kwargs["max_retries"] == 3 + + async def test_cleanup_sends_slack_when_max_retries_reached_running_job( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that send_slack_job_failure is called when sweeper permanently fails a stalled RUNNING job.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.send_slack_job_failure") as mock_slack: + await cleanup_stalled_jobs( + mock_worker_ctx, None, 
JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert call_kwargs["job_urn"] == stalled_job.urn + + async def test_cleanup_sends_slack_when_max_retries_reached_pending_job( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that send_slack_job_failure is called when sweeper permanently fails a stalled PENDING job.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.send_slack_job_failure") as mock_slack: + await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert call_kwargs["job_urn"] == stalled_job.urn + + async def test_cleanup_sends_slack_on_enqueue_failure( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that send_slack_job_failure is called when sweeper fails to re-enqueue a stalled job.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + mock_worker_ctx["redis"].enqueue_job = AsyncMock(side_effect=Exception("Redis connection failed")) + + with 
patch("mavedb.worker.jobs.system.cleanup.send_slack_job_failure") as mock_slack: + await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert call_kwargs["job_urn"] == stalled_job.urn + assert "Failed to enqueue" in call_kwargs["reason"] + + async def test_cleanup_does_not_send_slack_when_job_is_retried( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that send_slack_job_failure is NOT called when the sweeper successfully retries a stalled job.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, # Has retries remaining + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.send_slack_job_failure") as mock_slack: + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + mock_slack.assert_not_called() + assert result.data["total_cleaned"] == 1 + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestCleanupStalledJobsIntegration: + """Integration tests for cleanup_stalled_jobs with real database.""" + + async def test_cleanup_integration_no_stalled_jobs(self, standalone_worker_context, session): + """Integration test: cleanup with no stalled jobs.""" + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify the cleanup job itself was created and succeeded + cleanup_job = session.execute( + select(JobRun).where(JobRun.job_function == "cleanup_stalled_jobs") + 
).scalar_one_or_none() + + assert cleanup_job is not None + assert cleanup_job.status == JobStatus.SUCCEEDED + assert cleanup_job.job_type == "cron_job" + + # Verify no jobs were cleaned + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 + + async def test_cleanup_integration_stalled_queued_job_gets_retried(self, standalone_worker_context, session): + """Integration test: stalled QUEUED job is retried.""" + # Create a stalled QUEUED job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify the stalled job was re-enqueued for retry + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_running_job_gets_retried(self, standalone_worker_context, session): + """Integration test: stalled RUNNING job is retried.""" + # Create a stalled RUNNING job (simulating worker crash) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, 
expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify the stalled job was re-enqueued for retry + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 + assert stalled_job.error_message is None # Cleared on retry + assert stalled_job.finished_at is None # Cleared on retry + + async def test_cleanup_integration_max_retries_reached_fails_job(self, standalone_worker_context, session): + """Integration test: stalled job with max retries is failed.""" + # Create a stalled job at max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify the stalled job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_integration_pending_job_in_pipeline(self, standalone_worker_context, session): + """Integration test: stalled PENDING job in pipeline is retried.""" + test_pipeline = Pipeline( + urn="test:pipeline:cleanup", + name="Test Cleanup Pipeline", + description="Pipeline for cleanup 
test", + status=PipelineStatus.CREATED, + correlation_id="test_cleanup_correlation", + ) + session.add(test_pipeline) + session.flush() # Get the pipeline ID + + # Create a stalled PENDING job in the pipeline + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, # Reference the real pipeline + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Verify the stalled job was reset for retry + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_excludes_recent_jobs(self, standalone_worker_context, session): + """Integration test: jobs not treated as stalled are left alone. + + RUNNING jobs are protected by the time threshold. + QUEUED jobs are protected by the ARQ Redis presence check. 
+ """ + # Create jobs that should not be cleaned up + recent_queued = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=1), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + recent_running = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=30), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES - 5), # Within threshold + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([recent_queued, recent_running]) + session.commit() + + # Enqueue recent_queued in ARQ Redis so the Redis presence check marks it as + # legitimately queued (not a crashed-enqueue job). + arq_redis = standalone_worker_context["redis"] + await arq_redis.enqueue_job("test_function", recent_queued.id, _job_id=arq_job_id(recent_queued)) + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify no jobs were cleaned + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 + + # Verify jobs remain unchanged + session.refresh(recent_queued) + session.refresh(recent_running) + assert recent_queued.status == JobStatus.QUEUED + assert recent_running.status == JobStatus.RUNNING + assert recent_queued.retry_count == 0 + assert recent_running.retry_count == 0 + + async def test_cleanup_integration_updates_progress_correctly(self, standalone_worker_context, session): + """Integration test: cleanup job updates progress correctly and returns proper data.""" + # Create stalled jobs to trigger progress updates across different states + queued_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + 
created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + running_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add_all([queued_job, running_job]) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded with progress through all states + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 2 + + # Verify result structure contains detailed breakdown + assert "queued_jobs" in result.data + assert "running_jobs" in result.data + assert "pending_jobs" in result.data + + # Verify both jobs were processed + assert len(result.data["queued_jobs"]) == 1 + assert len(result.data["running_jobs"]) == 1 + assert len(result.data["pending_jobs"]) == 0 + + async def test_cleanup_integration_stalled_running_job_max_retries_reached( + self, standalone_worker_context, session + ): + """Integration test: stalled RUNNING job at max retries is failed.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert 
isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.retry_count == 3 + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + + async def test_cleanup_integration_stalled_running_job_missing_started_at(self, standalone_worker_context, session): + """Integration test: stalled RUNNING job without started_at is skipped (not cleaned).""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), + started_at=None, # Missing started_at - causes job to be skipped + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Job is skipped (not cleaned) when started_at is missing + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 + + # Job remains unchanged + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.RUNNING + assert stalled_job.retry_count == 0 + + async def test_cleanup_integration_stalled_pending_job_with_retries(self, standalone_worker_context, session): + """Integration test: stalled PENDING job is retried.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await 
cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_pending_job_max_retries_reached( + self, standalone_worker_context, session + ): + """Integration test: stalled PENDING job at max retries is failed.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.retry_count == 3 + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + + async def test_cleanup_integration_multiple_stalled_jobs_mixed_states(self, standalone_worker_context, session): + """Integration test: cleanup handles multiple jobs in different states.""" + queued_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + running_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + 
finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + pending_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([queued_job, running_job, pending_job]) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 3 + + session.refresh(queued_job) + session.refresh(running_job) + session.refresh(pending_job) + + assert queued_job.status == JobStatus.QUEUED + assert running_job.status == JobStatus.QUEUED + assert pending_job.status == JobStatus.QUEUED + assert queued_job.retry_count == 1 + assert running_job.retry_count == 1 + assert pending_job.retry_count == 1 + + async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_satisfied( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline QUEUED job with satisfied dependencies is enqueued.""" + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_ok", + name="Test Pipeline Queued Deps OK", + description="Pipeline for queued job with satisfied dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_ok", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that succeeded + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.SUCCEEDED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on successful job + stalled_job = JobRun( + job_type="test_job", + 
job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_failed( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline QUEUED job with failed dependencies is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_failed", + name="Test Pipeline Queued Deps Failed", + description="Pipeline for queued job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + 
retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Job should be skipped immediately (no retry bookkeeping for unfulfillable deps) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 + + async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_not_ready( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline QUEUED job with dependencies not ready is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_not_ready", + name="Test Pipeline Queued Deps Not Ready", + description="Pipeline for queued job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # 
Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Job should be in PENDING, waiting for dependencies + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_failed( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline RUNNING job with failed dependencies is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_failed", + name="Test Pipeline Running Deps Failed", + description="Pipeline for running job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job - use recent created_at to avoid double cleanup + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + 
+ # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Job should be skipped immediately (no retry bookkeeping for unfulfillable deps) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 + + async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_failed( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline PENDING job with failed dependencies is cleaned up.""" + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_failed", + name="Test Pipeline Pending Deps Failed", + description="Pipeline for pending job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + 
id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Job should be skipped immediately (no retry bookkeeping for unfulfillable deps) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 + + async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_not_ready( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline RUNNING job with dependencies not ready is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_not_ready", + name="Test Pipeline Running Deps Not Ready", + description="Pipeline for running job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job - use recent created_at to avoid double cleanup + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create 
dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + # Job should be in PENDING, waiting for dependencies + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_not_ready( + self, standalone_worker_context, session + ): + """Integration test: blocked pipeline PENDING job with dependencies not ready is not treated as stalled.""" + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_not_ready", + name="Test Pipeline Pending Deps Not Ready", + description="Pipeline for pending job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, 
+ depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 + + # Job should remain untouched because dependencies are not satisfied + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 0 + + async def test_cleanup_integration_stalled_pending_pipeline_completion_required_dependency_cancelled( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline PENDING job with cancelled completion-required dependency is cleaned up.""" + test_pipeline = Pipeline( + urn="test:pipeline:pending_completion_cancelled", + name="Test Pipeline Pending Completion Cancelled", + description="Pipeline for pending job with cancelled completion-required dependency", + status=PipelineStatus.CREATED, + correlation_id="test_pending_completion_cancelled", + ) + session.add(test_pipeline) + session.flush() + + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.CANCELLED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.COMPLETION_REQUIRED, + ) + 
session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 + + async def test_cleanup_integration_retries_running_job_when_arq_job_key_is_stale( + self, standalone_worker_context, session + ): + """Regression test: a crashed RUNNING job leaves arq:job: in Redis at the prior attempt's + id. Because each retry uses a distinct ARQ job id (urn#), the stale key + cannot block re-enqueueing — the retry lives in its own Redis slot.""" + arq_redis = standalone_worker_context["redis"] + + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Simulate a worker crash: arq:job: key for attempt 0 was never cleaned up by ARQ's finish_job. + prior_arq_id = f"{stalled_job.urn}#0" + await arq_redis.set(job_key_prefix + prior_arq_id, b"stale_job_data") + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + # The retry is enqueued under a fresh ARQ job id (attempt 1); the stale key for attempt 0 + # is irrelevant to deduplication. 
+ retried_arq_id = f"{stalled_job.urn}#1" + assert await arq_redis.exists(job_key_prefix + retried_arq_id) == 1 + + async def test_cleanup_integration_retries_job_when_arq_result_key_is_stale( + self, standalone_worker_context, session + ): + """Regression test: a job that previously ran leaves arq:result: in Redis for up to 1 hour. + Because each retry uses a distinct ARQ job id (urn#), the prior attempt's + result key cannot block re-enqueueing.""" + arq_redis = standalone_worker_context["redis"] + + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Simulate a prior run result still within ARQ's default 1-hour keep_result TTL. + prior_arq_id = f"{stalled_job.urn}#0" + await arq_redis.set(result_key_prefix + prior_arq_id, b"stale_result_data") + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + # The retry is enqueued under a fresh ARQ job id (attempt 1); the stale result key for + # attempt 0 is irrelevant to deduplication. 
+ retried_arq_id = f"{stalled_job.urn}#1" + assert await arq_redis.exists(job_key_prefix + retried_arq_id) == 1 + + async def test_cleanup_resolves_stuck_pipeline_all_jobs_terminal(self, standalone_worker_context, session): + """Integration test: pipeline stuck in RUNNING with all jobs terminal gets resolved.""" + test_pipeline = Pipeline( + urn="test:pipeline:stuck_running", + name="Test Pipeline Stuck Running", + description="Pipeline stuck in RUNNING after all jobs finished", + status=PipelineStatus.RUNNING, + correlation_id="test_stuck_running", + created_at=datetime.now(timezone.utc) - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES + 5), + ) + session.add(test_pipeline) + session.flush() + + # All jobs are in terminal states — no active work remaining + for status in [JobStatus.SUCCEEDED, JobStatus.SUCCEEDED, JobStatus.SKIPPED]: + job = JobRun( + job_type="test_job", + job_function="test_function", + status=status, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(job) + session.commit() + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert test_pipeline.urn in result.data["fixed_pipelines"] + + session.refresh(test_pipeline) + assert test_pipeline.status not in [PipelineStatus.RUNNING, PipelineStatus.CREATED] + + async def test_cleanup_does_not_touch_pipeline_with_active_jobs(self, standalone_worker_context, session): + """Integration test: pipeline with active jobs is not touched.""" + test_pipeline = Pipeline( + urn="test:pipeline:still_running", + name="Test Pipeline Still Running", + description="Pipeline legitimately still running", + status=PipelineStatus.RUNNING, + correlation_id="test_still_running", + created_at=datetime.now(timezone.utc) - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES + 5), + ) + session.add(test_pipeline) + session.flush() + + active_job = JobRun( + job_type="test_job", + job_function="test_function", + 
status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + started_at=datetime.now(timezone.utc) - timedelta(minutes=5), + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(active_job) + session.commit() + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert test_pipeline.urn not in result.data["fixed_pipelines"] + + session.refresh(test_pipeline) + assert test_pipeline.status == PipelineStatus.RUNNING + + async def test_cleanup_does_not_touch_recent_stuck_pipeline(self, standalone_worker_context, session): + """Integration test: recently created pipeline within the threshold is not touched.""" + test_pipeline = Pipeline( + urn="test:pipeline:recent_stuck", + name="Test Pipeline Recent Stuck", + description="Recently created pipeline that may not have started yet", + status=PipelineStatus.RUNNING, + correlation_id="test_recent_stuck", + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), + ) + session.add(test_pipeline) + session.commit() + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert test_pipeline.urn not in result.data["fixed_pipelines"] + + session.refresh(test_pipeline) + assert test_pipeline.status == PipelineStatus.RUNNING + + async def test_cleanup_does_not_touch_terminal_pipeline(self, standalone_worker_context, session): + """Integration test: already-terminal pipelines are not touched.""" + for terminal_status in [PipelineStatus.SUCCEEDED, PipelineStatus.FAILED, PipelineStatus.CANCELLED]: + test_pipeline = Pipeline( + urn=f"test:pipeline:terminal:{terminal_status.value}", + name=f"Test Pipeline Terminal {terminal_status.value}", + description=f"Already terminal pipeline ({terminal_status.value})", + status=terminal_status, + correlation_id=f"test_terminal_{terminal_status.value}", + created_at=datetime.now(timezone.utc) - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES + 5), + ) + 
session.add(test_pipeline) + session.commit() + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["fixed_pipelines"] == [] + + +############################################################################################################################################ +# ARQ Integration Tests +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestCleanupStalledJobsArqIntegration: + """Integration tests for cleanup_stalled_jobs using ARQ worker.""" + + async def test_cleanup_arq_integration(self, arq_redis, arq_worker, standalone_worker_context, session): + """Integration test: cleanup_stalled_jobs runs via ARQ worker.""" + # Create a stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Enqueue cleanup job via ARQ + await arq_redis.enqueue_job("cleanup_stalled_jobs") + + # Run the worker (just cleanup_stalled_jobs, not the retried test_function) + await arq_worker.async_run() + # Don't call run_check() - the retried test_function doesn't exist and would fail + + # Verify the cleanup job succeeded + cleanup_job = session.execute( + select(JobRun).where(JobRun.job_function == "cleanup_stalled_jobs") + ).scalar_one_or_none() + + assert cleanup_job is not None + assert cleanup_job.status == JobStatus.SUCCEEDED + assert cleanup_job.job_type == "cron_job" + + # Verify the stalled job was cleaned up + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 diff --git 
a/tests/worker/jobs/utils/test_setup.py b/tests/worker/jobs/utils/test_setup.py new file mode 100644 index 000000000..70c407596 --- /dev/null +++ b/tests/worker/jobs/utils/test_setup.py @@ -0,0 +1,34 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import Mock + +from mavedb.models.job_run import JobRun +from mavedb.worker.jobs.utils.setup import validate_job_params + + +@pytest.mark.unit +def test_validate_job_params_success(): + job = Mock(spec=JobRun, job_params={"foo": 1, "bar": 2}) + + # Should not raise + validate_job_params(["foo", "bar"], job) + + +@pytest.mark.unit +def test_validate_job_params_missing_param(): + job = Mock(spec=JobRun, job_params={"foo": 1}) + + with pytest.raises(ValueError, match="Missing required job param: bar"): + validate_job_params(["foo", "bar"], job) + + +@pytest.mark.unit +def test_validate_job_params_no_params(): + job = Mock(spec=JobRun, job_params=None) + + with pytest.raises(ValueError, match="Job has no job_params defined."): + validate_job_params(["foo"], job) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py new file mode 100644 index 000000000..d0d37562a --- /dev/null +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -0,0 +1,1190 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +import math +from unittest.mock import ANY, MagicMock, call, patch + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.models.variant import Variant +from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = 
pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.unit +@pytest.mark.asyncio +@pytest.mark.usefixtures("patch_db_session_ctxmgr") +class TestCreateVariantsForScoreSetUnit: + """Unit tests for create_variants_for_score_set job.""" + + async def test_create_variants_for_score_set_raises_key_error_on_missing_hdp_from_ctx( + self, + mock_worker_ctx, + mock_job_manager, + ): + ctx = mock_worker_ctx.copy() + del ctx["hdp"] + + with pytest.raises(KeyError) as exc_info: + await create_variants_for_score_set(ctx, 999, mock_job_manager) + + assert str(exc_info.value) == "'hdp'" + + async def test_create_variants_for_score_set_calls_s3_client_with_correct_parameters( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None) as mock_download_fileobj, + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), + ) + + # Use ANY for 
dynamically created Fileobj parameters. + mock_download_fileobj.assert_has_calls( + [ + call(Bucket="score-set-csv-uploads-dev", Key="sample_scores.csv", Fileobj=ANY), + call(Bucket="score-set-csv-uploads-dev", Key="sample_counts.csv", Fileobj=ANY), + ] + ) + + async def test_create_variants_for_score_set_s3_file_not_found( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object( + mock_s3_client, + "download_fileobj", + side_effect=Exception("The specified key does not exist."), + ), + pytest.raises(Exception, match="The specified key does not exist."), + ): + await create_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), + ) + + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + + async def test_create_variants_for_score_set_counts_file_can_be_optional( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Remove counts_file_key to test optional behavior + create_variants_sample_params_without_counts = create_variants_sample_params.copy() + create_variants_sample_params_without_counts["counts_file_key"] = None + create_variants_sample_params_without_counts["count_columns_metadata"] = None + sample_independent_variant_creation_run.job_params = create_variants_sample_params_without_counts + session.add(sample_independent_variant_creation_run) + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv 
to return sample score dataframe only + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + None, + create_variants_sample_params_without_counts["score_columns_metadata"], + None, + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), + ) + + async def test_create_variants_for_score_set_raises_when_no_targets_exist( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Remove all TargetGene entries to simulate no targets existing + sample_score_set.target_genes = [] + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + pytest.raises(ValueError, match="Can't create variants when score set has no targets."), + ): + await create_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), + ) + + async def test_create_variants_for_score_set_handles_empty_variant_data( + self, + session, + with_independent_processing_runs, + 
with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants_data", return_value=[]), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), + ) + # If no exceptions are raised, the test passes for handling empty variant data. 
+ + async def test_create_variants_for_score_set_removes_existing_variants_before_creation( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Add existing variants to the score set to test removal + sample_score_set.num_variants = 1 + variant = Variant(data={}, score_set_id=sample_score_set.id) + session.add(variant) + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), + ) + + # Verify that existing variants have been removed + remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(remaining_variants) == 0 + session.refresh(sample_score_set) + assert sample_score_set.num_variants == 0 # Updated after creation + + async def test_create_variants_for_score_set_updates_processing_state( + self, + session, + with_independent_processing_runs, + 
with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), + ) + + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + assert sample_score_set.processing_errors is None + + async def test_create_variants_for_score_set_retains_existing_variants_when_exception_occurs( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Add existing variants to the score set to test retention on failure + sample_score_set.num_variants = 1 + variant = Variant(data={}, score_set_id=sample_score_set.id) + 
session.add(variant) + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Test exception during data validation"), + ), + pytest.raises(Exception, match="Test exception during data validation"), + ): + await create_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), + ) + + # Verify that existing variants are still present + remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(remaining_variants) == 1 + session.refresh(sample_score_set) + assert sample_score_set.num_variants == 1 # Should remain unchanged + + async def test_create_variants_for_score_set_handles_exception_and_updates_state( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Test exception during data validation"), + ), + pytest.raises(Exception, match="Test exception during data validation"), + ): + await create_variants_for_score_set( + 
mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), + ) + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Test exception during data validation" in sample_score_set.processing_errors["exception"] + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestCreateVariantsForScoreSetIntegration: + """Integration tests for create_variants_for_score_set job.""" + + ## Common success workflows + + async def test_create_variants_for_score_set_independent_job( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + # Assume the S3 client works as expected. + # + # Moto is omitted here for brevity since this + # function doesn't have S3 side effects. We assume the file is already in S3 for this test, + # and any cases where the file is not present will be handled by the job manager and tested + # in unit tests. + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes. + # + # A side effect of not mocking S3 more thoroughly + # is that our S3 download has no return value and just side effects data into a file-like object, + # so we mock pd.read_csv directly to avoid it trying to read from an empty file. 
+ patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that job state is as expected + job_run = ( + 
session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + async def test_create_variants_for_score_set_pipeline_job( + self, + session, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes. + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_creation_run.id) + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + 
assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that pipeline job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + # Verify that pipeline status is updated. Pipeline will remain RUNNING + # as our default test pipeline includes the mapping job as well. 
+ session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING + + ## Common edge cases + + async def test_create_variants_for_score_set_replaces_variants( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # First run to create initial variants + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + initial_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(initial_variants) == sample_score_dataframe.shape[0] + + # Modify dataframes to simulate updated data + updated_score_dataframe = sample_score_dataframe.copy() + updated_score_dataframe["score"] += 10 # Increment scores by 10 + + updated_count_dataframe = sample_count_dataframe.copy() + updated_count_dataframe["c_0"] += 5 # Increment counts by 5 + + # Mock a second run with updated dataframes + sample_independent_variant_creation_run.status = JobStatus.PENDING + session.commit() + + # Second run to replace existing variants + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[updated_score_dataframe, updated_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + replaced_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(replaced_variants) == sample_score_dataframe.shape[0] + + # Verify that the 
variants have been replaced with updated data + for variant in replaced_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = updated_score_dataframe.loc[ + updated_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = updated_count_dataframe.loc[ + updated_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(replaced_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + async def test_create_variants_for_score_set_handles_missing_counts_file( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + sample_independent_variant_creation_run.job_params["counts_file_key"] = None + sample_independent_variant_creation_run.job_params["count_columns_metadata"] = {} + 
session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return only the score dataframe + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present but... 
+ assert variant.data["count_data"] == {} # ...ensure count_data is empty since no counts file was provided + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + ## Common failure workflows + + async def test_create_variants_for_score_set_validation_error_during_creation( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + sample_score_dataframe.loc[0, "hgvs_nt"] = "c.G>X" # Introduce invalid value to trigger validation error + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + mock_send_slack_job_failure.assert_called_once() + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "encountered 1 invalid variant strings" in sample_score_set.processing_errors["exception"] + assert 
len(sample_score_set.processing_errors["detail"]) > 0 + + # Verify that no variants were created + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == 0 + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.status == JobStatus.FAILED + + async def test_create_variants_for_score_set_generic_exception_handling_during_creation( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + mock_send_slack_job_error.assert_called_once() + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + 
job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.status == JobStatus.ERRORED + + async def test_create_variants_for_score_set_generic_exception_handling_during_replacement( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # First run to create initial variants + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + initial_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(initial_variants) == sample_score_dataframe.shape[0] + + # Mock a second run to replace existing variants + sample_independent_variant_creation_run.status = JobStatus.PENDING + session.commit() + + # Second run to replace existing variants but trigger a generic exception + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + 
mock_send_slack_job_error.assert_called_once() + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that initial variants are still present + remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(remaining_variants) == len(initial_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.status == JobStatus.ERRORED + + ## Pipeline failure workflow + + async def test_create_variants_for_score_set_pipeline_job_generic_exception_handling( + self, + session, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await create_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_creation_run.id) + + 
mock_send_slack_job_error.assert_called_once() + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.status == JobStatus.ERRORED + + # Verify that pipeline status is updated. + session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED + # Verify other pipeline runs are marked as skipped + other_runs = ( + session.query(JobRun) + .filter( + JobRun.pipeline_id == sample_variant_creation_pipeline.id, + JobRun.id != sample_pipeline_variant_creation_run.id, + ) + .all() + ) + for run in other_runs: + assert run.status == JobStatus.SKIPPED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestCreateVariantsForScoreSetArqContext: + """Integration tests for create_variants_for_score_set job using ARQ worker context.""" + + async def test_create_variants_for_score_set_with_arq_context_independent_ctx( + self, + session, + arq_redis, + arq_worker, + with_independent_processing_runs, + with_populated_domain_data, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes.
+ patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await arq_redis.enqueue_job("create_variants_for_score_set", sample_independent_variant_creation_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # 
Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + async def test_create_variants_for_score_set_with_arq_context_pipeline_ctx( + self, + session, + arq_redis, + arq_worker, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes. + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await arq_redis.enqueue_job("create_variants_for_score_set", sample_pipeline_variant_creation_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if 
actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that pipeline job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + # Verify that pipeline status is updated. Pipeline will remain RUNNING + # as our default test pipeline includes the mapping job as well. 
+ session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING + + async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_independent_ctx( + self, + session, + arq_redis, + arq_worker, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_independent_variant_creation_run, + with_populated_domain_data, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job("create_variants_for_score_set", sample_independent_variant_creation_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.status == JobStatus.ERRORED + + async def 
test_create_variants_for_score_set_with_arq_context_generic_exception_handling_pipeline_ctx( + self, + session, + arq_redis, + arq_worker, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job("create_variants_for_score_set", sample_pipeline_variant_creation_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.status == JobStatus.ERRORED + + # Verify that pipeline status is updated. 
+ session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED + + # Verify other pipeline runs are marked as skipped + other_runs = ( + session.query(JobRun) + .filter( + JobRun.pipeline_id == sample_variant_creation_pipeline.id, + JobRun.id != sample_pipeline_variant_creation_run.id, + ) + .all() + ) + for run in other_runs: + assert run.status == JobStatus.SKIPPED diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py new file mode 100644 index 000000000..430e18d60 --- /dev/null +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -0,0 +1,1769 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import MagicMock, patch + +from sqlalchemy.exc import NoResultFound + +from mavedb.lib.mapping import EXCLUDED_PREMAPPED_ANNOTATION_KEYS +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.variant import Variant +from mavedb.models.variant_annotation_status import VariantAnnotationStatus +from mavedb.worker.jobs.variant_processing.mapping import map_variants_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.constants import TEST_CODING_LAYER, TEST_GENOMIC_LAYER, TEST_PROTEIN_LAYER +from tests.helpers.util.setup.worker import construct_mock_mapping_output, create_variants_in_score_set + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestMapVariantsForScoreSetUnit: + """Unit tests for map_variants_for_score_set job.""" + + async def dummy_mapping_output(self, output_data={}): + return output_data + + async def
test_map_variants_for_score_set_no_mapping_results( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no mapping results are found.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=self.dummy_mapping_output({})), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert "score_set_id" in result.data + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Mapping results were not returned from VRS mapping service" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + + async def test_map_variants_for_score_set_no_mapped_scores( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no scores are mapped.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output( + {"mapped_scores": [], "error_message": "No variants were mapped for this score set"} + ), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert "score_set_id" in result.data + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "No variants were mapped for this score set" in sample_score_set.mapping_errors["error_message"] + + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + + async def test_map_variants_for_score_set_no_reference_data( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no reference data is available.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output( + {"mapped_scores": [MagicMock()], "error_message": "Reference metadata missing from mapping results"} + ), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert "score_set_id" in result.data + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "Reference metadata missing from mapping results" in sample_score_set.mapping_errors["error_message"] + + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + + async def test_map_variants_for_score_set_nonexistent_target_gene( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when the target gene does not exist.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output( + { + "mapped_scores": [MagicMock()], + "reference_sequences": {"some_key": "some_value"}, + } + ), + ), + pytest.raises(ValueError), + ): + await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), + ) + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + + async def test_map_variants_for_score_set_returns_variants_not_in_score_set( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when variants not in score set are returned.""" + # Add a non-existent variant to the mapped output to ensure at least one invalid mapping + mapping_output = await construct_mock_mapping_output( + session=session, score_set=sample_score_set, with_layers={"g", "c", "p"} + ) + mapping_output["mapped_scores"].append({"variant_id": "not_in_score_set", "some_other_data": "value"}) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output(mapping_output), + ), + pytest.raises(NoResultFound), + ): + await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), + ) + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + + async def test_map_variants_for_score_set_success_missing_gene_info( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with missing gene info.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=False, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant in the score set to be mapped + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + session.add(variant) + session.commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify the gene info is missing from the target gene reference sequence + for target in sample_score_set.target_genes: + assert target.mapped_hgnc_name is None + + # Verify that a mapped variant was created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 1 + + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].annotation_type == "vrs_mapping" + assert annotation_statuses[0].status == "success" + + @pytest.mark.parametrize( + "with_layers", + [ + {"g"}, + {"c"}, + {"p"}, + {"g", "c"}, + {"g", "p"}, + {"c", "p"}, + {"g", "c", "p"}, + ], + ) + async def 
test_map_variants_for_score_set_success_layer_permutations( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + with_layers, + ): + """Test successful mapping variants with annotation layer permutations.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers=with_layers, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant in the score set to be mapped + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + session.add(variant) + session.commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify the annotation layers presence/absence + for target in sample_score_set.target_genes: + if "g" in with_layers: + assert target.pre_mapped_metadata["genomic"] is not None + assert target.post_mapped_metadata["genomic"] is not None + pre_mapped_comparator = TEST_GENOMIC_LAYER["computed_reference_sequence"].copy() + for key in EXCLUDED_PREMAPPED_ANNOTATION_KEYS: + pre_mapped_comparator.pop(key, None) + + assert target.pre_mapped_metadata["genomic"] == pre_mapped_comparator + assert 
target.post_mapped_metadata["genomic"] == TEST_GENOMIC_LAYER["mapped_reference_sequence"] + else: + assert target.post_mapped_metadata.get("genomic") is None + + if "c" in with_layers: + assert target.pre_mapped_metadata["cdna"] is not None + assert target.post_mapped_metadata["cdna"] is not None + pre_mapped_comparator = TEST_CODING_LAYER["computed_reference_sequence"].copy() + for key in EXCLUDED_PREMAPPED_ANNOTATION_KEYS: + pre_mapped_comparator.pop(key, None) + + assert target.pre_mapped_metadata["cdna"] == pre_mapped_comparator + assert target.post_mapped_metadata["cdna"] == TEST_CODING_LAYER["mapped_reference_sequence"] + else: + assert target.post_mapped_metadata.get("cdna") is None + + if "p" in with_layers: + assert target.pre_mapped_metadata["protein"] is not None + assert target.post_mapped_metadata["protein"] is not None + pre_mapped_comparator = TEST_PROTEIN_LAYER["computed_reference_sequence"].copy() + for key in EXCLUDED_PREMAPPED_ANNOTATION_KEYS: + pre_mapped_comparator.pop(key, None) + + assert target.pre_mapped_metadata["protein"] == pre_mapped_comparator + assert target.post_mapped_metadata["protein"] == TEST_PROTEIN_LAYER["mapped_reference_sequence"] + else: + assert target.post_mapped_metadata.get("protein") is None + + # Verify that a mapped variant was created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 1 + + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].annotation_type == "vrs_mapping" + assert annotation_statuses[0].status == "success" + + async def test_map_variants_for_score_set_success_no_successful_mapping( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + 
sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with no successful mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=False, # Missing post-mapped + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant in the score set to be mapped + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + session.add(variant) + session.commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors["error_message"] == "All variants failed to map." + + # Verify that one mapped variant was created. Although no successful mapping, an entry is still created. 
+ mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 1 + + # Verify that the mapped variant has no post-mapped data + mapped_variant = mapped_variants[0] + assert mapped_variant.post_mapped == {} + + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].annotation_type == "vrs_mapping" + assert annotation_statuses[0].status == "failed" + + async def test_map_variants_for_score_set_incomplete_mapping( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with incomplete mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=False, # Only some variants mapped + ) + + # Create two variants in the score set to be mapped + variant1 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={}, + urn="variant:1", + ) + variant2 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.2G>T", + hgvs_pro="NP_000000.1:p.Val2Leu", + data={}, + urn="variant:2", + ) + session.add_all([variant1, variant2]) + session.commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + assert sample_score_set.mapping_state == MappingState.incomplete + assert sample_score_set.mapping_errors is None + + # Although only one variant was successfully mapped, verify that an entity was created + # for each variant in the score set + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 2 + + # Verify that only one variant has post-mapped data + mapped_variant_with_post_data = ( + session.query(MappedVariant).filter(MappedVariant.post_mapped != {}).one_or_none() + ) + assert mapped_variant_with_post_data is not None + + mapped_variant_without_post_data = ( + session.query(MappedVariant).filter(MappedVariant.post_mapped == {}).one_or_none() + ) + assert mapped_variant_without_post_data is not None + + # Verify that annotation statuses 
were created and correct + annotation_status_success = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, VariantAnnotationStatus.status == "success") + .all() + ) + assert len(annotation_status_success) == 1 + assert annotation_status_success[0].annotation_type == "vrs_mapping" + annotation_status_failed = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, VariantAnnotationStatus.status == "failed") + .all() + ) + assert len(annotation_status_failed) == 1 + assert annotation_status_failed[0].annotation_type == "vrs_mapping" + + async def test_map_variants_for_score_set_complete_mapping( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with complete mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, # All variants mapped + ) + + # Create two variants in the score set to be mapped + variant1 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={}, + urn="variant:1", + ) + variant2 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.2G>T", + hgvs_pro="NP_000000.1:p.Val2Leu", + data={}, + urn="variant:2", + ) + session.add_all([variant1, variant2]) + session.commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that mapped variants were created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 2 + + # Verify that both variants have post-mapped data. I'm comfortable assuming the + # data is correct given our layer permutation tests above. 
+ for urn in ["variant:1", "variant:2"]: + mapped_variant = session.query(MappedVariant).filter(MappedVariant.variant.has(urn=urn)).one_or_none() + assert mapped_variant is not None + assert mapped_variant.post_mapped != {} + assert mapped_variant.hgvs_assay_level is not None + + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 2 + for status in annotation_statuses: + assert status.annotation_type == "vrs_mapping" + assert status.status == "success" + + async def test_map_variants_for_score_set_updates_existing_mapped_variants( + self, + with_independent_processing_runs, + session, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants updates existing mapped variants.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant and associated mapped data/annotation status in the score set to be updated + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2023-01-01T00:00:00Z", + mapping_api_version="v1.0.0", + ) + session.add(mapped_variant) + session.commit() + variant_annotation_status = VariantAnnotationStatus( + variant_id=variant.id, current=True, annotation_type="vrs_mapping", status="success" + ) + session.add(variant_annotation_status) + session.commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify the existing mapped variant was marked as non-current + non_current_mapped_variant = ( + session.query(MappedVariant) + .filter(MappedVariant.id == mapped_variant.id, MappedVariant.current.is_(False)) + .one_or_none() + ) + assert non_current_mapped_variant is not None + + # Verify a new mapped variant entry was created + new_mapped_variant = ( + session.query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, 
MappedVariant.current.is_(True)) + .one_or_none() + ) + assert new_mapped_variant is not None + + # Verify that the new mapped variant has updated mapping data + assert new_mapped_variant.mapped_date != "2023-01-01T00:00:00Z" + assert new_mapped_variant.mapping_api_version != "v1.0.0" + + # Verify the non-current annotation status still exists + old_annotation_status = ( + session.query(VariantAnnotationStatus) + .filter( + VariantAnnotationStatus.variant_id == non_current_mapped_variant.variant_id, + VariantAnnotationStatus.current.is_(False), + ) + .one_or_none() + ) + assert old_annotation_status is not None + + # Verify that a new annotation status was created + new_annotation_status = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == variant.id, VariantAnnotationStatus.current.is_(True)) + .one_or_none() + ) + assert new_annotation_status is not None + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestMapVariantsForScoreSetIntegration: + """Integration tests for map_variants_for_score_set job.""" + + async def test_map_variants_for_score_set_independent_job( + self, + session, + with_independent_processing_runs, + mock_s3_client, + mock_worker_ctx, + sample_independent_variant_creation_run, + sample_independent_variant_mapping_run, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + """Test mapping variants for an independent processing run.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, 
+ ) + + # Mock mapping output + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + # Now, map variants for the score set + result = await map_variants_for_score_set(mock_worker_ctx, sample_independent_variant_mapping_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify that mapped variants were created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 4 + + # Verify score set mapping state + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that target gene info was updated + for target in sample_score_set.target_genes: + assert target.mapped_hgnc_name is not None + assert target.post_mapped_metadata is not None + + # Verify that each variant has a corresponding mapped variant + variants = ( + session.query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that each variant has an annotation status + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + + # Verify that the job status was updated + processing_run = ( + session.query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + async def test_map_variants_for_score_set_pipeline_context( + self, + session, + with_variant_creation_pipeline_runs, + with_variant_mapping_pipeline_runs, + mock_s3_client, + mock_worker_ctx, + 
sample_pipeline_variant_creation_run, + sample_pipeline_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + ): + """Test mapping variants for a pipeline processing run.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_pipeline_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Mock mapping output + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + # Now, map variants for the score set + result = await map_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_mapping_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify that mapped variants were created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 4 + + # Verify score set mapping state + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that target gene info was updated + for target in sample_score_set.target_genes: + assert target.mapped_hgnc_name is not None + assert target.post_mapped_metadata is not None + + # Verify that each variant has a corresponding mapped variant + variants = ( + session.query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that each variant has an annotation status + annotation_statuses 
= ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + + # Verify that the job status was updated + processing_run = ( + session.query(sample_pipeline_variant_mapping_run.__class__) + .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status was updated. We expect RUNNING here because + # the mapping job is not the only job in our dummy pipeline. + pipeline_run = ( + session.query(sample_pipeline_variant_mapping_run.pipeline.__class__) + .filter( + sample_pipeline_variant_mapping_run.pipeline.__class__.id + == sample_pipeline_variant_mapping_run.pipeline.id + ) + .one() + ) + assert pipeline_run.status == PipelineStatus.RUNNING + + async def test_map_variants_for_score_set_empty_mapping_results( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants when no mapping results are returned.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return {} + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ with ( + patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=dummy_mapping_job()), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + mock_send_slack_job_failure.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Mapping results were not returned from VRS mapping service" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify that the job status was updated. 
+ processing_run = ( + session.query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_no_mapped_scores( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants when no variants are mapped.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=False, + with_reference_metadata=True, + with_mapped_scores=False, # No mapped scores + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + mock_send_slack_job_failure.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # Error message originates from our mock mapping construction function + assert "test error: no mapped scores" in sample_score_set.mapping_errors["error_message"] + + # Verify that no mapped variants were created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify that the job status was updated. 
+ processing_run = ( + session.query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_no_reference_data( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants when no reference data is provided.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=False, # No reference metadata + with_mapped_scores=True, + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + mock_send_slack_job_failure.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "Reference metadata missing from mapping results" in sample_score_set.mapping_errors["error_message"] + + # Verify that no mapped variants were created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify that the job status was updated. 
+ processing_run = ( + session.query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_updates_current_mapped_variants( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants updates current mapped variants even if no changes occur.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + # Associate mapped variants with all variants just created in the score set + variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + for variant in variants: + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2023-01-01T00:00:00Z", + mapping_api_version="v1.0.0", + ) + annotation_status = VariantAnnotationStatus( + variant_id=variant.id, current=True, annotation_type="vrs_mapping", status="success" + ) + session.add(annotation_status) + session.add(mapped_variant) + session.commit() + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ with (
+ patch.object(
+ _UnixSelectorEventLoop,
+ "run_in_executor",
+ return_value=dummy_mapping_job(),
+ ),
+ ):
+ result = await map_variants_for_score_set(
+ mock_worker_ctx,
+ sample_independent_variant_mapping_run.id,
+ )
+
+ assert isinstance(result, JobExecutionOutcome)
+ assert result.status == JobStatus.SUCCEEDED
+
+ assert sample_score_set.mapping_state == MappingState.complete
+ assert sample_score_set.mapping_errors is None
+
+ # Verify that mapped variants were marked as non-current and new entries created
+ mapped_variants = session.query(MappedVariant).all()
+ assert len(mapped_variants) == len(variants) * 2 # Each variant has two mapped entries now
+ for variant in variants:
+ non_current_mapped_variant = (
+ session.query(MappedVariant)
+ .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(False))
+ .one_or_none()
+ )
+ assert non_current_mapped_variant is not None
+
+ new_mapped_variant = (
+ session.query(MappedVariant)
+ .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True))
+ .one_or_none()
+ )
+ assert new_mapped_variant is not None
+
+ # Verify that the new mapped variant has updated mapping data
+ assert new_mapped_variant.mapped_date != "2023-01-01T00:00:00Z"
+ assert new_mapped_variant.mapping_api_version != "v1.0.0"
+
+ # Verify that annotation statuses were marked as non-current and new entries created
+ annotation_statuses = session.query(VariantAnnotationStatus).all()
+ assert len(annotation_statuses) == len(variants) * 2 # Each variant has two annotation statuses now
+ for variant in variants:
+ old_annotation_status = (
+ session.query(VariantAnnotationStatus)
+ .filter(VariantAnnotationStatus.variant_id == variant.id, VariantAnnotationStatus.current.is_(False))
+ .one_or_none()
+ )
+ assert old_annotation_status is not None
+
+ new_annotation_status = (
+ session.query(VariantAnnotationStatus)
+ .filter(VariantAnnotationStatus.variant_id == variant.id,
VariantAnnotationStatus.current.is_(True)) + .one_or_none() + ) + assert new_annotation_status is not None + + # Verify that the job status was updated. + processing_run = ( + session.query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + async def test_map_variants_for_score_set_no_variants( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no variants exist in the score set.""" + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + mock_send_slack_job_failure.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "test error: no mapped scores" in sample_score_set.mapping_errors["error_message"] + + # Verify that no mapped variants were created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify that the job status was updated. + processing_run = ( + session.query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_exception_in_mapping( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when an exception occurs during mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. 
+ async def dummy_mapping_job(): + raise ValueError("test exception during mapping") + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + mock_send_slack_job_error.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, ValueError) + # exception messages are persisted in internal properties + assert "test exception during mapping" in str(result.exception) + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # but replaced with generic error message for external visibility + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify that the job status was updated. 
+ processing_run = (
+ session.query(sample_independent_variant_mapping_run.__class__)
+ .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id)
+ .one()
+ )
+ assert processing_run.status == JobStatus.ERRORED
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+class TestMapVariantsForScoreSetArqContext:
+ """Integration tests for map_variants_for_score_set job using ARQ worker context."""
+
+ async def test_map_variants_for_score_set_with_arq_context_independent_ctx(
+ self,
+ session,
+ arq_redis,
+ arq_worker,
+ standalone_worker_context,
+ with_independent_processing_runs,
+ with_populated_domain_data,
+ mock_s3_client,
+ sample_score_dataframe,
+ sample_count_dataframe,
+ sample_score_set,
+ sample_independent_variant_creation_run,
+ sample_independent_variant_mapping_run,
+ ):
+ await create_variants_in_score_set(
+ session,
+ mock_s3_client,
+ sample_score_dataframe,
+ sample_count_dataframe,
+ standalone_worker_context,
+ sample_independent_variant_creation_run,
+ )
+
+ async def dummy_mapping_job():
+ return await construct_mock_mapping_output(
+ session=session,
+ score_set=sample_score_set,
+ with_gene_info=True,
+ with_layers={"g", "c", "p"},
+ with_pre_mapped=True,
+ with_post_mapped=True,
+ with_reference_metadata=True,
+ with_mapped_scores=True,
+ with_all_variants=True,
+ )
+
+ with (
+ patch.object(
+ _UnixSelectorEventLoop,
+ "run_in_executor",
+ return_value=dummy_mapping_job(),
+ ),
+ ):
+ await arq_redis.enqueue_job("map_variants_for_score_set", sample_independent_variant_mapping_run.id)
+ await arq_worker.async_run()
+ await arq_worker.run_check()
+
+ # Verify that mapped variants were created
+ mapped_variants = session.query(MappedVariant).all()
+ assert len(mapped_variants) == 4
+
+ # Verify score set mapping state
+ assert sample_score_set.mapping_state == MappingState.complete
+ assert sample_score_set.mapping_errors is None
+
+ # Verify that each variant has a corresponding mapped
variant + variants = ( + session.query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that each variant has an annotation status + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + + # Verify that the job status was updated + processing_run = ( + session.query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + async def test_map_variants_for_score_set_with_arq_context_pipeline_ctx( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + with_variant_creation_pipeline_runs, + with_variant_mapping_pipeline_runs, + with_populated_domain_data, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_pipeline_variant_creation_run, + sample_pipeline_variant_mapping_run, + ): + """Test mapping variants for a pipeline processing run using ARQ context.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + standalone_worker_context, + sample_pipeline_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=session, + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Mock mapping output + with ( + patch.object( + _UnixSelectorEventLoop, + 
"run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + # Now, map variants for the score set + await arq_redis.enqueue_job("map_variants_for_score_set", sample_pipeline_variant_mapping_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that mapped variants were created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 4 + + # Verify score set mapping state + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that each variant has a corresponding mapped variant + variants = ( + session.query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that each variant has an annotation status + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + + # Verify that the job status was updated + processing_run = ( + session.query(sample_pipeline_variant_mapping_run.__class__) + .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status was updated. We expect RUNNING here because + # the mapping job is not the only job in our dummy pipeline. 
+ pipeline_run = ( + session.query(sample_pipeline_variant_mapping_run.pipeline.__class__) + .filter( + sample_pipeline_variant_mapping_run.pipeline.__class__.id + == sample_pipeline_variant_mapping_run.pipeline.id + ) + .one() + ) + assert pipeline_run.status == PipelineStatus.RUNNING + + async def test_map_variants_for_score_set_with_arq_context_generic_exception_handling( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + with_independent_processing_runs, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants with ARQ context when an exception occurs during mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + raise ValueError("test exception during mapping") + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job("map_variants_for_score_set", sample_independent_variant_mapping_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # but replaced with generic error message for external visibility + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify that the job status was updated. 
+ processing_run = ( + session.query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.ERRORED + + async def test_map_variants_for_score_set_with_arq_context_generic_exception_in_pipeline_ctx( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + with_variant_mapping_pipeline_runs, + sample_pipeline_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants with ARQ context in pipeline when an exception occurs during mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + raise ValueError("test exception during mapping") + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + await arq_redis.enqueue_job("map_variants_for_score_set", sample_pipeline_variant_mapping_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_job_error.assert_called_once() + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # but replaced with generic error message for external visibility + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = session.query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify that the job status was updated. 
+ processing_run = ( + session.query(sample_pipeline_variant_mapping_run.__class__) + .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.ERRORED + + # Verify that the pipeline run status was updated to FAILED. + pipeline_run = ( + session.query(sample_pipeline_variant_mapping_run.pipeline.__class__) + .filter( + sample_pipeline_variant_mapping_run.pipeline.__class__.id + == sample_pipeline_variant_mapping_run.pipeline.id + ) + .one() + ) + assert pipeline_run.status == PipelineStatus.FAILED + + # Verify that other jobs in the pipeline were skipped + for job_run in pipeline_run.job_runs: + if job_run.id != sample_pipeline_variant_mapping_run.id: + assert job_run.status == JobStatus.SKIPPED diff --git a/tests/worker/lib/decorators/conftest.py b/tests/worker/lib/decorators/conftest.py new file mode 100644 index 000000000..851d7497a --- /dev/null +++ b/tests/worker/lib/decorators/conftest.py @@ -0,0 +1,10 @@ +import os + +import pytest + + +# Unset test mode flag before each test to ensure decorator logic is executed +# during unit testing of the decorator itself. +@pytest.fixture(autouse=True) +def unset_test_mode_flag(): + os.environ.pop("MAVEDB_TEST_MODE", None) diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py new file mode 100644 index 000000000..1829ea2de --- /dev/null +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -0,0 +1,120 @@ +# ruff: noqa: E402 +""" +Unit and integration tests for the with_guaranteed_job_run_record async decorator. +Covers JobRun creation, status transitions, error handling, and DB persistence. 
+""" + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +from sqlalchemy import select + +from mavedb import __version__ +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from tests.helpers.transaction_spy import TransactionSpy + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@with_guaranteed_job_run_record("test_job") +async def sample_job(ctx: dict, job_id: int): + """Sample job function to test the decorator. + + NOTE: The job_id parameter is injected by the decorator + and is not passed explicitly when calling the function. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + return JobExecutionOutcome.succeeded() + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestJobGuaranteeDecoratorUnit: + async def test_decorator_must_receive_ctx_as_first_argument(self, mock_worker_ctx): + with pytest.raises(ValueError) as exc_info: + await sample_job() + + assert "Managed functions must receive context as first argument" in str(exc_info.value) + + async def test_decorator_calls_wrapped_function(self, mock_worker_ctx): + result = await sample_job(mock_worker_ctx) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_decorator_creates_job_run(self, mock_worker_ctx, session): + with ( + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), + ): + await sample_job(mock_worker_ctx) + + job_run = session.execute(select(JobRun)).scalars().first() + assert job_run is not None + assert job_run.status == JobStatus.PENDING + assert job_run.job_type == "test_job" + assert job_run.job_function == "sample_job" + assert job_run.mavedb_version == __version__ + + +@pytest.mark.asyncio 
+@pytest.mark.integration +class TestJobGuaranteeDecoratorIntegration: + async def test_decorator_persists_job_run_record(self, session, standalone_worker_context): + # Flush called implicitly by commit + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + job_task = await sample_job(standalone_worker_context) + + assert isinstance(job_task, JobExecutionOutcome) + assert job_task.status == JobStatus.SUCCEEDED + + job_run = session.execute(select(JobRun).order_by(JobRun.id.desc())).scalars().first() + assert job_run.status == JobStatus.PENDING + assert job_run.job_type == "test_job" + assert job_run.job_function == "sample_job" + assert job_run.mavedb_version is not None + + async def test_decorator_skips_creation_when_job_id_provided(self, session, standalone_worker_context): + """When a job_id is already provided (e.g. from run_job script), the decorator + should use it instead of creating a new JobRun record.""" + # Pre-create a JobRun like run_job.py does + existing_job = JobRun( + job_type="test_job", + job_function="sample_job", + status=JobStatus.PENDING, + mavedb_version=__version__, + ) # type: ignore[call-arg] + session.add(existing_job) + session.flush() + existing_job_id = existing_job.id + + job_count_before = session.execute(select(JobRun)).scalars().all() + + # Call with the pre-existing job_id as the second argument + result = await sample_job(standalone_worker_context, existing_job_id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # No new JobRun should have been created + job_count_after = session.execute(select(JobRun)).scalars().all() + assert len(job_count_after) == len(job_count_before) + + async def test_decorator_raises_on_invalid_job_id(self, session, standalone_worker_context): + """When a job_id int is provided but doesn't correspond to a real JobRun, + the decorator should raise immediately to uphold its guarantee.""" + nonexistent_job_id = 999999 + + 
job_count_before = len(session.execute(select(JobRun)).scalars().all())
+
+        with pytest.raises(ValueError, match="does not correspond to an existing JobRun"):
+            await sample_job(standalone_worker_context, nonexistent_job_id)
+
+        # No new JobRun should have been created by the decorator
+        job_count_after = len(session.execute(select(JobRun)).scalars().all())
+        assert job_count_after == job_count_before
diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py
new file mode 100644
index 000000000..dd2e10ed4
--- /dev/null
+++ b/tests/worker/lib/decorators/test_job_management.py
@@ -0,0 +1,549 @@
+# ruff: noqa: E402
+
+"""
+Unit and integration tests for the with_job_management async decorator.
+Covers status transitions, error handling, and JobManager interaction.
+"""
+
+import pytest
+
+pytest.importorskip("arq")  # Skip tests if arq is not installed
+
+import asyncio
+from datetime import datetime
+from unittest.mock import patch
+
+from sqlalchemy import select
+
+from mavedb.lib.types.workflow import JobExecutionOutcome
+from mavedb.models.enums.job_pipeline import JobStatus
+from mavedb.models.job_run import JobRun
+from mavedb.worker.lib.decorators.job_management import with_job_management
+from mavedb.worker.lib.managers.exceptions import JobStateError
+from mavedb.worker.lib.managers.job_manager import JobManager
+from tests.helpers.transaction_spy import TransactionSpy
+
+pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr")
+
+
+@with_job_management
+async def sample_job(ctx: dict, job_id: int, job_manager: JobManager):
+    """Sample job function to test the decorator.
+
+    NOTE: The job_manager parameter is injected by the decorator
+    and is not passed explicitly when calling the function.
+
+    Args:
+        ctx (dict): Worker context dictionary.
+        job_id (int): ID of the JobRun record created by the decorator.
+ """ + return JobExecutionOutcome.succeeded() + + +@with_job_management +async def sample_raise(ctx: dict, job_id: int, job_manager: JobManager): + """Sample job function to test the decorator in cases where the wrapped function raises an exception. + + NOTE: The job_manager parameter is injected by the decorator + and is not passed explicitly when calling the function. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + raise RuntimeError("error in wrapped function") + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestManagedJobDecoratorUnit: + async def test_decorator_must_receive_ctx_as_first_argument(self, mock_job_manager): + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_job_manager.db): + await sample_job() + + assert "Managed functions must receive context as first argument" in str(exc_info.value) + + async def test_decorator_calls_wrapped_function_and_returns_result( + self, session, mock_job_manager, mock_worker_ctx + ): + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "succeed_job", return_value=None), + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + + result = await sample_job(mock_worker_ctx, 999) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_succeeds( + self, session, mock_worker_ctx, mock_job_manager + ): + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "succeed_job", return_value=None) as mock_succeed_job, + TransactionSpy.spy(session, 
expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_job(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_succeed_job.assert_called_once() + + async def test_decorator_calls_fail_job_when_wrapped_function_returns_failed( + self, session, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_fail(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.failed(reason="simulated failure") + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error"), + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_fail(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_fail_job.assert_called_once() + + async def test_decorator_calls_error_job_when_wrapped_function_returns_errored( + self, session, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_error(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.errored(exception=RuntimeError("simulated crash")) + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error"), + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "error_job", return_value=None) as mock_error_job, + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_error(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_error_job.assert_called_once() + + async def 
test_decorator_calls_start_job_and_skip_job_when_wrapped_function_returns_skipped_status( + self, session, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_skip(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.skipped() + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "skip_job", return_value=None) as mock_skip_job, + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_skip(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_skip_job.assert_called_once() + + async def test_decorator_calls_error_job_when_wrapped_function_raises_and_no_retry( + self, session, mock_worker_ctx, mock_job_manager + ): + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "error_job", return_value=None) as mock_error_job, + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_raise(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_error_job.assert_called_once() + mock_send_slack_job_error.assert_called_once() + + async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( + self, session, mock_worker_ctx, mock_job_manager + ): + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + 
patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "should_retry", return_value=True), + patch.object(mock_job_manager, "error_job", return_value=None), + patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_raise(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_prepare_retry.assert_called_once_with(reason="error in wrapped function") + mock_send_slack_job_error.assert_not_called() # Slack suppressed — job will retry + + @pytest.mark.parametrize("missing_key", ["redis"]) + async def test_decorator_raises_value_error_if_required_context_missing( + self, mock_job_manager, mock_worker_ctx, missing_key + ): + del mock_worker_ctx[missing_key] + + with ( + pytest.raises(ValueError) as exc_info, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): + await sample_job(mock_worker_ctx, 999) + + mock_send_slack_error.assert_called_once() + assert missing_key.replace("_", " ") in str(exc_info.value).lower() + assert "not found in job context" in str(exc_info.value).lower() + + async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( + self, session, mock_job_manager, mock_worker_ctx + ): + raised_exc = JobStateError("error in job start") + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + patch.object(mock_job_manager, "start_job", side_effect=raised_exc), + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "error_job", 
return_value=None), + TransactionSpy.spy(session, expect_rollback=True, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + result = await sample_job(mock_worker_ctx, 999) + + assert result.status == JobStatus.ERRORED + assert result.exception is raised_exc + mock_send_slack_job_error.assert_called_once() + + async def test_decorator_raises_value_error_if_job_id_missing(self, session, mock_job_manager, mock_worker_ctx): + # Remove job_id from args to simulate missing job_id + with ( + pytest.raises(ValueError) as exc_info, + TransactionSpy.spy(session), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): + await sample_job(mock_worker_ctx) + + mock_send_slack_error.assert_called_once() + assert "job id not found in function arguments" in str(exc_info.value).lower() + + async def test_decorator_swallows_exception_from_wrapped_function_inside_except( + self, session, mock_job_manager, mock_worker_ctx + ): + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "error_job", side_effect=JobStateError("error in error_job")), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, + ): + mock_job_manager_class.return_value = mock_job_manager + result = await sample_raise(mock_worker_ctx, 999) + + # Should notify twice: once for the internal error_job failure, once for the main exception + mock_send_slack_error.assert_called_once() # for the inner error_job failure + mock_send_slack_job_error.assert_called_once() # for the main exception (in finally) + # Errors 
within the main try block should take precedence + assert result.status == JobStatus.ERRORED + assert str(result.exception) == "error in wrapped function" + + async def test_decorator_passes_job_manager_to_wrapped(self, session, mock_job_manager, mock_worker_ctx): + @with_job_management + async def assert_manager_passed_job(ctx, job_id: int, job_manager): + assert isinstance(job_manager, JobManager) + return JobExecutionOutcome.succeeded() + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "succeed_job", return_value=None), + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + assert await assert_manager_passed_job(mock_worker_ctx, 999) + + async def test_decorator_still_transitions_errored_when_slack_is_unreachable( + self, session, mock_job_manager, mock_worker_ctx + ): + """When Slack is unreachable, the job should still transition to ERRORED + and the result should be returned (not an exception). 
send_slack_error + handles Slack failures internally, so the decorator is unaffected.""" + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.lib.slack.send_slack_message", side_effect=RuntimeError("Slack is down")), + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "error_job", return_value=None) as mock_error_job, + TransactionSpy.spy(session, expect_rollback=True, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + result = await sample_raise(mock_worker_ctx, 999) + + mock_error_job.assert_called_once() + assert result.status == JobStatus.ERRORED + assert str(result.exception) == "error in wrapped function" + + async def test_decorator_still_transitions_errored_when_slack_is_unreachable_and_error_job_fails( + self, session, mock_job_manager, mock_worker_ctx + ): + """When error_job fails and Slack is unreachable, the original exception is still + returned as an ERRORED result. 
The decorator logs critical for the error_job failure, + and send_slack_error handles Slack failures internally.""" + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.lib.slack.send_slack_message", side_effect=RuntimeError("Slack is down")), + patch("mavedb.worker.lib.decorators.job_management.logger") as mock_logger, + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "error_job", side_effect=JobStateError("error in error_job")), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + result = await sample_raise(mock_worker_ctx, 999) + + assert result.status == JobStatus.ERRORED + assert str(result.exception) == "error in wrapped function" + # Decorator logs critical when error_job itself fails, regardless of Slack status + mock_logger.critical.assert_called() + + async def test_decorator_passes_will_retry_false_to_slack_on_failed_result_no_retry( + self, session, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_fail(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.failed(reason="timeout") + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_slack, + patch.object(mock_job_manager, "start_job"), + patch.object(mock_job_manager, "fail_job"), + patch.object(mock_job_manager, "should_retry", return_value=False), + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_fail(mock_worker_ctx, 999) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert 
call_kwargs["retry_count"] == 0 + assert call_kwargs["max_retries"] == 3 + + async def test_decorator_suppresses_slack_on_failed_result_with_retry( + self, session, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_fail(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.failed(reason="timeout") + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_slack, + patch.object(mock_job_manager, "start_job"), + patch.object(mock_job_manager, "fail_job"), + patch.object(mock_job_manager, "should_retry", return_value=True), + patch.object(mock_job_manager, "prepare_retry", return_value=None), + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_fail(mock_worker_ctx, 999) + + mock_slack.assert_not_called() # Slack suppressed — job will retry + + async def test_decorator_passes_will_retry_false_to_slack_on_exception_no_retry( + self, session, mock_worker_ctx, mock_job_manager + ): + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_slack, + patch.object(mock_job_manager, "start_job"), + patch.object(mock_job_manager, "error_job"), + patch.object(mock_job_manager, "should_retry", return_value=False), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_raise(mock_worker_ctx, 999) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert call_kwargs["retry_count"] == 0 + assert call_kwargs["max_retries"] == 3 + + async def test_decorator_suppresses_slack_on_exception_with_retry(self, session, mock_worker_ctx, 
mock_job_manager): + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_slack, + patch.object(mock_job_manager, "start_job"), + patch.object(mock_job_manager, "error_job"), + patch.object(mock_job_manager, "should_retry", return_value=True), + patch.object(mock_job_manager, "prepare_retry", return_value=None), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_raise(mock_worker_ctx, 999) + + mock_slack.assert_not_called() # Slack suppressed — job will retry + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestManagedJobDecoratorIntegration: + """Integration tests for with_job_management decorator.""" + + async def test_decorator_integrated_job_lifecycle_success( + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data + ): + # Use an event to control when the job completes + event = asyncio.Event() + + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + return JobExecutionOutcome.succeeded() + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Now allow the job to complete + event.set() + await job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + async def 
test_decorator_integrated_job_lifecycle_skipped( + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.skipped() + + # Run the job + await sample_job(standalone_worker_context, sample_job_run.id) + + # After completion, status should be SKIPPED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + async def test_decorator_integrated_job_lifecycle_failed( + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.failed(reason="Simulated job failure") + + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure: + # Run the job + await sample_job(standalone_worker_context, sample_job_run.id) + + mock_send_slack_job_failure.assert_called_once() + # After completion, status should be FAILED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + assert job.error_message == "Simulated job failure" + + async def test_decorator_integrated_job_lifecycle_raised_exception( + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data + ): + # Use an event to control when the job completes + event = asyncio.Event() + + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + raise RuntimeError("Simulated job failure") + + # Start the job (it will block at event.wait()) + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error: + job_task = 
asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + + # At this point, the job should be started but not in error + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Now allow the job to complete with failure. This failure + # should be swallowed by the job_task. + event.set() + await job_task + + mock_send_slack_job_error.assert_called_once() + + # After failure, status should be ERRORED (unhandled exception) + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.ERRORED + assert job.error_message == "Simulated job failure" + + async def test_decorator_integrated_job_lifecycle_retry( + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data + ): + # Use an event to control when the job completes + event = asyncio.Event() + + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + raise ConnectionError("Simulated network failure for retry") + + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error: + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + + # At this point, the job should be started but not in error + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # ConnectionError is classified as NETWORK_ERROR (retryable), so retry + # logic triggers automatically without patching should_retry. 
+ event.set() + await job_task + + mock_send_slack_job_error.assert_not_called() # Slack suppressed — job will retry + + # After failure with retry, status should be PENDING + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.retry_count == 1 # Ensure it attempted once before retrying + + async def test_decorator_integrated_recovers_stale_running_job( + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data + ): + """Integration test: when a job is stuck RUNNING from a crashed worker, + start_job() accepts the RUNNING state and the job completes successfully.""" + + # Simulate a stale RUNNING state from a previous worker crash + sample_job_run.status = JobStatus.RUNNING + sample_job_run.started_at = datetime.now() + session.commit() + + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.succeeded() + + await sample_job(standalone_worker_context, sample_job_run.id) + + # Job should have recovered and completed successfully + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + # started_at should be refreshed (not the stale timestamp) + assert job.started_at is not None diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py new file mode 100644 index 000000000..be5fb8179 --- /dev/null +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -0,0 +1,598 @@ +# ruff : noqa: E402 + +""" +Unit tests for the with_pipeline_management async decorator. +Covers orchestration steps, error handling, and PipelineManager interaction. 
+""" + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +import asyncio +from unittest.mock import patch + +from sqlalchemy import select + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from tests.helpers.transaction_spy import TransactionSpy + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +async def sample_job(ctx=None, job_id=None): + """Sample job function to test the decorator. When called, it patches + the with_job_management decorator to be a no-op so we can test the + with_pipeline_management decorator in isolation. + + NOTE: The job_manager parameter is normally injected by the with_job_management + decorator. Since we are patching that decorator to be a no-op here, + we do not include it in the function signature. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + # patch the with_job_management decorator to be a no-op + with patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f + ) as mock_job_mgmt: + + @with_pipeline_management + async def patched_sample_job(ctx: dict, job_id: int): + return JobExecutionOutcome.succeeded() + + return await patched_sample_job(ctx, job_id) + + # Ensure the mock was called + mock_job_mgmt.assert_called_once() + + +async def sample_raise(ctx: dict, job_id: int): + """Sample job function to test the decorator when a job raises. 
+ When called, it patches the with_job_management decorator to be + a no-op so we can test the with_pipeline_management decorator in isolation. + + NOTE: The job_manager parameter is normally injected by the with_job_management + decorator. Since we are patching that decorator to be a no-op here, + we do not include it in the function signature. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + # patch the with_job_management decorator to be a no-op + with patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f + ) as mock_job_mgmt: + + @with_pipeline_management + async def patched_sample_job(ctx: dict, job_id: int): + raise RuntimeError("error in wrapped function") + + return await patched_sample_job(ctx, job_id) + + # Ensure the mock was called + mock_job_mgmt.assert_called_once() + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestPipelineManagementDecoratorUnit: + """Unit tests for the with_pipeline_management decorator.""" + + async def test_decorator_must_receive_ctx_as_first_argument(self, mock_pipeline_manager): + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): + await sample_job() + + assert "Managed functions must receive context as first argument" in str(exc_info.value) + + @pytest.mark.parametrize("missing_key", ["redis"]) + async def test_decorator_raises_value_error_if_required_context_missing( + self, mock_pipeline_manager, mock_worker_ctx, missing_key + ): + del mock_worker_ctx[missing_key] + + with ( + pytest.raises(ValueError) as exc_info, + TransactionSpy.spy(mock_pipeline_manager.db), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, + ): + await sample_job(mock_worker_ctx, 999) + + assert missing_key.replace("_", " ") in str(exc_info.value).lower() + assert "not found in pipeline context" in str(exc_info.value).lower() + 
mock_send_slack_error.assert_called_once() + + async def test_decorator_raises_value_error_if_job_id_missing(self, mock_pipeline_manager, mock_worker_ctx): + # Remove job_id from args to simulate missing job_id + with ( + pytest.raises(ValueError) as exc_info, + TransactionSpy.spy(mock_pipeline_manager.db), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, + ): + await sample_job(mock_worker_ctx) + + assert "job id not found in function arguments" in str(exc_info.value).lower() + mock_send_slack_error.assert_called_once() + + async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id( + self, session, mock_pipeline_manager, mock_worker_ctx + ): + with ( + TransactionSpy.mock_database_execution_failure( + session, + exception=ValueError("job id not found in pipeline context"), + expect_rollback=True, + ), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, + ): + await sample_job(mock_worker_ctx, 999) + mock_send_slack_error.assert_called_once() + + async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manager( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): + with ( + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + TransactionSpy.spy(session, expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + result = await sample_job(mock_worker_ctx, sample_job_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_independent_job_run, 
with_populated_job_data + ): + with ( + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, + # We shouldn't expect any commits since no pipeline coordination occurs + TransactionSpy.spy(session), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + result = await sample_job(mock_worker_ctx, sample_independent_job_run.id) + + mock_coordinate_pipeline.assert_not_called() + mock_start_pipeline.assert_not_called() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_decorator_starts_pipeline_when_in_created_state( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): + with ( + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(session, expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + result = await sample_job(mock_worker_ctx, sample_job_run.id) + + mock_start_pipeline.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + @pytest.mark.parametrize( + "pipeline_state", + [status for status in PipelineStatus._member_map_.values() if status != PipelineStatus.CREATED], + ) + async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, 
with_populated_job_data, pipeline_state + ): + with ( + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_state), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(session, expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + result = await sample_job(mock_worker_ctx, sample_job_run.id) + + mock_start_pipeline.assert_not_called() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrapped_function( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): + with ( + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + TransactionSpy.spy(session, expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + await sample_job(mock_worker_ctx, sample_job_run.id) + + mock_coordinate_pipeline.assert_called_once() + + async def test_decorator_swallows_exception_from_wrapped_function( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): + with ( + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + patch.object(mock_pipeline_manager, 
"start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + await sample_raise(mock_worker_ctx, sample_job_run.id) + + mock_send_slack_error.assert_called_once() + + async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pipeline( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): + with ( + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_pipeline_manager, + "coordinate_pipeline", + side_effect=RuntimeError("error in coordinate_pipeline"), + ), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + # Exception raised from coordinate_pipeline should trigger rollback, + # and commit will be called when pipeline status is set to running + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + await sample_job(mock_worker_ctx, sample_job_run.id) + + assert mock_send_slack_error.call_count == 2 + + async def test_decorator_swallows_exception_from_job_management_decorator( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): + def passthrough_decorator(f): + return f + + with ( + # patch the with_job_management decorator to raise an error + patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", + 
wraps=passthrough_decorator, + side_effect=ValueError("error in job management decorator"), + ) as mock_with_job_mgmt, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return JobExecutionOutcome.succeeded() + + await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + + mock_with_job_mgmt.assert_called_once() + mock_send_slack_error.assert_called_once() + + async def test_decorator_still_returns_result_when_slack_is_unreachable( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): + """When Slack is unreachable and the pipeline fails, the result should still be returned. 
+ send_slack_error handles Slack failures internally, so the decorator is unaffected.""" + with ( + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.lib.slack.send_slack_message", side_effect=RuntimeError("Slack is down")), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + result = await sample_raise(mock_worker_ctx, sample_job_run.id) + + assert result.status == JobStatus.ERRORED + + async def test_decorator_still_returns_result_when_slack_is_unreachable_and_coordination_fails( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): + """When pipeline coordination fails and Slack is unreachable, the result should still be returned. 
+ The decorator logs critical for the coordination failure, and send_slack_error handles + Slack failures internally.""" + with ( + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_pipeline_manager, + "coordinate_pipeline", + side_effect=RuntimeError("coordination failed"), + ), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.lib.slack.send_slack_message", side_effect=RuntimeError("Slack is down")), + patch("mavedb.worker.lib.decorators.pipeline_management.logger") as mock_logger, + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + result = await sample_job(mock_worker_ctx, sample_job_run.id) + + assert result.status == JobStatus.ERRORED + # Decorator logs critical when cleanup coordination also fails, regardless of Slack status + mock_logger.critical.assert_called() + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPipelineManagementDecoratorIntegration: + """Integration tests for the with_pipeline_management decorator.""" + + @pytest.mark.parametrize("initial_status", [PipelineStatus.CREATED, PipelineStatus.RUNNING]) + async def test_decorator_integrated_pipeline_lifecycle_success( + self, + session, + arq_redis, + sample_job_run, + sample_dependent_job_run, + standalone_worker_context, + with_populated_job_data, + sample_pipeline, + initial_status, + ): + # Use an event to control when the job completes + event = asyncio.Event() + dep_event = asyncio.Event() + + # Set initial pipeline status to the parameterized value. + # This allows testing both CREATED and RUNNING start states. 
+ sample_pipeline.status = initial_status + session.commit() + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + return JobExecutionOutcome.succeeded() + + @with_pipeline_management + async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): + await dep_event.wait() # Simulate async work, block until test signals + return JobExecutionOutcome.succeeded() + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete and flush the Redis queue. Flush the queue first to ensure + # we don't mistakenly flush our queued job. + await arq_redis.flushdb() + event.set() + await job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + + # Pipeline remains RUNNING after job success, another job was queued. + assert pipeline.status == PipelineStatus.RUNNING + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 # Ensure the next job was queued + + # Simulate execution of next job by running the dependent job. 
+ # Start the job (it will block at event.wait()) + dependent_job_task = asyncio.create_task( + sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id) + ) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete and flush the Redis queue. Flush the queue first to ensure + # we don't mistakenly flush our queued job. + await arq_redis.flushdb() + dep_event.set() + await dependent_job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + # Now that all jobs are complete, the pipeline should be SUCCEEDED + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.SUCCEEDED + + # No further jobs should be queued + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( + self, + session, + arq_redis, + sample_job_run, + sample_dependent_job_run, + standalone_worker_context, + with_populated_job_data, + sample_pipeline, + ): + # Use an event to control when the job completes + event = asyncio.Event() + retry_event = asyncio.Event() + dep_event = asyncio.Event() + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + raise ConnectionError("Simulated network failure for retry") + + 
@with_pipeline_management + async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): + await retry_event.wait() # Simulate async work, block until test signals + return JobExecutionOutcome.succeeded() + + @with_pipeline_management + async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): + await dep_event.wait() # Simulate async work, block until test signals + return JobExecutionOutcome.succeeded() + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # ConnectionError is classified as NETWORK_ERROR (retryable), so retry + # logic triggers automatically without patching should_retry. + event.set() + await job_task + # Slack error is deferred-- job is retryable. 
+ + # After failure with retry, status should be QUEUED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + assert job.retry_count == 1 # Ensure it attempted once before retrying + + # Now start the retried job (it will block at retry_event.wait()) + retried_job_task = asyncio.create_task(sample_retried_job(standalone_worker_context, sample_job_run.id)) + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # The pipeline should remain running + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the retried job to complete successfully + await arq_redis.flushdb() + retry_event.set() + await retried_job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 # Ensure the next job was queued + + # Simulate execution of next job by running the dependent job. 
+ # Start the job (it will block at event.wait()) + dependent_job_task = asyncio.create_task( + sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id) + ) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete and flush the Redis queue. Flush the queue first to ensure + # we don't mistakenly flush our queued job. + await arq_redis.flushdb() + dep_event.set() + await dependent_job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + # Now that all jobs are complete, the pipeline should be SUCCEEDED + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.SUCCEEDED + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 # Ensure no further jobs were queued + + async def test_decorator_integrated_pipeline_lifecycle_non_retryable_failure( + self, + session, + arq_redis, + sample_job_run, + sample_dependent_job_run, + standalone_worker_context, + with_populated_job_data, + sample_pipeline, + ): + # Use an event to control when the job completes + event = asyncio.Event() + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + raise RuntimeError("Simulated job failure") + + # job management handles slack alerting in this context + with 
patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error: + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete with failure and flush the Redis queue. This failure + # should be swallowed by the pipeline manager + await arq_redis.flushdb() + event.set() + await job_task + + mock_send_slack_job_error.assert_called_once() + + # After failure with no retry, status should be ERRORED (unhandled exception) + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.ERRORED + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + + # Pipeline should be marked FAILED after job failure + assert pipeline.status == PipelineStatus.FAILED + + # No further jobs should be queued + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + # Dependent job should transition to skipped since it was never queued + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED diff --git a/tests/worker/lib/decorators/test_utils.py b/tests/worker/lib/decorators/test_utils.py new file mode 100644 index 000000000..70d5399f4 --- /dev/null +++ b/tests/worker/lib/decorators/test_utils.py @@ -0,0 +1,177 @@ +# ruff : noqa: E402 + +""" +Unit tests for ensure_session_ctx, verifying task-local session 
isolation. + +ARQ runs multiple jobs concurrently as asyncio Tasks sharing +the same ctx dict. Without task-local sessions, one job closing its session can +invalidate sessions used by other jobs, causing them to silently error and +preventing pipeline coordination. +""" + +import asyncio +from contextlib import contextmanager +from unittest.mock import MagicMock, patch + +import pytest + +pytest.importorskip("arq") + +from mavedb.worker.lib.decorators.utils import _task_db_session, ensure_session_ctx + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +def _mock_session_factory(*sessions): + """Return a context-manager factory that yields sessions in order.""" + it = iter(sessions) + + @contextmanager + def factory(): + yield next(it) + + return factory + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestEnsureSessionCtxConcurrency: + """Concurrent asyncio Tasks must each get their own DB session.""" + + async def test_concurrent_tasks_get_isolated_sessions(self): + """Two Tasks sharing the same ctx dict should each create their own session, + not reuse the other's via ctx['db'].""" + shared_ctx: dict = {} + results: dict = {} + + task_a_entered = asyncio.Event() + task_b_entered = asyncio.Event() + task_a_can_exit = asyncio.Event() + + session_a = MagicMock(name="session_a") + session_b = MagicMock(name="session_b") + + with patch( + "mavedb.worker.lib.decorators.utils.db_session", + _mock_session_factory(session_a, session_b), + ): + + async def task_a(): + with ensure_session_ctx(shared_ctx) as session: + results["a"] = session + task_a_entered.set() + await task_a_can_exit.wait() + + async def task_b(): + await task_a_entered.wait() + with ensure_session_ctx(shared_ctx) as session: + results["b"] = session + task_b_entered.set() + + t_a = asyncio.create_task(task_a()) + t_b = asyncio.create_task(task_b()) + + await task_b_entered.wait() + task_a_can_exit.set() + await asyncio.gather(t_a, t_b) + + assert results["a"] is session_a + assert 
results["b"] is session_b + assert results["a"] is not results["b"] + + async def test_session_survives_other_task_cleanup(self): + """After Task A exits and cleans up its session, Task B's session + should remain valid and accessible.""" + shared_ctx: dict = {} + results: dict = {} + + task_a_exited = asyncio.Event() + task_b_can_check = asyncio.Event() + + session_a = MagicMock(name="session_a") + session_b = MagicMock(name="session_b") + + with patch( + "mavedb.worker.lib.decorators.utils.db_session", + _mock_session_factory(session_a, session_b), + ): + + async def task_a(): + with ensure_session_ctx(shared_ctx): + pass + task_a_exited.set() + + async def task_b(): + await task_a_exited.wait() + with ensure_session_ctx(shared_ctx) as session: + results["b"] = session + task_b_can_check.set() + + t_a = asyncio.create_task(task_a()) + t_b = asyncio.create_task(task_b()) + await task_b_can_check.wait() + await asyncio.gather(t_a, t_b) + + assert results["b"] is session_b + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestEnsureSessionCtxNesting: + """Nested calls within the same Task should reuse the outer session.""" + + async def test_nested_call_reuses_outer_session(self): + """The inner ensure_session_ctx should return the same session + as the outer one, without creating a new session.""" + ctx: dict = {} + outer_session = MagicMock(name="outer_session") + call_count = 0 + + @contextmanager + def counting_factory(): + nonlocal call_count + call_count += 1 + yield outer_session + + with patch("mavedb.worker.lib.decorators.utils.db_session", counting_factory): + with ensure_session_ctx(ctx) as s1: + with ensure_session_ctx(ctx) as s2: + assert s1 is s2 is outer_session + + assert call_count == 1 + + async def test_context_var_cleaned_up_after_exit(self): + """After the outermost ensure_session_ctx exits, the context var + should be None so a subsequent call creates a fresh session.""" + ctx: dict = {} + session_1 = MagicMock(name="session_1") + session_2 = 
MagicMock(name="session_2") + + with patch( + "mavedb.worker.lib.decorators.utils.db_session", + _mock_session_factory(session_1, session_2), + ): + with ensure_session_ctx(ctx) as s1: + assert s1 is session_1 + assert _task_db_session.get() is None + + with ensure_session_ctx(ctx) as s2: + assert s2 is session_2 + assert _task_db_session.get() is None + + async def test_context_var_cleaned_up_on_exception(self): + """If an exception occurs inside the context manager, the context + var should still be cleaned up.""" + ctx: dict = {} + session = MagicMock(name="session") + + @contextmanager + def raising_db_session(): + yield session + + with patch("mavedb.worker.lib.decorators.utils.db_session", raising_db_session): + with pytest.raises(RuntimeError): + with ensure_session_ctx(ctx): + raise RuntimeError("boom") + + assert _task_db_session.get() is None diff --git a/tests/worker/lib/managers/test_base_manager.py b/tests/worker/lib/managers/test_base_manager.py new file mode 100644 index 000000000..7f5c3a919 --- /dev/null +++ b/tests/worker/lib/managers/test_base_manager.py @@ -0,0 +1,19 @@ +# ruff: noqa: E402 +import pytest + +pytest.importorskip("arq") + +from mavedb.worker.lib.managers.base_manager import BaseManager + + +@pytest.mark.integration +class TestInitialization: + """Tests for BaseManager initialization.""" + + def test_initialization(self, session, arq_redis): + """Test that BaseManager initializes with db and redis attributes.""" + + manager = BaseManager(db=session, redis=arq_redis) + + assert manager.db == session + assert manager.redis == arq_redis diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py new file mode 100644 index 000000000..980a30f02 --- /dev/null +++ b/tests/worker/lib/managers/test_job_manager.py @@ -0,0 +1,2321 @@ +# ruff: noqa: E402 +""" +Comprehensive test suite for JobManager class. 
+ +Tests cover all aspects of job lifecycle management, pipeline coordination, +error handling, and database interactions. +""" + +import pytest + +pytest.importorskip("arq") + +import re +from unittest.mock import Mock, PropertyMock, patch + +from arq import ArqRedis +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.managers.constants import ( + CANCELLED_JOB_STATUSES, + RETRYABLE_FAILURE_CATEGORIES, + RETRYABLE_JOB_STATUSES, + STARTABLE_JOB_STATUSES, + TERMINAL_JOB_STATUSES, + TERMINAL_PROGRESS_MESSAGES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + JobStateError, + JobTransitionError, +) +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.transaction_spy import TransactionSpy + +HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( + AttributeError("Mock attribute error"), + KeyError("Mock key error"), + TypeError("Mock type error"), + ValueError("Mock value error"), +) + + +@pytest.mark.integration +class TestJobManagerInitialization: + """Test JobManager initialization and setup.""" + + def test_init_with_valid_job(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test successful initialization with valid job ID.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + assert manager.db == session + assert manager.job_id == sample_job_run.id + assert manager.pipeline_id == sample_job_run.pipeline_id + + def test_init_with_no_pipeline(self, session, arq_redis, with_populated_job_data, sample_independent_job_run): + """Test initialization with job that has no pipeline.""" + manager = JobManager(session, arq_redis, sample_independent_job_run.id) + + assert manager.job_id == 
sample_independent_job_run.id + assert manager.pipeline_id is None + + def test_init_with_invalid_job_id(self, session, arq_redis): + """Test initialization failure with non-existent job ID.""" + job_id = 999 # Assuming this ID does not exist + with pytest.raises(DatabaseConnectionError, match=f"Failed to fetch job {job_id}"): + JobManager(session, arq_redis, job_id) + + +@pytest.mark.unit +class TestJobStartUnit: + """Unit tests for job start lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in STARTABLE_JOB_STATUSES], + ) + def test_start_job_raises_job_transition_error_when_managed_job_has_unstartable_status( + self, mock_job_manager, invalid_status, mock_job_run + ): + # Set initial job status to an invalid (unstartable) status. + mock_job_run.status = invalid_status + + # Start job. Verify a JobTransitionError is raised due to invalid state in the mocked + # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises( + JobTransitionError, + match=f"Cannot start job {mock_job_manager.job_id} from status {invalid_status}", + ), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.start_job() + + # Verify job state on the mocked object remains unchanged. 
+ assert mock_job_run.status == invalid_status + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in STARTABLE_JOB_STATUSES], + ) + def test_start_job_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run, valid_status + ): + """Test job start failure due to exception during job object manipulation.""" + # Set initial job status to a valid status. Job status must be startable for this test. + mock_job_run.status = valid_status + + # Trigger: If any attribute access occurs on job, raise exception. If no access, return QUEUED. + def get_or_error(*args): + if args: + raise exception + return valid_status + + # Start job. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises(JobStateError, match="Failed to update job start state"), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.start_job() + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == valid_status + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in STARTABLE_JOB_STATUSES], + ) + def test_start_job_success(self, mock_job_manager, mock_job_run, valid_status): + """Test successful job start.""" + # Set initial job status to a valid status. 
Job status must be startable for this test. + mock_job_run.status = valid_status + + # Start job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.start_job() + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.status == JobStatus.RUNNING + assert mock_job_run.started_at is not None + assert mock_job_run.progress_message == "Job began execution" + + def test_start_job_logs_warning_for_running_recovery(self, mock_job_manager, mock_job_run): + """When start_job is called on a RUNNING job (stale from crashed worker), it logs a warning + and resets the start time rather than raising an error.""" + mock_job_run.status = JobStatus.RUNNING + mock_job_run.started_at = "2025-01-01T00:00:00" + + with ( + TransactionSpy.spy(mock_job_manager.db), + patch("mavedb.worker.lib.managers.job_manager.logger") as mock_logger, + ): + mock_job_manager.start_job() + + mock_logger.warning.assert_called_once() + assert "already RUNNING" in mock_logger.warning.call_args[0][0] + assert mock_job_run.status == JobStatus.RUNNING + assert mock_job_run.started_at is not None + + +@pytest.mark.integration +class TestJobStartIntegration: + """Integration tests for job start lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in STARTABLE_JOB_STATUSES], + ) + def test_job_exception_is_raised_when_job_has_invalid_status( + self, session, arq_redis, with_populated_job_data, sample_job_run, invalid_status + ): + """Test job start failure due to invalid job status.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Manually set job to invalid status and commit changes. 
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = invalid_status + session.commit() + + # Start job. Verify a JobTransitionError is raised due to the previously set invalid state. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + # Although the job might still set some attributes before the error is raised, the exception + # indicates to the caller that the job was not started successfully and the transaction should be rolled back. + with ( + TransactionSpy.spy(manager.db), + pytest.raises( + JobTransitionError, + match=f"Cannot start job {sample_job_run.id} from status {invalid_status.value}", + ), + ): + manager.start_job() + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in STARTABLE_JOB_STATUSES], + ) + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run, valid_status): + """Test successful job start.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Manually set job to invalid status and commit changes. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = valid_status + session.commit() + + # Start job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.start_job() + + # Commit pending changes made by start job. + session.commit() + + # Verify job state was updated in transaction with expected values. 
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + assert job.started_at is not None + assert job.progress_message == "Job began execution" + + +@pytest.mark.unit +class TestJobCompletionUnit: + """Unit tests for job completion lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in TERMINAL_JOB_STATUSES], + ) + def test_complete_job_raises_job_transition_error_when_managed_job_has_non_terminal_status( + self, mock_job_manager, mock_job_run, invalid_status + ): + # Set initial job status to an invalid (non-terminal) status. + mock_job_run.status = invalid_status + + # Complete job. Verify a JobTransitionError is raised due to invalid state in the mocked + # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises( + JobTransitionError, + match=re.escape( + f"Cannot complete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + ), + ), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.complete_job(status=invalid_status, result=JobExecutionOutcome.succeeded()) + + # Verify job state on the mocked object remains unchanged. 
+ assert mock_job_run.status == invalid_status + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + def test_complete_job_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, mock_job_run, exception, valid_status + ): + """Test job completion failure due to exception during job object manipulation.""" + # Trigger: If any attribute setting on job status, raise exception. If only accessing, return whatever the mock + # objects original status was (starting job status doesn't matter for this test). + base_status = mock_job_run.status + + def get_or_error(*args): + if args: + raise exception + return base_status + + # Complete job. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises(JobStateError, match="Failed to update job completion state"), + TransactionSpy.spy(mock_job_manager.db), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.complete_job(status=valid_status, result=JobExecutionOutcome.succeeded()) + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. 
+ assert mock_job_run.status == base_status + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_job_manager, mock_job_run): + """Test job completion sets default failure category when job failed without error.""" + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job( + status=JobStatus.FAILED, result=JobExecutionOutcome.failed(reason="test failure") + ) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.FAILED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == { + "result": { + "status": "failed", + "data": {}, + "error": "test failure", + "exception_details": None, + } + } + assert mock_job_run.error_message == "test failure" + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category == FailureCategory.UNKNOWN + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + @pytest.mark.parametrize( + "exception", + [ValueError("Test error"), None], + ) + def test_complete_job_success(self, mock_job_manager, valid_status, exception, mock_job_run): + """Test successful job completion.""" + + # Build the appropriate JobExecutionOutcome based on whether an exception is present. + if exception: + outcome = JobExecutionOutcome.errored(exception=exception, data={"output": "test"}) + else: + outcome = JobExecutionOutcome.succeeded(data={"output": "test"}) + + # Complete job. 
Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=valid_status, result=outcome) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == valid_status + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_["result"] == { + "status": outcome.status.value, + "data": {"output": "test"}, + "error": outcome.error, + "exception_details": format_raised_exception_info_as_dict(exception) if exception else None, + } + + # If an exception was provided, verify error fields are set appropriately. + if exception: + assert mock_job_run.error_message == str(exception) + assert mock_job_run.error_traceback is not None + + # failure_category is only set for FAILED/ERRORED statuses + if valid_status in (JobStatus.FAILED, JobStatus.ERRORED): + assert mock_job_run.failure_category == FailureCategory.UNKNOWN + else: + assert mock_job_run.failure_category is None + + else: + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + + # Proper handling of failure category only applies to FAILED status. See + # test_complete_job_sets_default_failure_category_when_job_failed for that case. 
+ + def test_complete_job_uses_explicit_failure_category_from_outcome(self, mock_job_manager, mock_job_run): + """Test that an explicit failure_category on the outcome takes priority.""" + result = JobExecutionOutcome.failed(reason="rate limited", failure_category=FailureCategory.NETWORK_ERROR) + + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.FAILED, result=result) + + assert mock_job_run.failure_category == FailureCategory.NETWORK_ERROR + + def test_complete_job_classifies_exception_when_no_explicit_category(self, mock_job_manager, mock_job_run): + """Test that classify_exception is used when outcome has no explicit category but has an exception.""" + result = JobExecutionOutcome.errored(exception=ConnectionError("connection refused")) + + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.ERRORED, result=result) + + assert mock_job_run.failure_category == FailureCategory.NETWORK_ERROR + + def test_complete_job_classifies_timeout_exception(self, mock_job_manager, mock_job_run): + """Test that TimeoutError is classified as TIMEOUT.""" + result = JobExecutionOutcome.errored(exception=TimeoutError("timed out")) + + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.ERRORED, result=result) + + assert mock_job_run.failure_category == FailureCategory.TIMEOUT + + def test_complete_job_explicit_category_overrides_exception_classification(self, mock_job_manager, mock_job_run): + """Test that explicit failure_category takes priority over exception classification.""" + result = JobExecutionOutcome.errored( + exception=ConnectionError("conn refused"), + failure_category=FailureCategory.SERVICE_UNAVAILABLE, + ) + + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.ERRORED, result=result) + + assert mock_job_run.failure_category == FailureCategory.SERVICE_UNAVAILABLE + + @pytest.mark.parametrize( + 
"status, expected_message", + list(TERMINAL_PROGRESS_MESSAGES.items()), + ) + def test_complete_job_sets_terminal_progress_message( + self, mock_job_manager, mock_job_run, status, expected_message + ): + """complete_job sets a generic terminal progress_message for all terminal statuses.""" + result = JobExecutionOutcome.succeeded() + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=status, result=result) + assert mock_job_run.progress_message == expected_message + + @pytest.mark.parametrize("status", [JobStatus.CANCELLED, JobStatus.SKIPPED]) + def test_complete_job_clears_numeric_progress_for_cancelled_and_skipped( + self, mock_job_manager, mock_job_run, status + ): + """CANCELLED/SKIPPED jobs null out progress_current/total since they never completed.""" + mock_job_run.progress_current = 42 + mock_job_run.progress_total = 100 + result = JobExecutionOutcome.succeeded() + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=status, result=result) + assert mock_job_run.progress_current is None + assert mock_job_run.progress_total is None + + @pytest.mark.parametrize("status", [JobStatus.FAILED, JobStatus.ERRORED]) + def test_complete_job_preserves_numeric_progress_for_failed_and_errored( + self, mock_job_manager, mock_job_run, status + ): + """FAILED/ERRORED jobs keep progress_current/total to show how far the job reached.""" + mock_job_run.progress_current = 42 + mock_job_run.progress_total = 100 + result = JobExecutionOutcome.succeeded() + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=status, result=result) + assert mock_job_run.progress_current == 42 + assert mock_job_run.progress_total == 100 + + def test_complete_job_pins_progress_current_to_total_on_success(self, mock_job_manager, mock_job_run): + """SUCCEEDED jobs advance progress_current to match progress_total.""" + mock_job_run.progress_current = 75 + mock_job_run.progress_total = 100 + result = 
JobExecutionOutcome.succeeded() + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.SUCCEEDED, result=result) + assert mock_job_run.progress_current == 100 + + def test_complete_job_success_with_no_progress_total_does_not_set_current(self, mock_job_manager, mock_job_run): + """SUCCEEDED jobs with no progress_total leave progress_current untouched.""" + mock_job_run.progress_current = None + mock_job_run.progress_total = None + result = JobExecutionOutcome.succeeded() + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.SUCCEEDED, result=result) + assert mock_job_run.progress_current is None + + +@pytest.mark.integration +class TestJobCompletionIntegration: + """Test job completion lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in TERMINAL_JOB_STATUSES], + ) + def test_job_exception_is_raised_when_job_has_invalid_status( + self, session, arq_redis, with_populated_job_data, sample_job_run, invalid_status + ): + """Test job completion failure due to invalid job status.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Verify a JobTransitionError is raised due to the passed invalid state. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + # Although the job might still set some attributes before the error is raised, the exception + # indicates to the caller that the job was not completed successfully and the transaction should be rolled back. + with ( + TransactionSpy.spy(manager.db), + pytest.raises( + JobTransitionError, + match=re.escape( + f"Cannot complete job to status: {invalid_status}. 
Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + ), + ), + ): + manager.complete_job(status=invalid_status, result=JobExecutionOutcome.succeeded(data={"output": "test"})) + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + def test_job_updated_successfully_without_error( + self, session, arq_redis, with_populated_job_data, sample_job_run, valid_status + ): + """Test successful job completion.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.complete_job(status=valid_status, result=JobExecutionOutcome.succeeded(data={"output": "test"})) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == valid_status + assert job.finished_at is not None + assert job.metadata_ == { + "result": {"status": "succeeded", "data": {"output": "test"}, "error": None, "exception_details": None} + } + assert job.error_message is None + assert job.error_traceback is None + + # For cases where no error is provided, verify failure category is set appropriately based + # on status. We automatically set UNKNOWN for FAILED/ERRORED status if no error is given. 
+ if valid_status in (JobStatus.FAILED, JobStatus.ERRORED): + assert job.failure_category == FailureCategory.UNKNOWN + else: + assert job.failure_category is None + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + def test_job_updated_successfully_with_error( + self, session, arq_redis, with_populated_job_data, sample_job_run, valid_status + ): + """Test successful job completion.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + test_exception = ValueError("Test error") + with TransactionSpy.spy(manager.db): + manager.complete_job( + status=valid_status, + result=JobExecutionOutcome.errored(exception=test_exception, data={"output": "test"}), + ) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == valid_status + assert job.finished_at is not None + assert job.metadata_ == { + "result": { + "status": "errored", + "data": {"output": "test"}, + "error": "Test error", + "exception_details": format_raised_exception_info_as_dict(test_exception), + } + } + assert job.error_message == "Test error" + assert job.error_traceback is not None + + # failure_category is only set for FAILED/ERRORED statuses + if valid_status in (JobStatus.FAILED, JobStatus.ERRORED): + assert job.failure_category == FailureCategory.UNKNOWN + else: + assert job.failure_category is None + + +@pytest.mark.unit +class TestJobFailureUnit: + """Unit tests for job failure lifecycle management.""" + + def test_fail_job_success(self, mock_job_manager, mock_job_run): + """Test that fail_job calls complete_job with status=JobStatus.FAILED.""" + + # Fail job with a controlled failure reason. 
Spy on transaction to ensure nothing is + # flushed/rolled back/committed prematurely. + result = JobExecutionOutcome.failed(reason="Test exception", data={"output": "test"}) + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.fail_job(result=result) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with( + status=JobStatus.FAILED, + result=result, + ) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.FAILED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == { + "result": { + "status": "failed", + "data": {"output": "test"}, + "error": "Test exception", + "exception_details": None, + } + } + assert mock_job_run.error_message == "Test exception" + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category == FailureCategory.UNKNOWN + + +class TestJobFailureIntegration: + """Test job failure lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test successful job failure.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Fail job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.fail_job(result=JobExecutionOutcome.failed(reason="Test error")) + + # Commit pending changes made by fail job. + session.flush() + + # Verify job state was updated in transaction with expected values. 
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == JobStatus.FAILED + assert job.finished_at is not None + assert job.metadata_ == { + "result": {"status": "failed", "data": {}, "error": "Test error", "exception_details": None} + } + assert job.error_message == "Test error" + assert job.error_traceback is None + assert job.failure_category == FailureCategory.UNKNOWN + + +@pytest.mark.unit +class TestJobSuccessUnit: + """Unit tests for job success lifecycle management.""" + + def test_succeed_job_success(self, mock_job_manager, mock_job_run): + """Test that succeed_job calls complete_job with status=JobStatus.SUCCEEDED.""" + + # Succeed job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + result = JobExecutionOutcome.succeeded(data={"output": "test"}) + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.succeed_job(result=result) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with(status=JobStatus.SUCCEEDED, result=result) + + # Verify job state was updated on our mock object with expected values. 
+ assert mock_job_run.status == JobStatus.SUCCEEDED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == { + "result": {"status": "succeeded", "data": {"output": "test"}, "error": None, "exception_details": None} + } + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + +class TestJobSuccessIntegration: + """Test job success lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test successful job succeeding.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.succeed_job(result=JobExecutionOutcome.succeeded(data={"output": "test"})) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == JobStatus.SUCCEEDED + assert job.finished_at is not None + assert job.metadata_ == { + "result": {"status": "succeeded", "data": {"output": "test"}, "error": None, "exception_details": None} + } + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + + +@pytest.mark.unit +class TestJobCancellationUnit: + """Unit tests for job cancellation lifecycle management.""" + + def test_cancel_job_success(self, mock_job_manager, mock_job_run): + """Test that cancel_job calls complete_job with status=JobStatus.CANCELLED.""" + + # Cancel job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. 
+ result = JobExecutionOutcome(status=JobStatus.CANCELLED, data={"output": "test"}, error=None, exception=None) + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.cancel_job(result=result) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with(status=JobStatus.CANCELLED, result=result) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.CANCELLED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == { + "result": {"status": "cancelled", "data": {"output": "test"}, "error": None, "exception_details": None} + } + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + +class TestJobCancellationIntegration: + """Test job cancellation lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test successful job cancellation.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.cancel_job( + result=JobExecutionOutcome( + status=JobStatus.CANCELLED, data={"output": "test"}, error=None, exception=None + ) + ) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. 
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == JobStatus.CANCELLED + assert job.finished_at is not None + assert job.metadata_ == { + "result": {"status": "cancelled", "data": {"output": "test"}, "error": None, "exception_details": None} + } + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + + +@pytest.mark.unit +class TestJobSkipUnit: + """Unit tests for job skip lifecycle management.""" + + def test_skip_job_success(self, mock_job_manager, mock_job_run): + """Test that skip_job calls complete_job with status=JobStatus.SKIPPED.""" + + # Skip job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + result = JobExecutionOutcome.skipped(data={"output": "test"}) + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.skip_job(result=result) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with(status=JobStatus.SKIPPED, result=result) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.SKIPPED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == { + "result": {"status": "skipped", "data": {"output": "test"}, "error": None, "exception_details": None} + } + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + +@pytest.mark.integration +class TestJobSkipIntegration: + """Test job skip lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test successful job skipping.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Skip job. 
+        # Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely.
+        with TransactionSpy.spy(manager.db):
+            manager.skip_job(result=JobExecutionOutcome.skipped(data={"output": "test"}))
+
+        # Commit pending changes made by start job.
+        session.flush()
+
+        # Verify job state was updated in transaction with expected values.
+        job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
+
+        assert job.status == JobStatus.SKIPPED
+        assert job.finished_at is not None
+        assert job.metadata_ == {
+            "result": {"status": "skipped", "data": {"output": "test"}, "error": None, "exception_details": None}
+        }
+        assert job.error_message is None
+        assert job.error_traceback is None
+        assert job.failure_category is None
+
+
+@pytest.mark.unit
+class TestPrepareRetryUnit:
+    """Unit tests for job retry lifecycle management."""
+
+    @pytest.mark.parametrize(
+        "invalid_status",
+        [status for status in JobStatus._member_map_.values() if status not in RETRYABLE_JOB_STATUSES],
+    )
+    @pytest.mark.asyncio
+    async def test_prepare_retry_raises_job_transition_error_when_managed_job_has_unretryable_status(
+        self, mock_job_manager, invalid_status, mock_job_run
+    ):
+        # Set initial job status to an invalid (unretryable) status.
+        mock_job_run.status = invalid_status
+
+        # Prepare retry job. Verify a JobTransitionError is raised due to invalid state in the mocked
+        # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely.
+        with (
+            pytest.raises(
+                JobTransitionError,
+                match=re.escape(f"Cannot retry job {mock_job_manager.job_id} due to invalid state ({invalid_status})"),
+            ),
+            TransactionSpy.spy(mock_job_manager.db),
+        ):
+            await mock_job_manager.prepare_retry()
+
+        # Verify job state on the mocked object remains unchanged.
+ assert mock_job_run.status == invalid_status + assert mock_job_run.retry_count == 0 + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + @pytest.mark.asyncio + async def test_prepare_retry_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job prepare retry failure due to exception during job object manipulation.""" + # Set initial job status to FAILED. Job status must be retryable for this test. + initial_status = JobStatus.FAILED + mock_job_run.status = initial_status + + # Trigger: If any attribute access occurs on job, raise exception. If no access, return FAILED. + def get_or_error(*args): + if args: + raise exception + return initial_status + + # Prepare retry. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job retry state", + ), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + await mock_job_manager.prepare_retry() + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. 
+ assert mock_job_run.status == JobStatus.FAILED + assert mock_job_run.retry_count == 0 + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + + @pytest.mark.asyncio + async def test_prepare_retry_success(self, mock_job_manager, mock_job_run): + """Test successful job prepare retry.""" + # Set initial job status to FAILED. Job status must be retryable for this test. + mock_job_run.status = JobStatus.FAILED + + # Prepare retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + # Mock the flag_modified function: mock objects don't have _sa_instance_state attribute required by SQLAlchemy + # funcs and it's easier to mock the functions that manipulate the state than to fully mock the state itself. + with ( + patch("mavedb.worker.lib.managers.job_manager.flag_modified") as mock_flag_modified, + TransactionSpy.spy(mock_job_manager.db), + ): + await mock_job_manager.prepare_retry() + + # Verify flag_modified was called for metadata_ field. + mock_flag_modified.assert_called_once_with(mock_job_run, "metadata_") + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. 
+        assert mock_job_run.status == JobStatus.PENDING
+        assert mock_job_run.retry_count == 1
+        assert mock_job_run.progress_message == "Job retry prepared"
+        assert mock_job_run.error_message is None
+        assert mock_job_run.error_traceback is None
+        assert mock_job_run.failure_category is None
+        assert mock_job_run.finished_at is None
+        assert mock_job_run.metadata_["retry_history"] is not None
+        assert mock_job_run.started_at is None
+        assert mock_job_run.metadata_.get("result") is None
+
+
+@pytest.mark.integration
+class TestPrepareRetryIntegration:
+    """Test job retry lifecycle management."""
+
+    @pytest.mark.parametrize(
+        "job_status",
+        [status for status in JobStatus._member_map_.values() if status not in RETRYABLE_JOB_STATUSES],
+    )
+    @pytest.mark.asyncio
+    async def test_prepare_retry_failed_due_to_invalid_status(
+        self, session, arq_redis, with_populated_job_data, sample_job_run, job_status
+    ):
+        """Test job retry failure due to invalid job status."""
+        manager = JobManager(session, arq_redis, sample_job_run.id)
+
+        # Update job to non-failed state
+        job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
+        job.status = job_status
+        session.commit()
+
+        # Prepare retry job. Verify a JobTransitionError is raised due to the passed invalid state.
+        # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely.
+        # NOTE: use re.escape (as the unit tests do) rather than hand-escaped parens — `\(` in a
+        # non-raw f-string is an invalid escape sequence (SyntaxWarning on Python 3.12+).
+        with (
+            TransactionSpy.spy(manager.db),
+            pytest.raises(
+                JobTransitionError,
+                match=re.escape(f"Cannot retry job {job.id} due to invalid state ({job.status})"),
+            ),
+        ):
+            await manager.prepare_retry()
+
+    @pytest.mark.asyncio
+    async def test_prepare_retry_success(self, session, arq_redis, with_populated_job_data, sample_job_run):
+        """Test successful job retry."""
+        manager = JobManager(session, arq_redis, sample_job_run.id)
+
+        # Manually set job to FAILED status and commit changes.
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + session.commit() + + # Prepare retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + await manager.prepare_retry() + + # Commit pending changes made by start job. + session.commit() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.retry_count == 1 + assert job.progress_message == "Job retry prepared" + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + assert job.finished_at is None + assert job.metadata_["retry_history"] is not None + + +@pytest.mark.unit +class TestPrepareQueueUnit: + """Unit tests for job prepare for queue lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status != JobStatus.PENDING], + ) + def test_prepare_queue_raises_job_transition_error_when_managed_job_has_unretryable_status( + self, mock_job_manager, invalid_status, mock_job_run + ): + """Test job prepare queue failure due to invalid job status.""" + # Set initial job status to an invalid (non-pending) status. + mock_job_run.status = invalid_status + + # Prepare queue job. Verify a JobTransitionError is raised due to invalid state in the mocked + # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises( + JobTransitionError, + match=re.escape(f"Cannot queue job {mock_job_manager.job_id} from status {invalid_status}"), + ), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.prepare_queue() + + # Verify job state on the mocked object remains unchanged. 
+ assert mock_job_run.status == invalid_status + assert mock_job_run.progress_message is None + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_prepare_queue_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job prepare queue failure due to exception during job object manipulation.""" + # Set initial job status to PENDING. Job status must be valid for this test. + initial_status = JobStatus.PENDING + mock_job_run.status = initial_status + + # Trigger: If any attribute access occurs on job, raise exception. If no access, return FAILED. + def get_or_error(*args): + if args: + raise exception + return initial_status + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job queue state", + ), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.prepare_queue() + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == JobStatus.PENDING + assert mock_job_run.progress_message is None + + def test_prepare_queue_success(self, mock_job_manager, mock_job_run): + """Test successful job prepare queue.""" + # Set initial job status to PENDING. Job status must be valid for this test. + mock_job_run.status = JobStatus.PENDING + + # Prepare queue. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. 
+ # Mock the flag_modified function: mock objects don't have _sa_instance_state attribute required by SQLAlchemy + # funcs and it's easier to mock the functions that manipulate the state than to fully mock the state itself. + with ( + patch.object(mock_job_manager, "get_job", return_value=mock_job_run), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.prepare_queue() + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.status == JobStatus.QUEUED + assert mock_job_run.progress_message == "Job queued for execution" + + +@pytest.mark.integration +class TestPrepareQueue: + """Test job prepare for queue lifecycle management.""" + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status != JobStatus.PENDING], + ) + def test_prepare_queue_failed_due_to_invalid_status( + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status + ): + """Test job prepare for queue failure due to invalid job status.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to invalid state + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.flush() + + # Prepare queue job. Verify a JobTransitionError is raised due to the passed invalid state. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. 
+ with ( + TransactionSpy.spy(manager.db), + pytest.raises( + JobTransitionError, + match=f"Cannot queue job {job.id} from status {job.status}", + ), + ): + manager.prepare_queue() + + def test_prepare_queue_success(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test successful job prepare for queue.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Sample run should be in PENDING state from fixture setup, but verify to be sure. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Sample job run must be in PENDING state for this test." + + # Prepare queue. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + assert job.progress_message == "Job queued for execution" + + +@pytest.mark.unit +class TestResetJobUnit: + """Unit tests for job reset lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_reset_job_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job reset job failure due to exception during job object manipulation.""" + + # Trigger: If any attribute setting occurs on job, raise exception. Otherwise return FAILED. + # Set initial job status to FAILED. Job status is unimportant for this test (all statuses are resettable). + initial_status = JobStatus.FAILED + mock_job_run.status = initial_status + + def get_or_error(*args): + if args: + raise exception + return initial_status + + # Prepare queue. 
+        # Verify a JobStateError is raised by our trigger.
+        # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely.
+        with (
+            TransactionSpy.spy(mock_job_manager.db),
+            pytest.raises(
+                JobStateError,
+                match="Failed to reset job state",
+            ),
+        ):
+            type(mock_job_run).status = PropertyMock(side_effect=get_or_error)
+            mock_job_manager.reset_job()
+
+        # Verify job state on the mocked object remains unchanged. Although it's theoretically
+        # possible some job state is manipulated prior to an error being raised, our specific
+        # trigger should prevent any changes from being made.
+        assert mock_job_run.status == JobStatus.FAILED
+        assert mock_job_run.started_at is None
+        assert mock_job_run.finished_at is None
+        assert mock_job_run.progress_current is None
+        assert mock_job_run.progress_total is None
+        assert mock_job_run.progress_message is None
+        assert mock_job_run.error_message is None
+        assert mock_job_run.error_traceback is None
+        assert mock_job_run.failure_category is None
+        assert mock_job_run.retry_count == 0
+        assert mock_job_run.metadata_ == {}
+
+    def test_reset_job_success(self, mock_job_manager, mock_job_run):
+        """Test successful job reset."""
+        # Set initial job status to provided status. All statuses are resettable, so the actual status is not important.
+        mock_job_run.status = JobStatus.FAILED
+
+        # Reset job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely.
+        with TransactionSpy.spy(mock_job_manager.db):
+            mock_job_manager.reset_job()
+
+        # Verify job state was updated on our mock object with expected values.
+        # These changes would normally be persisted by the caller after this method returns.
+ assert mock_job_run.status == JobStatus.PENDING + assert mock_job_run.started_at is None + assert mock_job_run.finished_at is None + assert mock_job_run.progress_current is None + assert mock_job_run.progress_total is None + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.retry_count == 0 + assert mock_job_run.metadata_ == {} + + +@pytest.mark.integration +class TestResetJobIntegration: + """Test job reset lifecycle management.""" + + def test_reset_job_success(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test successful job reset.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Manually set job to a non-pending status and set various fields to non-default values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + job.started_at = "2023-12-31T23:59:59Z" + job.finished_at = "2024-01-01T00:00:00Z" + job.progress_current = 50 + job.progress_total = 100 + job.progress_message = "Halfway done" + job.error_message = "Test error message" + job.error_traceback = "Test error traceback" + job.failure_category = FailureCategory.UNKNOWN + job.retry_count = 2 + job.metadata_ = {"result": {}, "retry_history": [{"attempt": 1}, {"attempt": 2}]} + session.commit() + + # Reset job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.reset_job() + + # Commit pending changes made by reset job. 
+ session.commit() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.progress_current is None + assert job.progress_total is None + assert job.progress_message is None + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + assert job.started_at is None + assert job.finished_at is None + assert job.retry_count == 0 + assert job.metadata_.get("retry_history") is None + + +@pytest.mark.unit +class TestJobProgressUpdateUnit: + """Unit tests for job progress update lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_update_progress_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job progress update failure due to exception during job object manipulation.""" + # Trigger: If any attribute setting occurs on job progress, raise exception. If only access, return initial progress. + initial_progress_current = mock_job_run.progress_current + + def get_or_error(*args): + if args: + raise exception + return initial_progress_current + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job progress", + ), + ): + type(mock_job_run).progress_current = PropertyMock(side_effect=get_or_error) + mock_job_manager.update_progress(50, 100, "Halfway done", commit=False) + + # Verify job state on the mocked object remains unchanged. 
+ assert mock_job_run.progress_current is None + assert mock_job_run.progress_total is None + assert mock_job_run.progress_message is None + + def test_update_progress_success(self, mock_job_manager, mock_job_run): + """Test successful job progress update.""" + + # Update progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.update_progress(50, 100, "Halfway done", commit=False) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_current == 50 + assert mock_job_run.progress_total == 100 + assert mock_job_run.progress_message == "Halfway done" + + def test_update_progress_does_not_overwrite_old_message_when_no_new_message_is_provided( + self, mock_job_manager, mock_job_run + ): + """Test successful job progress update without message.""" + + # Set initial progress message to verify it is not overwritten. + mock_job_run.progress_message = "Old message" + + # Update progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.update_progress(75, 200, commit=False) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_current == 75 + assert mock_job_run.progress_total == 200 + assert mock_job_run.progress_message == "Old message" # Message should remain unchanged from initial set. 
+ + +@pytest.mark.integration +class TestJobProgressUpdateIntegration: + """Test job progress update lifecycle management.""" + + def test_update_progress_success(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test successful progress update.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to None to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = None + job.progress_total = None + job.progress_message = None + session.commit() + + # Update progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.update_progress(50, 100, "Halfway done", commit=False) + + # Commit pending changes made by update progress. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 50 + assert job.progress_total == 100 + assert job.progress_message == "Halfway done" + + def test_update_progress_success_does_not_overwrite_old_message_when_no_new_message_is_provided( + self, session, arq_redis, with_populated_job_data, sample_job_run + ): + """Test successful progress update without message.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to None to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = None + job.progress_total = None + job.progress_message = "Old message" + session.commit() + + # Update progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.update_progress(75, 200, commit=False) + + # Commit pending changes made by update progress. 
+ session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 75 + assert job.progress_total == 200 + assert job.progress_message == "Old message" # Message should remain unchanged from initial set. + + +@pytest.mark.unit +class TestJobProgressStatusUpdateUnit: + """Unit tests for job progress status update lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_update_status_message_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job status message update failure due to exception during job object manipulation.""" + # Trigger: If any attribute setting occurs on job progress message, raise exception. If only access, return initial message. + initial_progress_message = mock_job_run.progress_message + + def get_or_error(*args): + if args: + raise exception + return initial_progress_message + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job status message", + ), + ): + type(mock_job_run).progress_message = PropertyMock(side_effect=get_or_error) + mock_job_manager.update_status_message("New status message", commit=False) + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.progress_message == initial_progress_message + + def test_update_status_message_success(self, mock_job_manager, mock_job_run): + """Test successful job status message update.""" + + # Update status message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. 
+ with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.update_status_message("New status message", commit=False) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_message == "New status message" + + +@pytest.mark.integration +class TestJobProgressStatusUpdate: + """Test job progress status update lifecycle management.""" + + def test_update_status_message_success(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test successful status message update.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress message to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_message = "Old status message" + session.commit() + + # Update status message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.update_status_message("New status message", commit=False) + + # Commit pending changes made by update status message. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_message == "New status message" + + +@pytest.mark.unit +class TestJobProgressIncrementationUnit: + """Unit tests for job progress incrementation lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_increment_progress_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job progress incrementation failure due to exception during job object manipulation.""" + # Trigger: If any attribute access occurs on job progress, raise exception. 
+        # If no access, return initial progress.
+        initial_progress_current = mock_job_run.progress_current
+
+        def get_or_error(*args):
+            if args:
+                raise exception
+            return initial_progress_current
+
+        # Increment progress. Verify a JobStateError is raised by our trigger.
+        # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely.
+        with (
+            TransactionSpy.spy(mock_job_manager.db),
+            pytest.raises(
+                JobStateError,
+                match="Failed to increment job progress",
+            ),
+        ):
+            type(mock_job_run).progress_current = PropertyMock(side_effect=get_or_error)
+            mock_job_manager.increment_progress(10, "Incrementing progress", commit=False)
+
+        # Verify job state on the mocked object remains unchanged.
+        assert mock_job_run.progress_current is None
+        assert mock_job_run.progress_message is None
+
+    def test_increment_progress_success(self, mock_job_manager, mock_job_run):
+        """Test successful job progress incrementation."""
+
+        # Increment progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely.
+        with TransactionSpy.spy(mock_job_manager.db):
+            mock_job_manager.increment_progress(10, "Incrementing progress", commit=False)
+
+        # Verify job state was updated on our mock object with expected values.
+        # These changes would normally be persisted by the caller after this method returns.
+        assert mock_job_run.progress_current == 10
+        assert mock_job_run.progress_message == "Incrementing progress"
+
+    def test_increment_progress_success_old_message_is_not_overwritten_when_none_provided(
+        self, mock_job_manager, mock_job_run
+    ):
+        """Test successful job progress incrementation without message."""
+
+        # Set initial progress message to verify it is not overwritten.
+        mock_job_run.progress_message = "Old message"
+
+        # Increment progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely.
+ with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.increment_progress(15, commit=False) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_current == 15 + assert mock_job_run.progress_message == "Old message" # Message should remain unchanged from initial set. + + +@pytest.mark.integration +class TestJobProgressIncrementationIntegration: + """Test job progress incrementation lifecycle management.""" + + @pytest.mark.parametrize( + "msg", + [None, "Incremented progress successfully"], + ) + def test_increment_progress_success(self, session, arq_redis, with_populated_job_data, sample_job_run, msg): + """Test successful progress incrementation.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to 0 to verify incrementation. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = 0 + job.progress_total = 100 + job.progress_message = "Test incrementation message" + session.commit() + + # Increment progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.increment_progress(10, msg, commit=False) + + # Commit pending changes made by increment progress. + session.commit() + + # Verify job state was updated in transaction with expected values. 
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 10 + assert job.progress_total == 100 + assert job.progress_message == ( + msg if msg else "Test incrementation message" + ) # Message should remain unchanged if None + + def test_increment_progress_success_multiple_times( + self, session, arq_redis, with_populated_job_data, sample_job_run + ): + """Test successful progress incrementation multiple times.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to 0 to verify incrementation. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = 0 + job.progress_total = 100 + session.commit() + + # Increment progress multiple times. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.increment_progress(20, commit=False) + manager.increment_progress(30, commit=False) + + # Commit pending changes made by increment progress. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 50 + assert job.progress_total == 100 + + def test_increment_progress_success_exceeding_total( + self, session, arq_redis, with_populated_job_data, sample_job_run + ): + """Test successful progress incrementation exceeding total.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to 0 to verify incrementation. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = 0 + job.progress_total = 100 + session.commit() + + # Increment progress exceeding total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. 
+ with TransactionSpy.spy(manager.db): + manager.increment_progress(150, commit=False) + + # Commit pending changes made by increment progress. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 150 + assert job.progress_total == 100 + + +class TestJobProgressTotalUpdateUnit: + """Unit tests for job progress total update lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_set_progress_total_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job progress total update failure due to exception during job object manipulation.""" + # Trigger: If any attribute access occurs on job progress total, raise exception. If no access, return initial total. + initial_progress_total = mock_job_run.progress_total + + def get_or_error(*args): + if args: + raise exception + return initial_progress_total + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job progress total state", + ), + ): + type(mock_job_run).progress_total = PropertyMock(side_effect=get_or_error) + mock_job_manager.set_progress_total(200, commit=False) + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.progress_total == initial_progress_total + + def test_set_progress_total_success(self, mock_job_manager, mock_job_run): + """Test successful job progress total update.""" + + # Set progress total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. 
+ with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.set_progress_total(200, commit=False) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_total == 200 + + def test_set_progress_total_does_not_overwrite_old_message_when_no_new_message_is_provided( + self, mock_job_manager, mock_job_run + ): + """Test successful job progress total update without message.""" + + # Set initial progress message to verify it is not overwritten. + mock_job_run.progress_message = "Old message" + + # Set progress total without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.set_progress_total(300, commit=False) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_total == 300 + assert mock_job_run.progress_message == "Old message" # Message should remain unchanged from initial set. + + +@pytest.mark.integration +class TestJobProgressTotalUpdateIntegration: + """Test job progress total update lifecycle management.""" + + def test_set_progress_total_success(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test successful progress total update.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress total and message to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_total = 100 + job.progress_message = "Ready to start" + session.commit() + + # Set progress total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. 
+ with TransactionSpy.spy(manager.db): + manager.set_progress_total(200, message="Updated total progress", commit=False) + + # Commit pending changes made by set progress total. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_total == 200 + assert job.progress_message == "Updated total progress" + + +@pytest.mark.unit +class TestJobIsCancelledUnit: + """Unit tests for job is_cancelled lifecycle management.""" + + @pytest.mark.parametrize( + "status,expected_result", + [(status, status in CANCELLED_JOB_STATUSES) for status in JobStatus._member_map_.values()], + ) + def test_is_cancelled_success_not_cancelled(self, mock_job_manager, mock_job_run, status, expected_result): + """Test successful is_cancelled check when not cancelled.""" + # Set initial job status to a non-cancelled status. + mock_job_run.status = status + + # Check is_cancelled. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + result = mock_job_manager.is_cancelled() + + assert result == expected_result + + +@pytest.mark.integration +class TestJobIsCancelledIntegration: + """Test job is_cancelled lifecycle management.""" + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status in CANCELLED_JOB_STATUSES], + ) + def test_is_cancelled_success_cancelled( + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status + ): + """Test successful is_cancelled check when cancelled.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Mark the job as cancelled in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.commit() + + # Check is_cancelled. 
Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.is_cancelled() + + # Verify the job is marked as cancelled. This method requires no persistance. + assert result is True + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status not in CANCELLED_JOB_STATUSES], + ) + def test_is_cancelled_success_not_cancelled( + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status + ): + """Test successful is_cancelled check when not cancelled.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Mark the job as not cancelled in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.commit() + + # Check is_cancelled. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.is_cancelled() + + # Verify the job is not marked as cancelled. This method requires no persistance. + assert result is False + + +@pytest.mark.unit +class TestJobShouldRetryUnit: + """Unit tests for job should_retry lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + [ + pytest.param( + exc, + marks=pytest.mark.skip( + reason=( + "AttributeError is not propagated by mock objects: " + "Python's attribute lookup swallows AttributeError and mock returns a new mock instead. " + "See unittest.mock docs for details." + ) + ) + if isinstance(exc, AttributeError) + else (), + # ^ Only mark AttributeError for skip, others run as normal + ) + for exc in HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION + ], + ) + def test_should_retry_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """ + Test should_retry check failure due to exception during job object manipulation. 
+
+ AttributeError is skipped in this test because Python's mock machinery swallows
+ AttributeError raised by property getters and instead returns a new mock, so the
+ exception is not propagated as expected. See unittest.mock documentation for details.
+ This is a known limitation of the mocking approach, not of the code under test.
+ """
+
+ # Trigger: If any attribute access occurs on job, raise exception.
+ def get_or_error(*args):
+ raise exception
+
+ # Remove any instance attribute that could shadow the property
+ if "status" in mock_job_run.__dict__:
+ del mock_job_run.__dict__["status"]
+
+ # In cases where we want to raise on attribute access, we need to override the entire property
+ # or else AttributeError won't be raised due to Mock's internal attribute-lookup machinery.
+ type(mock_job_run).status = property(get_or_error)
+
+ # Prepare queue. Verify a JobStateError is raised by our trigger.
+ # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely.
+ with (
+ TransactionSpy.spy(mock_job_manager.db),
+ pytest.raises(
+ JobStateError,
+ match="Failed to check retry eligibility state",
+ ),
+ ):
+ mock_job_manager.should_retry()
+
+ @pytest.mark.parametrize(
+ "status,expected_result",
+ [
+ (JobStatus.SUCCEEDED, False),
+ (JobStatus.CANCELLED, False),
+ (JobStatus.QUEUED, False),
+ (JobStatus.RUNNING, False),
+ (JobStatus.PENDING, False),
+ ],
+ )
+ def test_should_retry_success_for_non_failed_statuses(
+ self, mock_job_manager, mock_job_run, status, expected_result
+ ):
+ """Test successful should_retry check."""
+ # Set initial job status to provided status.
+ mock_job_run.status = status
+
+ # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely.
+ with TransactionSpy.spy(mock_job_manager.db):
+ result = mock_job_manager.should_retry()
+
+ # Verify the result matches expected. 
+ assert result == expected_result + + @pytest.mark.parametrize( + "retry_count,max_retries,failure_category,expected_result", + ( + [(0, 3, cat, True) for cat in RETRYABLE_FAILURE_CATEGORIES] # Initial retry, + + [(2, 3, RETRYABLE_FAILURE_CATEGORIES[0], True)] # Within retry limit (barely) + + [(3, 3, RETRYABLE_FAILURE_CATEGORIES[0], False)] # Exceeded retries + + [ + (1, 3, cat, False) + for cat in FailureCategory._member_map_.values() + if cat not in RETRYABLE_FAILURE_CATEGORIES + ] # Non-retryable failure categories + ), + ) + def test_should_retry_success_for_failed_status( + self, mock_job_manager, mock_job_run, retry_count, max_retries, failure_category, expected_result + ): + """Test successful should_retry check for failed status.""" + # Set initial job status to FAILED with provided parameters. + mock_job_run.status = JobStatus.FAILED + mock_job_run.retry_count = retry_count + mock_job_run.max_retries = max_retries + mock_job_run.failure_category = failure_category + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + result = mock_job_manager.should_retry() + + # Verify the result matches expected. 
+ assert result == expected_result + + +@pytest.mark.integration +class TestJobShouldRetryIntegration: + """Test job should_retry lifecycle management.""" + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status not in (JobStatus.FAILED, JobStatus.ERRORED)], + ) + def test_should_retry_success_non_failed_jobs_should_not_retry( + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status + ): + """Test successful should_retry check (only jobs in failure states may retry).""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to non-failed state + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.commit() + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.should_retry() + + # Verify the job should not retry. This method requires no persistance. + assert result is False + + def test_should_retry_success_exceeded_retry_attempts_should_not_retry( + self, session, arq_redis, with_populated_job_data, sample_job_run + ): + """Test successful should_retry check with no retry attempts left.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to failed state with no retries left + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + job.max_retries = 3 + job.retry_count = 3 + session.commit() + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.should_retry() + + # Verify the job should not retry. This method requires no persistance. 
+ assert result is False
+
+ def test_should_retry_success_failure_category_is_not_retryable(
+ self, session, arq_redis, with_populated_job_data, sample_job_run
+ ):
+ """Test successful should_retry check with non-retryable failure category."""
+ manager = JobManager(session, arq_redis, sample_job_run.id)
+
+ # Update job to failed state with non-retryable failure category
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
+ job.status = JobStatus.FAILED
+ job.max_retries = 3
+ job.retry_count = 1
+ job.failure_category = FailureCategory.UNKNOWN
+ session.commit()
+
+ # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely.
+ with TransactionSpy.spy(manager.db):
+ result = manager.should_retry()
+
+ # Verify the job should not retry. This method requires no persistence.
+ assert result is False
+
+ def test_should_retry_success(self, session, arq_redis, with_populated_job_data, sample_job_run):
+ """Test successful should_retry check with retryable failure category."""
+ manager = JobManager(session, arq_redis, sample_job_run.id)
+
+ # Update job to failed state with retryable failure category
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
+ job.status = JobStatus.FAILED
+ job.max_retries = 3
+ job.retry_count = 1
+ job.failure_category = RETRYABLE_FAILURE_CATEGORIES[0]
+ session.commit()
+
+ # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely.
+ with TransactionSpy.spy(manager.db):
+ result = manager.should_retry()
+
+ # Verify the job should retry. This method requires no persistence. 
+ assert result is True + + +@pytest.mark.unit +class TestGetJobUnit: + """Unit tests for job retrieval.""" + + def test_get_job_wraps_database_connection_error_when_encounters_sqlalchemy_error(self, mock_job_run): + """Test job retrieval failure during job fetch.""" + + # Prepare mock JobManager with mocked DB session that will raise SQLAlchemyError on query. + # We don't use the default fixture here since it usually wraps this function. + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + manager = object.__new__(JobManager) + manager.db = mock_db + manager.redis = mock_redis + manager.job_id = mock_job_run.id + manager.context = {} + + with ( + TransactionSpy.mock_database_execution_failure(manager.db), + pytest.raises(DatabaseConnectionError, match=f"Failed to fetch job {mock_job_run.id}"), + ): + manager.get_job() + + +@pytest.mark.integration +class TestGetJobIntegration: + """Test job retrieval.""" + + def test_get_job_success(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test successful job retrieval.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Retrieve job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + job = manager.get_job() + + # Verify the retrieved job matches expected. 
+ assert job.id == sample_job_run.id + assert job.status == JobStatus.PENDING + + def test_get_job_raises_job_not_found_error_when_job_does_not_exist( + self, session, arq_redis, with_populated_job_data + ): + """Test job retrieval failure when job does not exist.""" + with pytest.raises(DatabaseConnectionError, match="Failed to fetch job 9999"), TransactionSpy.spy(session): + JobManager(session, arq_redis, job_id=9999) # Non-existent job ID + + +@pytest.mark.integration +class TestJobManagerJob: + """Test overall job lifecycle management.""" + + def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test full job lifecycle from start to completion.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING, "Job status should be RUNNING after starting" + assert job.started_at is not None, "Job started_at should be set after starting" + + # Set initial progress + with TransactionSpy.spy(manager.db): + manager.update_progress(0, 100, "Job started", commit=False) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 0 + assert job.progress_total == 
100 + assert job.progress_message == "Job started" + + # Update status message + with TransactionSpy.spy(manager.db): + manager.update_status_message("Began processing data", commit=False) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_message == "Began processing data" + + # Set progress total + with TransactionSpy.spy(manager.db): + manager.set_progress_total(200, "Set total work units", commit=False) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_total == 200 + assert job.progress_message == "Set total work units" + + # Increment progress + with TransactionSpy.spy(manager.db): + manager.increment_progress(100, "Halfway done", commit=False) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 100 + assert job.progress_message == "Halfway done" + + # Increment progress again + with TransactionSpy.spy(manager.db): + manager.increment_progress(100, "All done", commit=False) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 200 + assert job.progress_message == "All done" + + # Complete job + with TransactionSpy.spy(manager.db): + manager.succeed_job(result=JobExecutionOutcome.succeeded(data={"output": "test"})) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + assert job.finished_at is not None + + # Verify job is not cancelled and should not retry + assert manager.is_cancelled() is False + assert manager.should_retry() is False + + # Verify final job state + final_job = manager.get_job() + assert final_job.status == JobStatus.SUCCEEDED + assert final_job.progress_current == 200 + assert final_job.progress_total 
== 200 + + def test_full_cancelled_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test full job lifecycle for a cancelled job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Cancel job + with TransactionSpy.spy(manager.db): + manager.cancel_job( + result=JobExecutionOutcome( + status=JobStatus.CANCELLED, + data={"reason": "User requested cancellation"}, + error="User requested cancellation", + exception=None, + ) + ) + session.flush() + + # Verify job is cancelled + assert manager.is_cancelled() is True + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + assert job.finished_at is not None + + def test_full_skipped_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test full job lifecycle for a skipped job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. 
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Skip job + with TransactionSpy.spy(manager.db): + manager.skip_job(result=JobExecutionOutcome.skipped(data={"reason": "Job not needed"})) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + assert job.finished_at is not None + + def test_full_failed_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test full job lifecycle for a failed job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Fail job + with TransactionSpy.spy(manager.db): + manager.fail_job(result=JobExecutionOutcome.failed(reason="An error occurred")) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + assert job.finished_at is not None + assert job.error_message == "An error occurred" + assert job.error_traceback is None + + 
@pytest.mark.asyncio + async def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): + """Test full job lifecycle for a retried job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Fail job + with TransactionSpy.spy(manager.db): + manager.fail_job(result=JobExecutionOutcome.failed(reason="Temporary error")) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # TODO: Use some failure method added later to set failure category to retryable during the + # call to fail_job above. For now, we manually set it here. 
+ job.failure_category = RETRYABLE_FAILURE_CATEGORIES[0]
+ session.commit()
+
+ # Should retry
+ assert manager.should_retry() is True
+
+ # Prepare retry
+ with TransactionSpy.spy(manager.db):
+ await manager.prepare_retry()
+ session.flush()
+
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
+ assert job.status == JobStatus.PENDING
+ assert job.retry_count == 1
+
+ @pytest.mark.asyncio
+ async def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run):
+ """Test full job lifecycle for a reset job."""
+ # Pre-manager: Job is created in DB in Pending state. Verify initial state.
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
+ assert job.status == JobStatus.PENDING, "Initial job status should be PENDING"
+
+ manager = JobManager(session, arq_redis, sample_job_run.id)
+
+ # Prepare job to be enqueued
+ with TransactionSpy.spy(manager.db):
+ manager.prepare_queue()
+ session.flush()
+
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
+ assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue"
+
+ # Start job
+ with TransactionSpy.spy(manager.db):
+ manager.start_job()
+ session.flush()
+
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
+ assert job.status == JobStatus.RUNNING
+
+ # Fail job
+ with TransactionSpy.spy(manager.db):
+ manager.fail_job(result=JobExecutionOutcome.failed(reason="Some error"))
+ session.flush()
+
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
+ assert job.status == JobStatus.FAILED
+
+ # Retry job
+ with TransactionSpy.spy(manager.db):
+ await manager.prepare_retry()
+ session.flush()
+
+ job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
+ assert job.status == JobStatus.PENDING
+ assert job.retry_count == 1
+
+ # Queue 
job again + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job again + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Fail job again + with TransactionSpy.spy(manager.db): + manager.fail_job(result=JobExecutionOutcome.failed(reason="Another error")) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + assert job.retry_count == 1 + + # Reset job + with TransactionSpy.spy(manager.db): + manager.reset_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.progress_current is None + assert job.progress_total is None + assert job.retry_count == 0 diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py new file mode 100644 index 000000000..3028a31d2 --- /dev/null +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -0,0 +1,4410 @@ +# ruff: noqa: E402 +""" +Comprehensive test suite for PipelineManager class. + +Tests cover all aspects of pipeline coordination, job dependency management, +status updates, error handling, and database interactions including new methods +for pipeline monitoring, job retry management, and restart functionality. 
+""" + +import pytest + +pytest.importorskip("arq") + +import datetime +from unittest.mock import Mock, PropertyMock, patch + +from arq import ArqRedis +from arq.jobs import Job as ArqJob +from sqlalchemy import select +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.managers import JobManager +from mavedb.worker.lib.managers.constants import ( + ACTIVE_JOB_STATUSES, + CANCELLED_PIPELINE_STATUSES, + RUNNING_PIPELINE_STATUSES, + TERMINAL_PIPELINE_STATUSES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + PipelineCoordinationError, + PipelineStateError, + PipelineTransitionError, +) +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.utils import arq_job_id +from tests.helpers.transaction_spy import TransactionSpy + +HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( + AttributeError("Mock attribute error"), + KeyError("Mock key error"), + TypeError("Mock type error"), + ValueError("Mock value error"), +) + + +@pytest.mark.integration +class TestPipelineManagerInitialization: + """Test PipelineManager initialization and setup.""" + + def test_init_with_valid_pipeline(self, session, arq_redis, with_populated_job_data, sample_pipeline): + """Test successful initialization with valid pipeline ID.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + assert manager.db == session + assert manager.redis == arq_redis + assert manager.pipeline_id == sample_pipeline.id + + def test_init_with_invalid_pipeline_id(self, session, arq_redis): + """Test initialization failure with non-existent pipeline ID.""" + pipeline_id = 999 # 
Assuming this ID does not exist + with pytest.raises(DatabaseConnectionError, match=f"Failed to get pipeline {pipeline_id}"): + PipelineManager(session, arq_redis, pipeline_id) + + def test_init_with_database_error(self, session, arq_redis, with_populated_job_data, sample_pipeline): + """Test initialization failure with database connection error.""" + pipeline_id = sample_pipeline.id + + with ( + TransactionSpy.mock_database_execution_failure(session), + pytest.raises(DatabaseConnectionError, match=f"Failed to get pipeline {pipeline_id}"), + ): + PipelineManager(session, arq_redis, pipeline_id) + + +@pytest.mark.unit +class TestStartPipelineUnit: + """Unit tests for starting a pipeline.""" + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "coordinate_after_start", + [True, False], + ) + async def test_start_pipeline_successful(self, mock_pipeline_manager, coordinate_after_start): + """Test successful pipeline start from CREATED state.""" + with ( + patch.object( + mock_pipeline_manager, + "get_pipeline", + return_value=Mock(spec=Pipeline, status=PipelineStatus.CREATED), + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.start_pipeline(coordinate=coordinate_after_start) + + mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) + if coordinate_after_start: + mock_coordinate.assert_called_once() + else: + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "current_status", + [status for status in PipelineStatus._member_map_.values() if status != PipelineStatus.CREATED], + ) + async def test_start_pipeline_non_created_state(self, mock_pipeline_manager, current_status): + """Test pipeline start failure when not in CREATED state.""" + with ( + patch.object( + 
mock_pipeline_manager, + "get_pipeline_status", + return_value=current_status, + ), + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {mock_pipeline_manager.pipeline_id} is in state {current_status} and may not be started", + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.start_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + +@pytest.mark.integration +class TestStartPipelineIntegration: + """Integration tests for starting a pipeline.""" + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "coordinate_after_start", + [True, False], + ) + async def test_start_pipeline_successful( + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, coordinate_after_start + ): + """Test successful pipeline start from CREATED state.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=coordinate_after_start): + await manager.start_pipeline(coordinate=coordinate_after_start) + + # Commit the session to persist changes + session.commit() + + # Verify pipeline status is now RUNNING + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Verify the initial job was queued if we are coordinating after start + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + jobs = await arq_redis.queued_jobs() + + if coordinate_after_start: + assert job.status == JobStatus.QUEUED + assert jobs[0].function == sample_job_run.job_function + else: + assert job.status == JobStatus.PENDING + assert len(jobs) == 0 + + @pytest.mark.asyncio + async def 
test_start_pipeline_no_jobs(self, session, arq_redis, with_populated_job_data, sample_empty_pipeline): + """Test pipeline start when there are no jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with TransactionSpy.spy(session, expect_flush=True): + await manager.start_pipeline(coordinate=True) + + # Commit the session to persist changes + session.commit() + + # Verify pipeline status is now SUCCEEDED since there are no jobs + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_empty_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.SUCCEEDED + + # Verify no jobs were enqueued in Redis + jobs = await arq_redis.queued_jobs() + assert len(jobs) == 0 + + +@pytest.mark.unit +class TestCoordinatePipelineUnit: + """Unit tests for pipeline coordination logic.""" + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "new_status", + CANCELLED_PIPELINE_STATUSES, + ) + async def test_coordinate_pipeline_cancels_remaining_jobs_status_transitions_to_cancellable( + self, + mock_pipeline_manager, + new_status, + ): + """Test that remaining jobs are cancelled if pipeline transitions to a cancelable status.""" + with ( + patch.object( + mock_pipeline_manager, "transition_pipeline_status", return_value=new_status + ) as mock_transition, + patch.object(mock_pipeline_manager, "cancel_remaining_jobs", return_value=None) as mock_cancel, + patch.object(mock_pipeline_manager, "enqueue_ready_jobs", return_value=None) as mock_enqueue, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.coordinate_pipeline() + + mock_transition.assert_called_once() + mock_cancel.assert_called_once_with(reason="Pipeline failed or cancelled") + mock_enqueue.assert_not_called() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "new_status", + RUNNING_PIPELINE_STATUSES, + ) + async def test_coordinate_pipeline_enqueues_jobs_when_status_transitions_to_running( + 
self, mock_pipeline_manager, new_status + ): + """Test coordination after successful job completion.""" + with ( + patch.object( + mock_pipeline_manager, "transition_pipeline_status", return_value=new_status + ) as mock_transition, + patch.object(mock_pipeline_manager, "cancel_remaining_jobs", return_value=None) as mock_cancel, + patch.object(mock_pipeline_manager, "enqueue_ready_jobs", return_value=None) as mock_enqueue, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.coordinate_pipeline() + + assert mock_transition.call_count == 2 # Called once before and once after enqueuing jobs + mock_cancel.assert_not_called() + mock_enqueue.assert_called_once() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "new_status", + [ + status + for status in PipelineStatus._member_map_.values() + if status not in CANCELLED_PIPELINE_STATUSES + RUNNING_PIPELINE_STATUSES + ], + ) + async def test_coordinate_pipeline_noop_for_other_status_transitions(self, mock_pipeline_manager, new_status): + """Test coordination no-op for non-cancelled/running status transitions.""" + with ( + patch.object( + mock_pipeline_manager, "transition_pipeline_status", return_value=new_status + ) as mock_transition, + patch.object(mock_pipeline_manager, "cancel_remaining_jobs", return_value=None) as mock_cancel, + patch.object(mock_pipeline_manager, "enqueue_ready_jobs", return_value=None) as mock_enqueue, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.coordinate_pipeline() + + mock_transition.assert_called_once() + mock_cancel.assert_not_called() + mock_enqueue.assert_not_called() + + +@pytest.mark.integration +class TestCoordinatePipelineIntegration: + """Test pipeline coordination after job completion.""" + + @pytest.mark.asyncio + async def test_coordinate_pipeline_transitions_pipeline_to_failed_after_job_failure( + self, session, arq_redis, with_populated_job_data, sample_pipeline, 
sample_job_run, sample_dependent_job_run + ): + """Test the pipeline transitions to FAILED and remaining jobs are cancelled after a job failure.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job in the pipeline to a terminal status + sample_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were enqueued but that jobs were cancelled + mock_cancel.assert_called_once() + mock_enqueue.assert_not_called() + + # Verify that the pipeline status is now FAILED + assert manager.get_pipeline().status == PipelineStatus.FAILED + + # Verify that the failed job remains failed + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # Verify that the pending job transitions to skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + @pytest.mark.asyncio + async def test_coordinate_pipeline_transitions_pipeline_to_cancelled_after_pipeline_is_cancelled( + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test that remaining jobs are cancelled and skipped after pipeline cancellation.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to a cancelled status + manager.set_pipeline_status(PipelineStatus.CANCELLED) + session.commit() + + # Set the job in the pipeline to a running status + sample_job_run.status = JobStatus.RUNNING + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, 
"cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were enqueued but that jobs were cancelled + mock_cancel.assert_called_once() + mock_enqueue.assert_not_called() + + # Verify that the pipeline status is now CANCELLED + assert manager.get_pipeline().status == PipelineStatus.CANCELLED + + # Verify that the running job transitions to cancelled + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + + # Verify that the pending dependent job transitions to skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + @pytest.mark.asyncio + async def test_coordinate_running_pipeline_enqueues_ready_jobs( + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test successful pipeline coordination and job enqueuing when jobs are still pending.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to a running status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were cancelled but that jobs were enqueued + mock_cancel.assert_not_called() + mock_enqueue.assert_called_once() + + # Verify that the non-dependent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == 
JobStatus.QUEUED + + # Verify that the dependent job is still pending (since its dependency is not yet complete) + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "initial_status", + [PipelineStatus.CREATED, PipelineStatus.PAUSED, PipelineStatus.SUCCEEDED, PipelineStatus.PARTIAL], + ) + async def test_coordinate_pipeline_noop( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + initial_status, + ): + """Test that coordination is a no-op when the pipeline status is neither running nor cancellable.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to the parametrized initial status + manager.set_pipeline_status(initial_status) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were enqueued or cancelled + mock_cancel.assert_not_called() + mock_enqueue.assert_not_called() + + # Verify that the job is still pending + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + # Verify that the dependent job is still pending + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + async def test_coordinate_pipeline_leaf_failure_keeps_pipeline_running_and_enqueues_siblings( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """A FAILED leaf job does not cancel siblings — the 
pipeline stays RUNNING and enqueues ready jobs. + + Setup: + - sample_job_run (id=1): SUCCEEDED — non-leaf (sample_dependent_job_run depends on it) + - sample_dependent_job_run (id=2): FAILED — leaf (nothing depends on it) + - sibling_job (id=10): PENDING leaf, should be enqueued after coordination + """ + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + manager.set_pipeline_status(PipelineStatus.RUNNING) + + sibling_job = JobRun( + id=10, + urn="test:job:10", + job_type="sibling_job", + job_function="sibling_function", + status=JobStatus.PENDING, + pipeline_id=sample_pipeline.id, + ) + session.add(sibling_job) + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + ): + await manager.coordinate_pipeline() + + mock_cancel.assert_not_called() + assert manager.get_pipeline().status == PipelineStatus.RUNNING + sibling = session.execute(select(JobRun).where(JobRun.id == sibling_job.id)).scalar_one() + assert sibling.status == JobStatus.QUEUED + + @pytest.mark.asyncio + async def test_coordinate_pipeline_leaf_failure_terminal_state_is_partial( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """When all jobs are terminal with only leaf failures, the pipeline ends PARTIAL. + + Setup: + - sample_job_run (id=1): SUCCEEDED — non-leaf + - sample_dependent_job_run (id=2): FAILED — leaf + No pending/running jobs remain, so the pipeline must settle into a terminal state. 
+ """ + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + manager.set_pipeline_status(PipelineStatus.RUNNING) + + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + ): + await manager.coordinate_pipeline() + + mock_cancel.assert_not_called() + assert manager.get_pipeline().status == PipelineStatus.PARTIAL + + @pytest.mark.asyncio + async def test_coordinate_pipeline_errored_leaf_job_fails_pipeline( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """An ERRORED leaf job always fails the pipeline and cancels remaining jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + manager.set_pipeline_status(PipelineStatus.RUNNING) + + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.ERRORED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + ): + await manager.coordinate_pipeline() + + mock_cancel.assert_called_once() + assert manager.get_pipeline().status == PipelineStatus.FAILED + + +@pytest.mark.unit +class TestTransitionPipelineStatusUnit: + """Test pipeline status transition logic.""" + + @pytest.mark.parametrize( + "existing_status", + TERMINAL_PIPELINE_STATUSES, + ) + def test_terminal_state_results_in_retention_of_terminal_states( + self, mock_pipeline_manager, existing_status, mock_pipeline + ): + """No jobs in pipeline should result in no status change, so long as the pipeline is in a terminal state.""" + mock_pipeline.status = existing_status + + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={}), + 
patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result is existing_status + + mock_set_status.assert_not_called() + + def test_paused_state_results_in_retention_of_paused_state(self, mock_pipeline_manager, mock_pipeline): + """No jobs in pipeline should result in no status change when pipeline is paused.""" + mock_pipeline.status = PipelineStatus.PAUSED + + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={}), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result is PipelineStatus.PAUSED + + mock_set_status.assert_not_called() + + @pytest.mark.parametrize( + "existing_status", + [ + status + for status in PipelineStatus._member_map_.values() + if status not in TERMINAL_PIPELINE_STATUSES + [PipelineStatus.PAUSED] + ], + ) + def test_no_jobs_results_in_succeeded_state_if_not_terminal( + self, mock_pipeline_manager, existing_status, mock_pipeline + ): + """No jobs in pipeline should result in SUCCEEDED state if not already terminal.""" + mock_pipeline.status = existing_status + mock_pipeline.finished_at = None + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={}), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == PipelineStatus.SUCCEEDED + + mock_set_status.assert_called_once_with(PipelineStatus.SUCCEEDED) + + @pytest.mark.parametrize( + "job_counts,expected_status", + [ + # Non-leaf FAILED job always fails the pipeline (is_leaf_job=False below) + ({JobStatus.SUCCEEDED: 
10, JobStatus.FAILED: 1}, PipelineStatus.FAILED), + # ERRORED job always fails the pipeline regardless of topology + ({JobStatus.SUCCEEDED: 10, JobStatus.ERRORED: 1}, PipelineStatus.FAILED), + ({JobStatus.ERRORED: 1}, PipelineStatus.FAILED), + # Running or queued jobs without failures keep pipeline running + ({JobStatus.SUCCEEDED: 5, JobStatus.FAILED: 0, JobStatus.RUNNING: 2}, PipelineStatus.RUNNING), + ({JobStatus.SUCCEEDED: 5, JobStatus.FAILED: 0, JobStatus.QUEUED: 3}, PipelineStatus.RUNNING), + # All succeeded + ({JobStatus.SUCCEEDED: 5}, PipelineStatus.SUCCEEDED), + # Mix of terminal states without failures + ({JobStatus.SUCCEEDED: 3, JobStatus.SKIPPED: 2}, PipelineStatus.PARTIAL), + ({JobStatus.SUCCEEDED: 1, JobStatus.CANCELLED: 1}, PipelineStatus.PARTIAL), + # All cancelled + ({JobStatus.CANCELLED: 5}, PipelineStatus.CANCELLED), + # All skipped + ({JobStatus.SKIPPED: 4}, PipelineStatus.CANCELLED), + # Some cancelled and skipped + ({JobStatus.CANCELLED: 2, JobStatus.SKIPPED: 3}, PipelineStatus.CANCELLED), + # Inconsistent state + ({JobStatus.CANCELLED: 2, JobStatus.SKIPPED: 1, JobStatus.SUCCEEDED: 1, None: 3}, PipelineStatus.PARTIAL), + ], + ) + def test_pipeline_status_determination_based_on_job_counts( + self, mock_pipeline_manager, job_counts, expected_status, mock_pipeline + ): + """Test pipeline status determination based on job counts. + + For FAILED cases, is_leaf_job is patched to return False (non-leaf), + so the pipeline always transitions to FAILED on job failure. + Leaf-failure topology is covered in TestLeafJobFailureUnit. 
+ """ + mock_pipeline.status = PipelineStatus.CREATED + mock_pipeline.finished_at = None + mock_failed_job = Mock(spec=JobRun, id=1) + + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value=job_counts), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[mock_failed_job]), + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=False), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == expected_status + + mock_set_status.assert_called_once_with(expected_status) + + @pytest.mark.parametrize( + "job_counts,existing_status", + [ + ({JobStatus.PENDING: 5}, PipelineStatus.CREATED), + ({JobStatus.SUCCEEDED: 5, JobStatus.PENDING: 3}, PipelineStatus.RUNNING), + ({JobStatus.PENDING: 2, JobStatus.SKIPPED: 4}, PipelineStatus.RUNNING), + ({JobStatus.PENDING: 1, JobStatus.CANCELLED: 1}, PipelineStatus.RUNNING), + ], + ) + def test_pipeline_status_determination_pending_jobs_do_not_change_status( + self, mock_pipeline_manager, job_counts, existing_status, mock_pipeline + ): + """Test that presence of pending jobs does not change pipeline status.""" + mock_pipeline.status = existing_status + + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value=job_counts, + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == existing_status + + mock_set_status.assert_not_called() + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_pipeline_status_determination_throws_state_error_for_handled_exceptions( + self, mock_pipeline_manager, exception + ): + """Test that handled 
exceptions during status determination raise PipelineStateError.""" + + # Mocks exception in first try/except + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value=Mock(side_effect=exception), + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + pytest.raises(PipelineStateError), + ): + mock_pipeline_manager.transition_pipeline_status() + mock_set_status.assert_not_called() + + # Mocks exception in second try/except + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value={JobStatus.SUCCEEDED: 5}, + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", side_effect=exception) as mock_set_status, + patch.object( + mock_pipeline_manager, "get_pipeline", return_value=Mock(spec=Pipeline, status=PipelineStatus.CREATED) + ), + pytest.raises(PipelineStateError), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.transition_pipeline_status() + + def test_pipeline_status_determination_no_change(self, mock_pipeline_manager, mock_pipeline): + """Test that no status change occurs if pipeline status remains the same.""" + mock_pipeline.status = PipelineStatus.SUCCEEDED + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={JobStatus.SUCCEEDED: 5}), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == PipelineStatus.SUCCEEDED + + mock_set_status.assert_not_called() + + def test_leaf_failed_job_keeps_pipeline_running_when_siblings_active(self, mock_pipeline_manager, mock_pipeline): + """A FAILED leaf job keeps the pipeline RUNNING if active sibling jobs remain.""" + mock_pipeline.status = PipelineStatus.RUNNING + mock_failed_job = Mock(spec=JobRun, id=1) + 
+ with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value={JobStatus.FAILED: 1, JobStatus.RUNNING: 2}, + ), + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[mock_failed_job]), + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=True), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + + assert result == PipelineStatus.RUNNING + mock_set_status.assert_not_called() + + def test_leaf_failed_job_yields_partial_when_all_jobs_terminal(self, mock_pipeline_manager, mock_pipeline): + """A FAILED leaf job with no remaining active jobs yields PARTIAL pipeline status.""" + mock_pipeline.status = PipelineStatus.RUNNING + mock_failed_job = Mock(spec=JobRun, id=1) + + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value={JobStatus.SUCCEEDED: 5, JobStatus.FAILED: 1}, + ), + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[mock_failed_job]), + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=True), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + + assert result == PipelineStatus.PARTIAL + mock_set_status.assert_called_once_with(PipelineStatus.PARTIAL) + + def test_non_leaf_failed_job_always_fails_pipeline(self, mock_pipeline_manager, mock_pipeline): + """A FAILED non-leaf job always transitions the pipeline to FAILED.""" + mock_pipeline.status = PipelineStatus.RUNNING + mock_failed_job = Mock(spec=JobRun, id=1) + + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value={JobStatus.SUCCEEDED: 3, JobStatus.FAILED: 1}, + ), + patch.object(mock_pipeline_manager, 
"get_failed_jobs", return_value=[mock_failed_job]), + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=False), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + + assert result == PipelineStatus.FAILED + mock_set_status.assert_called_once_with(PipelineStatus.FAILED) + + def test_errored_job_always_fails_pipeline_regardless_of_topology(self, mock_pipeline_manager, mock_pipeline): + """An ERRORED job always transitions the pipeline to FAILED, never checked for leaf status.""" + mock_pipeline.status = PipelineStatus.RUNNING + + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value={JobStatus.SUCCEEDED: 5, JobStatus.ERRORED: 1}, + ), + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[]) as mock_get_failed, + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=True) as mock_is_leaf, + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + + assert result == PipelineStatus.FAILED + mock_set_status.assert_called_once_with(PipelineStatus.FAILED) + mock_get_failed.assert_not_called() + mock_is_leaf.assert_not_called() + + +class TestTransitionPipelineStatusIntegration: + """Integration tests for pipeline status transition logic.""" + + @pytest.mark.parametrize( + "initial_status", + TERMINAL_PIPELINE_STATUSES, + ) + def test_pipeline_status_transition_noop_when_status_is_terminal( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + initial_status, + ): + """Test that pipeline status remains unchanged when already in a terminal state.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set initial pipeline status + 
        manager.set_pipeline_status(initial_status)
        session.commit()

        with TransactionSpy.spy(session):
            new_status = manager.transition_pipeline_status()

        # Commit the transaction
        session.commit()

        # Verify that the pipeline status remains unchanged
        assert new_status == initial_status
        assert manager.get_pipeline_status() == initial_status

    def test_pipeline_status_transition_noop_when_status_is_paused(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
    ):
        """Test that pipeline status remains unchanged when in PAUSED state."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set initial pipeline status to PAUSED
        manager.set_pipeline_status(PipelineStatus.PAUSED)
        session.commit()

        with TransactionSpy.spy(session):
            new_status = manager.transition_pipeline_status()

        # Commit the transaction
        session.commit()

        # Verify that the pipeline status remains unchanged
        assert new_status == PipelineStatus.PAUSED
        assert manager.get_pipeline_status() == PipelineStatus.PAUSED

    # Exhaustively covers every PipelineStatus member: terminal and PAUSED
    # statuses stay as-is; every other status collapses to SUCCEEDED when the
    # pipeline has no jobs.
    @pytest.mark.parametrize(
        "initial_status,expected_status",
        [
            (
                status,
                status if status in TERMINAL_PIPELINE_STATUSES + [PipelineStatus.PAUSED] else PipelineStatus.SUCCEEDED,
            )
            for status in PipelineStatus._member_map_.values()
        ],
    )
    def test_pipeline_status_transition_when_no_jobs_in_pipeline(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        initial_status,
        expected_status,
        sample_empty_pipeline,
    ):
        """Test that pipeline status transitions to SUCCEEDED when there are no jobs in a
        non-terminal pipeline.

        If the pipeline is already in a terminal state, it should remain unchanged."""
        manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id)

        # Set initial pipeline status
        manager.set_pipeline_status(initial_status)
        session.commit()

        with TransactionSpy.spy(session):
            new_status = manager.transition_pipeline_status()

        # Commit the transaction
        session.commit()

        # Verify that the pipeline status is the expected status and that
        # the status was persisted to the transaction
        assert new_status == expected_status
        assert manager.get_pipeline_status() == expected_status

    # Job ids 1 and 2 refer to the two jobs created by with_populated_job_data:
    # id=1 is the non-leaf (dependency) job, id=2 is the leaf (dependent) job.
    @pytest.mark.parametrize(
        "initial_status,job_updates,expected_status",
        [
            # Non-leaf job (id=1) FAILED -> pipeline FAILED
            (PipelineStatus.CREATED, {1: JobStatus.FAILED, 2: JobStatus.PENDING}, PipelineStatus.FAILED),
            # Leaf job (id=2) FAILED, non-leaf SUCCEEDED -> PARTIAL (leaf failure does not fail the pipeline)
            (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.FAILED}, PipelineStatus.PARTIAL),
            # Some running -> running
            (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.RUNNING}, PipelineStatus.RUNNING),
            # Some queued -> running
            (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.QUEUED}, PipelineStatus.RUNNING),
            # Some pending => no change (handled separately via a second call to transition after enqueuing jobs)
            (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.PENDING}, PipelineStatus.CREATED),
            (PipelineStatus.RUNNING, {1: JobStatus.SUCCEEDED, 2: JobStatus.PENDING}, PipelineStatus.RUNNING),
            # All succeeded -> succeeded
            (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.SUCCEEDED}, PipelineStatus.SUCCEEDED),
            # All cancelled -> cancelled
            (PipelineStatus.RUNNING, {1: JobStatus.CANCELLED, 2: JobStatus.CANCELLED}, PipelineStatus.CANCELLED),
            # Mix of succeeded and skipped -> partial
            (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.SKIPPED}, PipelineStatus.PARTIAL),
            # Mix of succeeded and cancelled -> partial
            (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.CANCELLED}, PipelineStatus.PARTIAL),
            # Mix of cancelled and skipped -> cancelled
            (PipelineStatus.CREATED, {1: JobStatus.CANCELLED, 2: JobStatus.SKIPPED}, PipelineStatus.CANCELLED),
        ],
    )
    def test_pipeline_status_transitions(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        initial_status,
        job_updates,
        expected_status,
    ):
        """Test pipeline status transitions based on job status updates."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set initial pipeline status
        manager.set_pipeline_status(initial_status)
        session.commit()

        # Update job statuses as per test case
        for job_run in sample_pipeline.job_runs:
            if job_run.id in job_updates:
                job_run.status = job_updates[job_run.id]
        session.commit()

        # Perform status transition and verify return state
        with TransactionSpy.spy(session):
            new_status = manager.transition_pipeline_status()
            assert new_status == expected_status
        session.commit()

        # Verify expected pipeline status is persisted
        pipeline = manager.get_pipeline()
        assert pipeline.status == expected_status


@pytest.mark.unit
class TestEnqueueReadyJobsUnit:
    """Test enqueuing of ready jobs (both independent and dependent)."""

    @pytest.mark.parametrize(
        "pipeline_status",
        [status for status in PipelineStatus._member_map_.values() if status not in RUNNING_PIPELINE_STATUSES],
    )
    @pytest.mark.asyncio
    async def test_enqueue_ready_jobs_raises_if_pipeline_not_running(self, mock_pipeline_manager, pipeline_status):
        """Test that job enqueuing raises a state error if pipeline is not in RUNNING status."""
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status),
            pytest.raises(PipelineStateError, match="cannot enqueue jobs"),
            TransactionSpy.spy(mock_pipeline_manager.db),
        ):
            await mock_pipeline_manager.enqueue_ready_jobs()

    @pytest.mark.asyncio
    async def test_enqueue_ready_jobs_skips_if_no_jobs(self, mock_pipeline_manager):
        """Test that job enqueuing skips if there are no pending jobs."""
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING),
            patch.object(
                mock_pipeline_manager,
                "get_pending_jobs",
                return_value=[],
            ),
            TransactionSpy.spy(mock_pipeline_manager.db, expect_commit=True),
        ):
            await mock_pipeline_manager.enqueue_ready_jobs()
            # Should complete without error

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "should_skip",
        [False, True],
    )
    async def test_enqueue_ready_jobs_checks_if_jobs_are_reachable_if_cant_enqueue(
        self, mock_pipeline_manager, mock_job_manager, should_skip
    ):
        """Test that job enqueuing skips jobs which are unreachable if any exist."""
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING),
            patch.object(
                mock_pipeline_manager, "get_pending_jobs", return_value=[Mock(spec=JobRun, id=1, urn="test:job:1")]
            ),
            patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=False),
            patch.object(
                mock_pipeline_manager, "should_skip_job_due_to_dependencies", return_value=(should_skip, "Reason")
            ) as mock_should_skip,
            patch.object(mock_job_manager, "skip_job", return_value=None) as mock_skip_job,
            TransactionSpy.spy(mock_pipeline_manager.db, expect_commit=True),
        ):
            await mock_pipeline_manager.enqueue_ready_jobs()

        mock_should_skip.assert_called_once()
        # NOTE(review): conditional expression used for its side effects; a plain
        # if/else statement would be clearer here.
        mock_skip_job.assert_called_once() if should_skip else mock_skip_job.assert_not_called()

    @pytest.mark.asyncio
    async def test_enqueue_ready_jobs_raises_if_arq_enqueue_fails(self, mock_pipeline_manager, mock_job_manager):
        """Test that job enqueuing raises an error if ARQ enqueue fails."""
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING),
            patch.object(
                mock_pipeline_manager,
                "get_pending_jobs",
                return_value=[Mock(spec=JobRun, id=1, urn="test:job:1", retry_count=0)],
            ),
            patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=True),
            patch.object(mock_job_manager, "prepare_queue", return_value=None) as mock_prepare_queue,
            patch.object(
                mock_pipeline_manager, "_enqueue_in_arq", side_effect=PipelineCoordinationError("ARQ enqueue failed")
            ),
            pytest.raises(PipelineCoordinationError, match="ARQ enqueue failed"),
            TransactionSpy.spy(mock_pipeline_manager.db, expect_commit=True),
        ):
            await mock_pipeline_manager.enqueue_ready_jobs()

        # The queue was prepared before the ARQ failure surfaced.
        mock_prepare_queue.assert_called_once()

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "retry_count, expected_is_retry",
        [
            (0, False),
            (1, True),
            (3, True),
        ],
    )
    async def test_enqueue_ready_jobs_successful_enqueue(
        self, mock_pipeline_manager, mock_job_manager, retry_count, expected_is_retry
    ):
        """Test successful job enqueuing, passing is_retry based on retry_count."""
        mock_job = Mock(spec=JobRun, id=1, urn="test:job:1", retry_count=retry_count)
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING),
            patch.object(mock_pipeline_manager, "get_pending_jobs", return_value=[mock_job]),
            patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=True),
            patch.object(mock_pipeline_manager, "_enqueue_in_arq", return_value=None) as mock_enqueue,
            patch.object(mock_job_manager, "prepare_queue", return_value=None) as mock_prepare_queue,
            TransactionSpy.spy(mock_pipeline_manager.db, expect_commit=True),
        ):
            await mock_pipeline_manager.enqueue_ready_jobs()

        mock_prepare_queue.assert_called_once()
        mock_enqueue.assert_called_once_with(mock_job, is_retry=expected_is_retry)


@pytest.mark.integration
class TestEnqueueReadyJobsIntegration:
    """Integration tests for enqueuing of ready jobs."""

    @pytest.mark.asyncio
    async def test_enqueue_ready_jobs_integration(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        sample_job_run,
        sample_dependent_job_run,
    ):
        """Test successful enqueuing of ready jobs in a pipeline."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set the pipeline to RUNNING status
        manager.set_pipeline_status(PipelineStatus.RUNNING)
        session.commit()

        with TransactionSpy.spy(session, expect_flush=True, expect_commit=True):
            await manager.enqueue_ready_jobs()

        # Verify that the independent job is now queued
        job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
        assert job.status == JobStatus.QUEUED

        # Verify that the dependent job is still pending (since its dependency is not yet complete)
        job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one()
        assert job.status == JobStatus.PENDING

        # Verify the queued ARQ job exists and is the job we expect
        arq_job = await arq_redis.queued_jobs()
        assert len(arq_job) == 1
        assert arq_job[0].function == sample_job_run.job_function

        # Verify the pipeline is still in RUNNING status
        assert manager.get_pipeline_status() == PipelineStatus.RUNNING

    @pytest.mark.asyncio
    async def test_enqueue_ready_jobs_integration_with_unreachable_job(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        sample_job_run,
        sample_dependent_job_run,
        sample_job_dependency,
    ):
        """Test enqueuing of ready jobs skips unreachable jobs in a pipeline."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set the pipeline to RUNNING status
        manager.set_pipeline_status(PipelineStatus.RUNNING)
        session.commit()

        # Make the dependent job unreachable by setting the sample_job to cancelled.
        sample_job_run.status = JobStatus.CANCELLED
        session.commit()

        with TransactionSpy.spy(session, expect_commit=True, expect_flush=True):
            await manager.enqueue_ready_jobs()

        # Verify that the dependent job is marked as skipped
        job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one()
        assert job.status == JobStatus.SKIPPED

        # Verify nothing was enqueued for the dependent job
        arq_job = await arq_redis.queued_jobs()
        assert len(arq_job) == 0

        # Verify the pipeline is still in RUNNING status
        assert manager.get_pipeline_status() == PipelineStatus.RUNNING

    @pytest.mark.asyncio
    async def test_enqueue_ready_jobs_with_empty_pipeline(
        self, session, arq_redis, with_populated_job_data, sample_empty_pipeline
    ):
        """Test enqueuing of ready jobs in an empty pipeline."""
        manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id)

        # Set the pipeline to RUNNING status
        manager.set_pipeline_status(PipelineStatus.RUNNING)
        session.commit()

        with TransactionSpy.spy(session, expect_commit=True):
            await manager.enqueue_ready_jobs()

        # Verify nothing was enqueued
        arq_job = await arq_redis.queued_jobs()
        assert len(arq_job) == 0

        # Verify the pipeline is still in RUNNING status
        assert manager.get_pipeline_status() == PipelineStatus.RUNNING

    @pytest.mark.asyncio
    async def test_enqueue_ready_jobs_bubbles_pipeline_coordination_error_for_any_exception_during_enqueue(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        sample_job_run,
    ):
        """Test that any exception during job enqueuing raises PipelineCoordinationError."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set the pipeline to RUNNING status
        manager.set_pipeline_status(PipelineStatus.RUNNING)
        session.commit()

        # Force the underlying redis enqueue to blow up with a generic Exception;
        # the manager is expected to wrap it in PipelineCoordinationError.
        with (
            TransactionSpy.spy(session, expect_flush=True, expect_commit=True),
            patch.object(
                manager.redis,
                "enqueue_job",
                side_effect=Exception("Unexpected error during enqueue"),
            ),
            pytest.raises(PipelineCoordinationError, match="Failed to enqueue job in ARQ"),
        ):
            await manager.enqueue_ready_jobs()

    @pytest.mark.asyncio
    async def test_enqueue_ready_jobs_passes_is_retry_true_for_retried_jobs(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        sample_job_run,
    ):
        """Test that enqueue_ready_jobs passes is_retry=True when retry_count > 0."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set the pipeline to RUNNING status and simulate a retried job
        manager.set_pipeline_status(PipelineStatus.RUNNING)
        sample_job_run.retry_count = 1
        session.commit()

        with (
            TransactionSpy.spy(session, expect_flush=True, expect_commit=True),
            patch.object(manager, "_enqueue_in_arq", wraps=manager._enqueue_in_arq) as mock_enqueue,
        ):
            await manager.enqueue_ready_jobs()

        mock_enqueue.assert_called_once_with(sample_job_run, is_retry=True)

    @pytest.mark.asyncio
    async def test_enqueue_ready_jobs_passes_is_retry_false_for_first_attempt(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        sample_job_run,
    ):
        """Test that enqueue_ready_jobs passes is_retry=False when retry_count == 0."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set the pipeline to RUNNING status; retry_count defaults to 0
        manager.set_pipeline_status(PipelineStatus.RUNNING)
        session.commit()

        with (
            TransactionSpy.spy(session, expect_flush=True, expect_commit=True),
            patch.object(manager, "_enqueue_in_arq", wraps=manager._enqueue_in_arq) as mock_enqueue,
        ):
            await manager.enqueue_ready_jobs()

        mock_enqueue.assert_called_once_with(sample_job_run, is_retry=False)


@pytest.mark.unit
class TestCancelRemainingJobsUnit:
    """Test cancellation of remaining jobs."""

    def test_cancel_remaining_jobs_no_active_jobs(self, mock_pipeline_manager, mock_job_manager):
        """Test job cancellation when there are no active jobs."""
        with (
            patch.object(
                mock_pipeline_manager,
                "get_active_jobs",
                return_value=[],
            ),
            patch.object(mock_job_manager, "cancel_job", return_value=None) as mock_cancel_job,
            TransactionSpy.spy(mock_pipeline_manager.db),
        ):
            mock_pipeline_manager.cancel_remaining_jobs()

        mock_cancel_job.assert_not_called()

    @pytest.mark.parametrize(
        "job_status",
        [JobStatus.QUEUED, JobStatus.RUNNING],
    )
    def test_cancel_remaining_jobs_cancels_queued_and_running_jobs(
        self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status
    ):
        """Test successful cancellation of remaining jobs."""
        mock_job_run.status = job_status
        cancellation_result = JobExecutionOutcome(
            status=JobStatus.CANCELLED,
            data={"reason": "Pipeline cancelled"},
            error="Pipeline cancelled",
            exception=None,
        )

        with (
            patch.object(
                mock_pipeline_manager,
                "get_active_jobs",
                return_value=[mock_job_run],
            ),
            patch.object(mock_job_manager, "cancel_job", return_value=None) as mock_cancel_job,
            patch(
                "mavedb.worker.lib.managers.pipeline_manager.construct_bulk_cancellation_result",
                return_value=cancellation_result,
            ),
            TransactionSpy.spy(mock_pipeline_manager.db),
        ):
            mock_pipeline_manager.cancel_remaining_jobs()

        mock_cancel_job.assert_called_once_with(result=cancellation_result)

    @pytest.mark.parametrize(
        "job_status",
        [JobStatus.PENDING],
    )
    def test_cancel_remaining_jobs_skips_pending_jobs(
        self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status
    ):
        """Test that PENDING jobs are skipped (via skip_job) rather than cancelled."""
        mock_job_run.status = job_status
        cancellation_result = JobExecutionOutcome.skipped(data={"reason": "Pipeline cancelled"})

        with (
            patch.object(
                mock_pipeline_manager,
                "get_active_jobs",
                return_value=[mock_job_run],
            ),
            patch.object(mock_job_manager, "skip_job", return_value=None) as mock_skip_job,
            patch(
                "mavedb.worker.lib.managers.pipeline_manager.construct_bulk_cancellation_result",
                return_value=cancellation_result,
            ),
            TransactionSpy.spy(mock_pipeline_manager.db),
        ):
            mock_pipeline_manager.cancel_remaining_jobs()

        mock_skip_job.assert_called_once_with(result=cancellation_result)


@pytest.mark.integration
class TestCancelRemainingJobsIntegration:
    """Integration tests for cancellation of remaining jobs."""

    def test_cancel_remaining_jobs_integration(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        sample_job_run,
        sample_dependent_job_run,
    ):
        """Test successful cancellation of remaining jobs in a pipeline."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set the job statuses
        sample_job_run.status = JobStatus.RUNNING
        sample_dependent_job_run.status = JobStatus.PENDING
        session.commit()

        with (
            TransactionSpy.spy(session),
        ):
            manager.cancel_remaining_jobs()

        # Commit the transaction
        session.commit()

        # Verify that the running job transitions to cancelled
        job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
        assert job.status == JobStatus.CANCELLED

        # Verify that the pending dependent job transitions to skipped
        job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one()
        assert job.status == JobStatus.SKIPPED

    def test_cancel_remaining_jobs_integration_no_active_jobs(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_empty_pipeline,
    ):
        """Test cancellation of remaining jobs when there are no active jobs."""
        manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id)

        with (
            TransactionSpy.spy(session),
        ):
            manager.cancel_remaining_jobs()

        # Commit the transaction
        session.commit()

        # Should complete without error


@pytest.mark.unit
class TestCancelPipelineUnit:
    """Test cancellation of pipelines."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "pipeline_status",
        TERMINAL_PIPELINE_STATUSES,
    )
    async def test_cancel_pipeline_raises_transition_error_if_already_in_terminal_status(
        self, mock_pipeline_manager, pipeline_status
    ):
        """Test that pipeline cancellation raises an error if already in terminal status."""
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status),
            pytest.raises(
                PipelineTransitionError,
                match=f"Pipeline {mock_pipeline_manager.pipeline_id} is in terminal state",
            ),
            patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status,
            patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate,
            TransactionSpy.spy(mock_pipeline_manager.db),
        ):
            await mock_pipeline_manager.cancel_pipeline(reason="Testing cancellation")

        # The early terminal-state check must fire before any state mutation.
        mock_set_status.assert_not_called()
        mock_coordinate.assert_not_called()

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "pipeline_status",
        [status for status in PipelineStatus._member_map_.values() if status not in TERMINAL_PIPELINE_STATUSES],
    )
    async def test_cancel_pipeline_successful_cancellation_if_not_in_terminal_status(
        self, mock_pipeline_manager, pipeline_status
    ):
        """Test successful pipeline cancellation if not already in terminal status."""
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status),
            patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status,
            patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate,
            TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True),
        ):
            await mock_pipeline_manager.cancel_pipeline(reason="Testing cancellation")

        mock_coordinate.assert_called_once()
        mock_set_status.assert_called_once_with(PipelineStatus.CANCELLED)


@pytest.mark.integration
class TestCancelPipelineIntegration:
    """Integration tests for cancellation of pipelines."""

    @pytest.mark.asyncio
    async def test_cancel_pipeline_integration(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        sample_job_run,
        sample_dependent_job_run,
    ):
        """Test successful cancellation of a pipeline."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set the pipeline to RUNNING status
        manager.set_pipeline_status(PipelineStatus.RUNNING)
        session.commit()

        # Set the job statuses
        sample_job_run.status = JobStatus.RUNNING
        sample_dependent_job_run.status = JobStatus.PENDING
        session.commit()

        with (
            TransactionSpy.spy(session, expect_flush=True),
        ):
            await manager.cancel_pipeline(reason="Testing cancellation")

        # Commit the transaction
        session.commit()

        # Verify that the pipeline is now in CANCELLED status
        assert manager.get_pipeline_status() == PipelineStatus.CANCELLED

        # Verify that the running job transitions to cancelled
        job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
        assert job.status == JobStatus.CANCELLED

        # Verify that the pending dependent job transitions to skipped
        job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one()
        assert job.status == JobStatus.SKIPPED

    @pytest.mark.asyncio
    async def test_cancel_pipeline_integration_already_terminal(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        sample_job_run,
    ):
        """Test that cancelling a pipeline already in terminal status raises an error."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set the pipeline to SUCCEEDED status
        manager.set_pipeline_status(PipelineStatus.SUCCEEDED)
        session.commit()

        # Set the job status to something that would normally be cancellable
        sample_job_run.status = JobStatus.PENDING
        session.commit()

        with (
            pytest.raises(
                PipelineTransitionError,
                match=f"Pipeline {manager.pipeline_id} is in terminal state",
            ),
            TransactionSpy.spy(session),
        ):
            await manager.cancel_pipeline(reason="Testing cancellation")

        # Commit the transaction
        session.commit()

        # Verify the pipeline status remains SUCCEEDED
        assert manager.get_pipeline_status() == PipelineStatus.SUCCEEDED

        # Verify that the job status remains unchanged
        job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one()
        assert job.status == JobStatus.PENDING


@pytest.mark.unit
class TestPausePipelineUnit:
    """Test pausing of pipelines."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "pipeline_status",
        TERMINAL_PIPELINE_STATUSES,
    )
    async def test_pause_pipeline_raises_transition_error_if_already_in_terminal_status(
        self, mock_pipeline_manager, pipeline_status
    ):
        """Test that pipeline pausing raises an error if already in terminal status."""
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status),
            pytest.raises(
                PipelineTransitionError,
                match=f"Pipeline {mock_pipeline_manager.pipeline_id} is in terminal state",
            ),
            TransactionSpy.spy(mock_pipeline_manager.db),
            patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status,
            patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate,
        ):
            await mock_pipeline_manager.pause_pipeline()

        mock_set_status.assert_not_called()
        mock_coordinate.assert_not_called()

    @pytest.mark.asyncio
    async def test_pause_pipeline_raises_transition_error_if_already_paused(self, mock_pipeline_manager):
        """Test that pipeline pausing raises an error if already paused."""
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.PAUSED),
            pytest.raises(
                PipelineTransitionError,
                match=f"Pipeline {mock_pipeline_manager.pipeline_id} is already paused",
            ),
            TransactionSpy.spy(mock_pipeline_manager.db),
            patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status,
            patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate,
        ):
            await mock_pipeline_manager.pause_pipeline()

        mock_set_status.assert_not_called()
        mock_coordinate.assert_not_called()

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "pipeline_status",
        [
            status
            for status in PipelineStatus._member_map_.values()
            if status not in TERMINAL_PIPELINE_STATUSES and status != PipelineStatus.PAUSED
        ],
    )
    async def test_pause_pipeline_successful_pausing_if_not_in_terminal_status(
        self, mock_pipeline_manager, pipeline_status
    ):
        """Test successful pipeline pausing if not already in terminal status."""
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status),
            patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status,
            patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate,
            TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True),
        ):
            await mock_pipeline_manager.pause_pipeline()

        mock_coordinate.assert_called_once()
        mock_set_status.assert_called_once_with(PipelineStatus.PAUSED)


@pytest.mark.integration
class TestPausePipelineIntegration:
    """Integration tests for pausing of pipelines."""

    @pytest.mark.asyncio
    async def test_pause_pipeline_integration(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
    ):
        """Test successful pausing of a pipeline."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set the pipeline to RUNNING status
        manager.set_pipeline_status(PipelineStatus.RUNNING)
        session.commit()

        with (
            TransactionSpy.spy(session, expect_flush=True),
        ):
            await manager.pause_pipeline()

        # Commit the transaction
        session.commit()

        # Verify that the pipeline is now in PAUSED status
        assert manager.get_pipeline_status() == PipelineStatus.PAUSED

        # Verify that all jobs remain in their original statuses
        # (coordinate_pipeline is called by pause_pipeline but should not change job statuses
        # while paused).
        for job_run in sample_pipeline.job_runs:
            assert job_run.status == JobStatus.PENDING


@pytest.mark.unit
class TestUnpausePipelineUnit:
    """Test unpausing of pipelines."""

    @pytest.mark.asyncio
    async def test_unpause_pipeline_raises_transition_error_if_not_paused(self, mock_pipeline_manager):
        """Test that pipeline unpausing raises an error if not currently paused."""
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING),
            pytest.raises(
                PipelineTransitionError,
                match=f"Pipeline {mock_pipeline_manager.pipeline_id} is not paused",
            ),
            TransactionSpy.spy(mock_pipeline_manager.db),
            patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status,
            patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate,
        ):
            await mock_pipeline_manager.unpause_pipeline()

        mock_set_status.assert_not_called()
        mock_coordinate.assert_not_called()

    @pytest.mark.asyncio
    async def test_unpause_pipeline_successful_unpausing_if_currently_paused(self, mock_pipeline_manager):
        """Test successful pipeline unpausing if currently paused."""
        with (
            patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.PAUSED),
            patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status,
            patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate,
            TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True),
        ):
            await mock_pipeline_manager.unpause_pipeline()

        mock_coordinate.assert_called_once()
        mock_set_status.assert_called_once_with(PipelineStatus.RUNNING)

+@pytest.mark.integration +class TestUnpausePipelineIntegration: + """Integration tests for unpausing of pipelines.""" + + @pytest.mark.asyncio + async def test_unpause_pipeline_integration( + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test successful unpausing of a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to PAUSED status + manager.set_pipeline_status(PipelineStatus.PAUSED) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), + ): + await manager.unpause_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the non-dependent job was queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + +@pytest.mark.unit +class TestRestartPipelineUnit: + """Test restarting of pipelines.""" + + @pytest.mark.asyncio + async def test_restart_pipeline_skips_if_no_jobs_in_pipeline(self, mock_pipeline_manager): + """Test that pipeline restart skips if there are no jobs in the pipeline.""" + with ( + patch.object( + mock_pipeline_manager, + "get_all_jobs", + return_value=[], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.restart_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_restart_pipeline_successful_restart(self, mock_pipeline_manager, mock_job_manager): + """Test successful pipeline restart.""" + with ( + patch.object(mock_pipeline_manager, 
"set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, + patch.object( + mock_pipeline_manager, + "get_all_jobs", + return_value=[Mock(spec=JobRun, id=1), Mock(spec=JobRun, id=2)], + ), + patch.object( + mock_job_manager, + "reset_job", + return_value=None, + ) as mock_reset_job, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.restart_pipeline() + + assert mock_reset_job.call_count == 2 + mock_set_status.assert_called_once_with(PipelineStatus.CREATED) + mock_start_pipeline.assert_called_once() + + +@pytest.mark.integration +class TestRestartPipelineIntegration: + """Integration tests for restarting of pipelines.""" + + @pytest.mark.asyncio + async def test_restart_pipeline_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful restarting of a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job statuses to terminal states + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), + ): + await manager.restart_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the non-dependent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the dependent job is now pending + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + async def 
test_restart_pipeline_integration_skips_if_no_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_empty_pipeline, + ): + """Test that restarting a pipeline with no jobs skips without error.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set the pipeline to a terminal status + manager.set_pipeline_status(PipelineStatus.SUCCEEDED) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + await manager.restart_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status remains unchanged + assert manager.get_pipeline_status() == PipelineStatus.SUCCEEDED + + +@pytest.mark.unit +class TestCanEnqueueJobUnit: + """Test job dependency checking.""" + + def test_can_enqueue_job_with_no_dependencies(self, mock_pipeline_manager): + """Test that a job with no dependencies can be enqueued.""" + mock_job = Mock(spec=JobRun, id=1) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[], + ), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.can_enqueue_job(mock_job) + + assert result is True + + def test_cannot_enqueue_job_with_unmet_dependencies(self, mock_pipeline_manager): + """Test that a job with unmet dependencies cannot be enqueued.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.PENDING) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_dependency_is_met", return_value=False + ) as mock_job_dependency_is_met, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.can_enqueue_job(mock_job) + + mock_job_dependency_is_met.assert_called_once_with( + dependency_type=DependencyType.COMPLETION_REQUIRED, 
dependent_job_status=JobStatus.PENDING + ) + assert result is False + + def test_can_enqueue_job_with_met_dependencies(self, mock_pipeline_manager): + """Test that a job with met dependencies can be enqueued.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_dependency_is_met", return_value=True + ) as mock_job_dependency_is_met, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.can_enqueue_job(mock_job) + + mock_job_dependency_is_met.assert_called_once_with( + dependency_type=DependencyType.COMPLETION_REQUIRED, dependent_job_status=JobStatus.SUCCEEDED + ) + assert result is True + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_can_enqueue_job_raises_pipeline_state_error_on_handled_exceptions(self, mock_pipeline_manager, exception): + """Test that handled exceptions during dependency checking raise PipelineStateError.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch("mavedb.worker.lib.managers.pipeline_manager.job_dependency_is_met", side_effect=exception), + pytest.raises(PipelineStateError, match="Corrupted dependency data"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.can_enqueue_job(mock_job) + + +@pytest.mark.integration +class TestCanEnqueueJobIntegration: + """Integration tests for job dependency checking.""" + + def test_can_enqueue_job_integration_with_no_dependencies( + 
self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + ): + """Test that a job with no dependencies can be enqueued.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + result = manager.can_enqueue_job(sample_job_run) + + assert result is True + + def test_can_enqueue_job_integration_with_unmet_dependencies( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_dependent_job_run, + ): + """Test that a job with unmet dependencies cannot be enqueued.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + result = manager.can_enqueue_job(sample_dependent_job_run) + + assert result is False + + def test_can_enqueue_job_integration_with_met_dependencies( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test that a job with met dependencies can be enqueued.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the dependency job to a succeeded status + sample_job_run.status = JobStatus.SUCCEEDED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + result = manager.can_enqueue_job(sample_dependent_job_run) + + assert result is True + + +@pytest.mark.unit +class TestShouldSkipJobDueToDependenciesUnit: + """Test job skipping due to unmet dependencies.""" + + def test_should_not_skip_job_with_no_dependencies(self, mock_pipeline_manager): + """Test that a job with no dependencies should not be skipped.""" + mock_job = Mock(spec=JobRun, id=1) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + return_value=(False, ""), + ) as mock_job_should_be_skipped, + 
TransactionSpy.spy(mock_pipeline_manager.db), + ): + should_skip, reason = mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + + mock_job_should_be_skipped.assert_not_called() + assert should_skip is False + assert reason == "" + + def test_should_skip_job_with_unreachable_dependency(self, mock_pipeline_manager): + """Test that a job with unreachable dependencies should be skipped.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.FAILED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.SUCCESS_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + return_value=(True, "Unfulfillable dependency detected"), + ) as mock_job_should_be_skipped, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + should_skip, reason = mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + + mock_job_should_be_skipped.assert_called_once_with( + dependency_type=DependencyType.SUCCESS_REQUIRED, dependent_job_status=JobStatus.FAILED + ) + assert should_skip is True + assert reason == "Unfulfillable dependency detected" + + def test_should_not_skip_job_with_reachable_dependency(self, mock_pipeline_manager): + """Test that a job with reachable dependencies should not be skipped.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + return_value=(False, ""), + ) as mock_job_should_be_skipped, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + should_skip, reason = 
mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + mock_job_should_be_skipped.assert_called_once_with( + dependency_type=DependencyType.COMPLETION_REQUIRED, dependent_job_status=JobStatus.SUCCEEDED + ) + assert should_skip is False + assert reason == "" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_should_skip_job_due_to_dependencies_raises_pipeline_state_error_on_handled_exceptions( + self, mock_pipeline_manager, exception + ): + """Test that handled exceptions during dependency checking raise PipelineStateError.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + side_effect=exception, + ), + pytest.raises(PipelineStateError, match="Corrupted dependency data"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + + +@pytest.mark.integration +class TestShouldSkipJobDueToDependenciesIntegration: + """Integration tests for job skipping due to unmet dependencies.""" + + def test_should_not_skip_job_with_no_dependencies( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + ): + """Test that a job with no dependencies should not be skipped.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + should_skip, reason = manager.should_skip_job_due_to_dependencies(sample_job_run) + + assert should_skip is False + assert reason == "" + + def test_should_skip_job_with_unreachable_dependency( + self, + session, + arq_redis, + with_populated_job_data, + 
sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test that a job with unreachable dependencies should be skipped.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job the dependency depends on to a failed status + sample_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + should_skip, reason = manager.should_skip_job_due_to_dependencies(sample_dependent_job_run) + + assert should_skip is True + assert reason == "Dependency did not succeed (failed)" + + def test_should_not_skip_job_with_reachable_dependency( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test that a job with reachable dependencies should not be skipped.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job the dependency depends on to a pending (still reachable) status + sample_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + should_skip, reason = manager.should_skip_job_due_to_dependencies(sample_dependent_job_run) + + assert should_skip is False + assert reason == "" + + +@pytest.mark.unit +class TestRetryFailedJobsUnit: + """Test retrying of failed jobs.""" + + @pytest.mark.asyncio + async def test_retry_failed_jobs_no_failed_jobs(self, mock_pipeline_manager, mock_job_manager): + """Test that retrying failed jobs skips if there are no failed jobs.""" + with ( + patch.object( + mock_pipeline_manager, + "get_failed_jobs", + return_value=[], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.retry_failed_jobs() + + 
mock_prepare_retry.assert_not_called() + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_retry_failed_jobs_successful_retry(self, mock_pipeline_manager, mock_job_manager): + """Test successful retrying of failed jobs.""" + mock_failed_job1 = Mock(spec=JobRun, id=1) + mock_failed_job2 = Mock(spec=JobRun, id=2) + + with ( + patch.object( + mock_pipeline_manager, + "get_failed_jobs", + return_value=[mock_failed_job1, mock_failed_job2], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object( + mock_job_manager, + "prepare_retry", + return_value=None, + ) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.retry_failed_jobs() + + assert mock_prepare_retry.call_count == 2 + mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) + mock_coordinate.assert_called_once() + + +@pytest.mark.integration +class TestRetryFailedJobsIntegration: + """Integration tests for retrying of failed jobs.""" + + @pytest.mark.asyncio + async def test_retry_failed_jobs_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful retrying of failed jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Set the job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), + ): + await manager.retry_failed_jobs() + + # Commit the transaction + session.commit() + + # Verify that the 
pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the failed job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the dependent job is still pending + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + async def test_retry_failed_jobs_integration_no_failed_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_empty_pipeline, + ): + """Test that retrying failed jobs skips if there are no failed jobs.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + await manager.retry_failed_jobs() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is not changed + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + +@pytest.mark.unit +class TestRetryUnsuccessfulJobsUnit: + """Test retrying of unsuccessful jobs.""" + + @pytest.mark.asyncio + async def test_retry_unsuccessful_jobs_no_unsuccessful_jobs(self, mock_pipeline_manager, mock_job_manager): + """Test that retrying unsuccessful jobs skips if there are no unsuccessful jobs.""" + with ( + patch.object( + mock_pipeline_manager, + "get_unsuccessful_jobs", + return_value=[], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.retry_unsuccessful_jobs() + + 
mock_prepare_retry.assert_not_called() + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_retry_unsuccessful_jobs_successful_retry(self, mock_pipeline_manager, mock_job_manager): + """Test successful retrying of unsuccessful jobs.""" + mock_failed_job1 = Mock(spec=JobRun, id=1) + mock_failed_job2 = Mock(spec=JobRun, id=2) + + with ( + patch.object( + mock_pipeline_manager, + "get_unsuccessful_jobs", + return_value=[mock_failed_job1, mock_failed_job2], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object( + mock_job_manager, + "prepare_retry", + return_value=None, + ) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.retry_unsuccessful_jobs() + + assert mock_prepare_retry.call_count == 2 + mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) + mock_coordinate.assert_called_once() + + +@pytest.mark.integration +class TestRetryUnsuccessfulJobsIntegration: + """Integration tests for retrying of unsuccessful jobs.""" + + @pytest.mark.asyncio + async def test_retry_unsuccessful_jobs_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful retrying of unsuccessful jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Set the job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.CANCELLED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), + ): + await manager.retry_unsuccessful_jobs() + + # Commit the transaction + 
session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the failed job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the cancelled dependent job is reset to pending + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + async def test_retry_unsuccessful_jobs_integration_no_unsuccessful_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_empty_pipeline, + ): + """Test that retrying unsuccessful jobs skips if there are no unsuccessful jobs.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + await manager.retry_unsuccessful_jobs() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is not changed + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + +@pytest.mark.unit +class TestRetryPipelineUnit: + """Test retrying of entire pipelines.""" + + @pytest.mark.asyncio + async def test_retry_pipeline_calls_retry_unsuccessful_jobs(self, mock_pipeline_manager, mock_job_manager): + """Test that retrying a pipeline calls retrying unsuccessful jobs.""" + with ( + patch.object( + mock_pipeline_manager, + "retry_unsuccessful_jobs", + return_value=None, + ) as mock_retry_unsuccessful_jobs, + TransactionSpy.spy(mock_pipeline_manager.db), # flush is handled in retry_unsuccessful_jobs, which we mock + ): + await mock_pipeline_manager.retry_pipeline() + + mock_retry_unsuccessful_jobs.assert_called_once() + + +@pytest.mark.integration +class TestRetryPipelineIntegration: + """Integration tests for retrying of 
entire pipelines.""" + + @pytest.mark.asyncio + async def test_retry_pipeline_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful retrying of an entire pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Set the job statuses + sample_job_run.status = JobStatus.CANCELLED + sample_dependent_job_run.status = JobStatus.SKIPPED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), + ): + await manager.retry_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the cancelled job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the skipped dependent job is reset to pending + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + +@pytest.mark.unit +class TestGetJobsByStatusUnit: + """Test job retrieval by status with mocked database.""" + + def test_get_jobs_by_status_wraps_sqlalchemy_error_with_database_error(self, mock_pipeline_manager): + """Test database error handling.""" + with ( + patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get jobs with status"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_jobs_by_status([JobStatus.RUNNING]) + + +@pytest.mark.integration +class TestGetJobsByStatusIntegration: + """Integration tests for job retrieval by status.""" + + @pytest.mark.parametrize( + "status", + 
JobStatus._member_map_.values(), + ) + def test_get_jobs_by_status_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + status, + ): + """Test retrieval of jobs by status.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = status + sample_dependent_job_run.status = [s for s in JobStatus if s != status][0] + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_jobs_by_status([status]) + + assert len(running_jobs) == 1 + assert running_jobs[0].id == sample_job_run.id + + def test_get_jobs_by_status_integration_no_matching_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + ): + """Test retrieval of jobs by status when no jobs match.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + jobs = manager.get_jobs_by_status([JobStatus.SUCCEEDED]) + + assert len(jobs) == 0 + + def test_get_jobs_by_status_integration_multiple_matching_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of jobs by status when multiple jobs match.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set both job statuses to RUNNING + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.RUNNING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_jobs_by_status([JobStatus.RUNNING]) + + assert len(running_jobs) == 2 + job_ids = {job.id for job in running_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_jobs_by_status_integration_no_jobs_in_pipeline( + self, + session, + arq_redis, + with_populated_job_data, + sample_empty_pipeline, + ): + """Test 
retrieval of jobs by status when there are no jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + jobs = manager.get_jobs_by_status([JobStatus.RUNNING]) + + assert len(jobs) == 0 + + def test_get_jobs_by_status_multiple_statuses( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of jobs by multiple statuses.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + jobs = manager.get_jobs_by_status([JobStatus.RUNNING, JobStatus.PENDING]) + + assert len(jobs) == 2 + job_ids = {job.id for job in jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + # Assert jobs are ordered by created by timestamp + assert jobs[0].created_at <= jobs[1].created_at + + +@pytest.mark.unit +class TestGetPendingJobsUnit: + """Test retrieval of pending jobs.""" + + def test_get_pending_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of pending jobs.""" + + with ( + patch.object( + mock_pipeline_manager, "get_jobs_by_status", return_value=[Mock(), Mock()] + ) as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + jobs = mock_pipeline_manager.get_pending_jobs() + + assert len(jobs) == 2 + mock_get_jobs_by_status.assert_called_once_with([JobStatus.PENDING]) + + +@pytest.mark.integration +class TestGetPendingJobsIntegration: + """Integration tests for retrieval of pending jobs.""" + + def test_get_pending_jobs_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of pending jobs.""" + manager = 
PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.PENDING + sample_dependent_job_run.status = JobStatus.RUNNING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + pending_jobs = manager.get_pending_jobs() + + assert len(pending_jobs) == 1 + assert pending_jobs[0].id == sample_job_run.id + + def test_get_pending_jobs_integration_no_pending_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of pending jobs when there are no pending jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.SUCCEEDED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + pending_jobs = manager.get_pending_jobs() + + assert len(pending_jobs) == 0 + + +@pytest.mark.unit +class TestGetRunningJobsUnit: + """Test retrieval of running jobs.""" + + def test_get_running_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of running jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_running_jobs() + mock_get_jobs_by_status.assert_called_once_with([JobStatus.RUNNING]) + + +@pytest.mark.unit +class TestGetActiveJobsUnit: + """Test retrieval of active jobs.""" + + def test_get_active_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of active jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_active_jobs() + mock_get_jobs_by_status.assert_called_once_with(ACTIVE_JOB_STATUSES) + + +@pytest.mark.integration +class TestGetActiveJobsIntegration: + 
"""Integration tests for retrieval of active jobs.""" + + def test_get_active_jobs_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of active jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + active_jobs = manager.get_active_jobs() + + assert len(active_jobs) == 2 + job_ids = {job.id for job in active_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_active_jobs_integration_no_active_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of active jobs when there are no active jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + active_jobs = manager.get_active_jobs() + + assert len(active_jobs) == 0 + + +@pytest.mark.integration +class TestGetRunningJobsIntegration: + """Integration tests for retrieval of running jobs.""" + + def test_get_running_jobs_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of running jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_running_jobs() + + assert len(running_jobs) == 1 + assert 
running_jobs[0].id == sample_job_run.id + + def test_get_running_jobs_integration_no_running_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of running jobs when there are no running jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_running_jobs() + + assert len(running_jobs) == 0 + + +@pytest.mark.unit +class TestGetFailedJobsUnit: + """Test retrieval of failed jobs.""" + + def test_get_failed_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of failed jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_failed_jobs() + + mock_get_jobs_by_status.assert_called_once_with([JobStatus.FAILED]) + + +@pytest.mark.integration +class TestGetFailedJobsIntegration: + """Integration tests for retrieval of failed jobs.""" + + def test_get_failed_jobs_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of failed jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + failed_jobs = manager.get_failed_jobs() + + assert len(failed_jobs) == 1 + assert failed_jobs[0].id == sample_job_run.id + + def test_get_failed_jobs_integration_no_failed_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, 
+ ): + """Test retrieval of failed jobs when there are no failed jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + failed_jobs = manager.get_failed_jobs() + + assert len(failed_jobs) == 0 + + +@pytest.mark.unit +class TestGetUnsuccessfulJobsUnit: + """Test retrieval of unsuccessful jobs.""" + + def test_get_unsuccessful_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of unsuccessful jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_unsuccessful_jobs() + mock_get_jobs_by_status.assert_called_once_with( + [JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED, JobStatus.ERRORED] + ) + + +@pytest.mark.integration +class TestGetUnsuccessfulJobsIntegration: + """Integration tests for retrieval of unsuccessful jobs.""" + + def test_get_unsuccessful_jobs_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of unsuccessful jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.CANCELLED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + unsuccessful_jobs = manager.get_unsuccessful_jobs() + + assert len(unsuccessful_jobs) == 2 + job_ids = {job.id for job in unsuccessful_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_unsuccessful_jobs_integration_no_unsuccessful_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + 
sample_dependent_job_run, + ): + """Test retrieval of unsuccessful jobs when there are no unsuccessful jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + unsuccessful_jobs = manager.get_unsuccessful_jobs() + + assert len(unsuccessful_jobs) == 0 + + +@pytest.mark.unit +class TestGetAllJobsUnit: + """Test retrieval of all jobs.""" + + def test_get_all_jobs_wraps_sqlalchemy_errors_with_database_error(self, mock_pipeline_manager): + """Test database error handling during retrieval of all jobs.""" + + with ( + patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get all jobs"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_all_jobs() + + +@pytest.mark.integration +class TestGetAllJobsIntegration: + """Integration tests for retrieval of all jobs.""" + + def test_get_all_jobs_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of all jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + all_jobs = manager.get_all_jobs() + + assert len(all_jobs) == 2 + job_ids = {job.id for job in all_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_all_jobs_integration_no_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_empty_pipeline, + ): + """Test retrieval of all jobs when there are no jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + all_jobs = manager.get_all_jobs() + 
+ assert len(all_jobs) == 0 + + def test_get_all_jobs_integration_multiple_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of all jobs when there are multiple jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Add an additional job to the pipeline + new_job = JobRun( + id=99, + urn="job:additional_job:999", + pipeline_id=sample_pipeline.id, + job_type="Additional Job", + job_function="additional_function", + status=JobStatus.PENDING, + ) + session.add(new_job) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + all_jobs = manager.get_all_jobs() + + assert len(all_jobs) == 3 + job_ids = {job.id for job in all_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + assert new_job.id in job_ids + + # Assert jobs are ordered by created by timestamp + assert all_jobs[0].created_at <= all_jobs[1].created_at <= all_jobs[2].created_at + + +@pytest.mark.unit +class TestGetDependenciesForJobUnit: + """Test retrieval of job dependencies.""" + + def test_get_dependencies_for_job_wraps_sqlalchemy_error_with_database_error(self, mock_pipeline_manager): + """Test database error handling during retrieval of job dependencies.""" + mock_job = Mock(spec=JobRun) + + with ( + patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get job dependencies for job"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_dependencies_for_job(mock_job) + + +@pytest.mark.integration +class TestGetDependenciesForJobIntegration: + """Integration tests for retrieval of job dependencies.""" + + def test_get_dependencies_for_job_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + 
sample_job_dependency, + ): + """Test retrieval of job dependencies.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + dependencies = manager.get_dependencies_for_job(sample_dependent_job_run) + + assert len(dependencies) == 1 + dependency, job = dependencies[0] + assert dependency.id == sample_job_dependency.id + assert job.id == sample_job_run.id + + def test_get_dependencies_for_job_integration_no_dependencies( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + ): + """Test retrieval of job dependencies when there are no dependencies.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + dependencies = manager.get_dependencies_for_job(sample_job_run) + + assert len(dependencies) == 0 + + def test_get_dependencies_for_job_integration_multiple_dependencies( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of job dependencies when there are multiple dependencies.""" + # Create additional job and dependency + additional_job = JobRun( + id=99, + urn="job:additional_job:999", + pipeline_id=sample_pipeline.id, + job_type="Additional Job", + job_function="additional_function", + status=JobStatus.PENDING, + ) + session.add(additional_job) + session.commit() + + additional_dependency = JobDependency( + id=sample_dependent_job_run.id, + depends_on_job_id=additional_job.id, + dependency_type=DependencyType.COMPLETION_REQUIRED, + ) + session.add(additional_dependency) + session.commit() + + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + dependencies = manager.get_dependencies_for_job(sample_dependent_job_run) + + assert len(dependencies) == 2 + fetched_dependency_ids = {dep.id for dep, job in dependencies} + 
implicit_dependency_ids = {dep.id for dep in sample_dependent_job_run.job_dependencies} + assert fetched_dependency_ids == implicit_dependency_ids + + +@pytest.mark.unit +class TestGetPipelineUnit: + """Test retrieval of pipeline.""" + + def test_get_pipeline_wraps_sqlalchemy_errors_with_database_error(self, mock_pipeline): + """Test database error handling during retrieval of pipeline.""" + + # Prepare mock PipelineManager with mocked DB session that will raise SQLAlchemyError on query. + # We don't use the default fixture here since it usually wraps this function. + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + manager = object.__new__(PipelineManager) + manager.db = mock_db + manager.redis = mock_redis + manager.pipeline_id = mock_pipeline.id + + with ( + patch.object(manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get pipeline"), + TransactionSpy.spy(manager.db), + ): + manager.get_pipeline() + + +@pytest.mark.integration +class TestGetPipelineIntegration: + """Integration tests for retrieval of pipeline.""" + + def test_get_pipeline_integration( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + ): + """Test retrieval of pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + pipeline = manager.get_pipeline() + + assert pipeline.id == sample_pipeline.id + assert pipeline.name == sample_pipeline.name + + def test_get_pipeline_integration_nonexistent_pipeline( + self, + session, + arq_redis, + with_populated_job_data, + ): + """Test retrieval of a nonexistent pipeline raises PipelineNotFoundError.""" + with ( + pytest.raises(DatabaseConnectionError, match="Failed to get pipeline 9999"), + TransactionSpy.spy(session), + ): + # get_pipeline is called implicitly during PipelineManager initialization + PipelineManager(session, arq_redis, pipeline_id=9999) + + 
@pytest.mark.unit
class TestGetJobCountsByStatusUnit:
    """Test retrieval of job counts by status."""

    def test_get_job_counts_by_status_wraps_sqlalchemy_errors_with_database_error(self, mock_pipeline_manager):
        """Test database error handling during retrieval of job counts by status."""

        with (
            patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")),
            pytest.raises(DatabaseConnectionError, match="Failed to get job counts for pipeline"),
            TransactionSpy.spy(mock_pipeline_manager.db),
        ):
            mock_pipeline_manager.get_job_counts_by_status()


@pytest.mark.integration
class TestGetJobCountsByStatusIntegration:
    """Integration tests for retrieval of job counts by status."""

    def test_get_job_counts_by_status_integration(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        sample_job_run,
        sample_dependent_job_run,
    ):
        """Test retrieval of job counts by status."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set job statuses
        sample_job_run.status = JobStatus.RUNNING
        sample_dependent_job_run.status = JobStatus.PENDING
        session.commit()

        with TransactionSpy.spy(session):
            counts = manager.get_job_counts_by_status()

        assert counts[JobStatus.RUNNING] == 1
        assert counts[JobStatus.PENDING] == 1
        # Statuses with no jobs are absent from the mapping, not zero-valued.
        assert counts.get(JobStatus.SUCCEEDED, 0) == 0

    def test_get_job_counts_by_status_integration_no_jobs(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_empty_pipeline,
    ):
        """Test retrieval of job counts by status when there are no jobs in the pipeline."""
        manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id)

        with TransactionSpy.spy(session):
            counts = manager.get_job_counts_by_status()

        assert counts == {}


@pytest.mark.unit
class TestGetPipelineProgressUnit:
    """Test retrieval of pipeline progress."""

    pass


@pytest.mark.integration
class TestGetPipelineProgressIntegration:
    """Integration tests for retrieval of pipeline progress."""

    pass


@pytest.mark.unit
class TestGetPipelineStatusUnit:
    """Test retrieval of pipeline status."""

    def test_get_pipeline_status_success(self, mock_pipeline_manager):
        """Test successful retrieval of pipeline status."""
        with (
            TransactionSpy.spy(mock_pipeline_manager.db),
            patch.object(
                mock_pipeline_manager,
                "get_pipeline",
                wraps=mock_pipeline_manager.get_pipeline,
            ) as mock_get_pipeline,
        ):
            mock_pipeline_manager.get_pipeline_status()
            mock_get_pipeline.assert_called_once()


@pytest.mark.integration
class TestGetPipelineStatusIntegration:
    """Integration tests for retrieval of pipeline status."""

    def test_get_pipeline_status_integration(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
    ):
        """Test retrieval of pipeline status."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        with TransactionSpy.spy(session):
            status = manager.get_pipeline_status()

        assert status == sample_pipeline.status


@pytest.mark.unit
class TestSetPipelineStatusUnit:
    """Test setting of pipeline status."""

    # IDIOM FIX: iterate the enum directly instead of reaching into the private
    # ``_member_map_`` attribute (equivalent for an alias-free enum).
    @pytest.mark.parametrize("pipeline_status", list(PipelineStatus))
    def test_set_pipeline_status_success(self, mock_pipeline_manager, pipeline_status):
        """Test successful setting of pipeline status."""
        mock_pipeline = Mock(spec=Pipeline, status=None)

        with (
            patch.object(
                mock_pipeline_manager,
                "get_pipeline",
                return_value=mock_pipeline,
            ) as mock_get_pipeline,
            TransactionSpy.spy(mock_pipeline_manager.db),
        ):
            mock_pipeline_manager.set_pipeline_status(pipeline_status)
            assert mock_pipeline.status == pipeline_status

        mock_get_pipeline.assert_called_once()

    @pytest.mark.parametrize(
        "pipeline_status",
        TERMINAL_PIPELINE_STATUSES,
    )
    def test_set_pipeline_status_sets_finished_at_property_for_terminal_status(
        self, mock_pipeline_manager, mock_pipeline, pipeline_status
    ):
        """Test that setting a terminal status updates the finished_at property."""
        # Set initial finished_at to None
        mock_pipeline.finished_at = None

        with TransactionSpy.spy(mock_pipeline_manager.db):
            before_update = datetime.datetime.now()
            mock_pipeline_manager.set_pipeline_status(pipeline_status)
            after_update = datetime.datetime.now()

        assert mock_pipeline.status == pipeline_status
        assert mock_pipeline.finished_at is not None
        assert before_update <= mock_pipeline.finished_at <= after_update

    def test_set_pipeline_status_clears_started_at_property_for_created_status(
        self, mock_pipeline_manager, mock_pipeline
    ):
        """Test that setting status to CREATED clears the started_at property."""

        with TransactionSpy.spy(mock_pipeline_manager.db):
            mock_pipeline_manager.set_pipeline_status(PipelineStatus.CREATED)
            assert mock_pipeline.status == PipelineStatus.CREATED
            assert mock_pipeline.started_at is None

    @pytest.mark.parametrize(
        "initial_started_at",
        [None, datetime.datetime.now() - datetime.timedelta(hours=1)],
    )
    def test_set_pipeline_status_sets_started_at_property_for_running_status(
        self, mock_pipeline_manager, mock_pipeline, initial_started_at
    ):
        """Test that setting status to RUNNING sets the started_at property if not already set."""
        mock_pipeline.started_at = initial_started_at
        with TransactionSpy.spy(mock_pipeline_manager.db):
            before_update = datetime.datetime.now()
            mock_pipeline_manager.set_pipeline_status(PipelineStatus.RUNNING)
            after_update = datetime.datetime.now()

        assert mock_pipeline.status == PipelineStatus.RUNNING

        if initial_started_at is None:
            assert mock_pipeline.started_at is not None
            assert before_update <= mock_pipeline.started_at <= after_update
        else:
            assert mock_pipeline.started_at == initial_started_at

    @pytest.mark.parametrize(
        "exception",
        HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION,
    )
    def test_set_pipeline_status_handled_exception_raises_pipeline_state_error(self, mock_pipeline_manager, exception):
        """Test that handled exceptions during setting of pipeline status raise PipelineStateError."""

        # PropertyMock calls its side_effect with the assigned value on set and with
        # no args on get: raise only on set so get_pipeline itself still works.
        def get_or_error(*args):
            if args:
                raise exception
            return PipelineStatus.CREATED

        with (
            patch.object(mock_pipeline_manager, "get_pipeline") as mock_pipeline,
            pytest.raises(PipelineStateError, match="Failed to set pipeline status"),
            TransactionSpy.spy(mock_pipeline_manager.db),
        ):
            # Mock exception when setting pipeline status
            mock_pipeline.return_value = Mock(spec=Pipeline)
            type(mock_pipeline.return_value).status = PropertyMock(side_effect=get_or_error)

            mock_pipeline_manager.set_pipeline_status(PipelineStatus.RUNNING)


@pytest.mark.integration
class TestSetPipelineStatusIntegration:
    """Integration tests for setting of pipeline status."""

    # IDIOM FIX: iterate the enum directly instead of the private ``_member_map_``.
    @pytest.mark.parametrize("pipeline_status", list(PipelineStatus))
    def test_set_pipeline_status_integration(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        pipeline_status,
    ):
        """Test setting of pipeline status."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        with TransactionSpy.spy(session):
            manager.set_pipeline_status(pipeline_status)

        # Commit the transaction
        session.commit()

        # Verify that the pipeline status is updated
        updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one()
        assert updated_pipeline.status == pipeline_status

    @pytest.mark.parametrize(
        "pipeline_status",
        TERMINAL_PIPELINE_STATUSES,
    )
    def test_set_pipeline_status_integration_terminal_status_sets_finished_at(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        pipeline_status,
    ):
        """Test that setting a terminal status updates the finished_at property."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        with TransactionSpy.spy(session):
            before_update = datetime.datetime.now(tz=datetime.timezone.utc)
            manager.set_pipeline_status(pipeline_status)
            after_update = datetime.datetime.now(tz=datetime.timezone.utc)

        # Commit the transaction
        session.commit()

        # Verify that the pipeline status and finished_at are updated
        updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one()
        assert updated_pipeline.status == pipeline_status
        assert updated_pipeline.finished_at is not None
        assert before_update <= updated_pipeline.finished_at <= after_update

    def test_set_pipeline_status_integration_created_status_clears_started_at(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
    ):
        """Test that setting status to CREATED clears the started_at property."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        with TransactionSpy.spy(session):
            manager.set_pipeline_status(PipelineStatus.CREATED)

        # Commit the transaction
        session.commit()

        # Verify that the pipeline status is updated and started_at is None
        updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one()
        assert updated_pipeline.status == PipelineStatus.CREATED
        assert updated_pipeline.started_at is None

    @pytest.mark.parametrize(
        "initial_started_at",
        [None, datetime.datetime.now(tz=datetime.timezone.utc) - datetime.timedelta(hours=1)],
    )
    def test_set_pipeline_status_integration_running_status_sets_started_at(
        self,
        session,
        arq_redis,
        with_populated_job_data,
        sample_pipeline,
        initial_started_at,
    ):
        """Test that setting status to RUNNING sets the started_at property if not already set."""
        manager = PipelineManager(session, arq_redis, sample_pipeline.id)

        # Set initial started_at
        sample_pipeline.started_at = initial_started_at
        session.commit()

        with TransactionSpy.spy(session):
            before_update = datetime.datetime.now(tz=datetime.timezone.utc)
            manager.set_pipeline_status(PipelineStatus.RUNNING)
            after_update = datetime.datetime.now(tz=datetime.timezone.utc)

        # Commit the transaction
        session.commit()

        # Verify that the pipeline status and started_at are updated
        updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one()
        assert updated_pipeline.status == PipelineStatus.RUNNING

        if initial_started_at is None:
            assert before_update <= updated_pipeline.started_at <= after_update
        else:
            assert updated_pipeline.started_at == initial_started_at


@pytest.mark.unit
class TestEnqueueInArqUnit:
    """Test enqueuing jobs in ARQ."""

    @pytest.mark.asyncio
    async def test_enqueue_in_arq_without_redis_raises_pipeline_coordination_error(self, mock_pipeline_manager):
        """Test that attempting to enqueue a job without a Redis connection raises PipelineCoordinationError."""
        mock_job = Mock(spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10)
        mock_pipeline_manager.redis = None

        with (
            pytest.raises(
                PipelineCoordinationError, match="Redis client is not configured for job enqueueing; cannot proceed."
            ),
            TransactionSpy.spy(mock_pipeline_manager.db),
        ):
            await mock_pipeline_manager._enqueue_in_arq(job=mock_job, is_retry=False)

    # TYPO FIX: parameter was previously spelled "enqueud".
    @pytest.mark.parametrize("enqueued", [Mock(spec=ArqJob), None])
    @pytest.mark.parametrize("retry", [True, False])
    async def test_enqueue_in_arq_success(self, mock_pipeline_manager, retry, enqueued):
        """Test successful enqueuing of a job in ARQ."""
        mock_job = Mock(
            spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10, retry_count=0
        )
        with (
            patch.object(mock_pipeline_manager.redis, "enqueue_job", return_value=enqueued) as mock_enqueue_job,
            TransactionSpy.spy(mock_pipeline_manager.db),
        ):
            await mock_pipeline_manager._enqueue_in_arq(job=mock_job, is_retry=retry)

            # Retries are deferred by the job's configured delay; first attempts run immediately.
            mock_enqueue_job.assert_called_once_with(
                mock_job.job_function,
                mock_job.id,
                _defer_by=datetime.timedelta(seconds=mock_job.retry_delay_seconds if retry else 0),
                _job_id=arq_job_id(mock_job),
            )

    @pytest.mark.asyncio
    async def test_any_enqueue_exception_raises_pipeline_coordination_error(self, mock_pipeline_manager):
        """Test that any exception during enqueuing raises PipelineCoordinationError."""
        mock_job = Mock(
            spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10, retry_count=0
        )

        with (
            patch.object(
                mock_pipeline_manager.redis,
                "enqueue_job",
                side_effect=Exception("Test exception"),
            ),
            pytest.raises(PipelineCoordinationError, match="Failed to enqueue job in ARQ"),
            TransactionSpy.spy(mock_pipeline_manager.db),
        ):
            await mock_pipeline_manager._enqueue_in_arq(job=mock_job, is_retry=False)


@pytest.mark.integration
class TestEnqueueInArqIntegration:
    """Integration tests for enqueuing jobs in ARQ."""

    @pytest.mark.asyncio
    async def test_enqueue_in_arq_integration(
        self,
        session,
        arq_redis: ArqRedis,
        with_populated_job_data,
        sample_pipeline,
        sample_job_run,
    ):
        """Test enqueuing of a job in ARQ."""
+ manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + await manager._enqueue_in_arq(job=sample_job_run, is_retry=False) + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + +@pytest.mark.integration +class TestPipelineManagerLifecycle: + """Integration tests for PipelineManager lifecycle.""" + + @pytest.mark.asyncio + async def test_full_pipeline_lifecycle( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + ): + """Test full lifecycle of PipelineManager including initialization and job retrieval.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # pipeline is created with pending jobs + pipeline = manager.get_pipeline() + all_jobs = manager.get_all_jobs() + + assert pipeline.id == sample_pipeline.id + assert len(all_jobs) == 2 + assert all_jobs[0].id == sample_job_run.id + assert all_jobs[0].status == JobStatus.PENDING + + # pipeline started + await manager.start_pipeline() + session.commit() + + # verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Simulate pipeline lifecycle for a two job sample pipeline. The workflow here should be as follows: + # - Enter pipeline manager decorator. We don't make any calls when a pipeline begins + # - Enter the job manager decorator. This sets the job to RUNNING. + # - Job runs... + # - Exit the job manager decorator. This sets the job to some terminal state. + # - Exit the pipeline manager decorator. 
This coordinates the pipeline, either + # enqueuing any newly queueable jobs or terminating it. + + # enter pipeline manager decorator: no work + pass + + # enter job manager decorator: set job to RUNNING + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # job runs... Actual job execution is out of scope for this test. Instead, evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # exit job manager decorator: set job to SUCCEEDED + job_manager.succeed_job(JobExecutionOutcome.succeeded()) + session.commit() + + # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify pipeline status is still RUNNING (since there is a dependent job) + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify that the completed job is now SUCCEEDED in the database + completed_job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert completed_job.status == JobStatus.SUCCEEDED + + # Verify that the dependent job is now QUEUED in the database and ARQ + dependent_job = session.execute( + select(JobRun).where(JobRun.pipeline_id == sample_pipeline.id).filter(JobRun.id != sample_job_run.id) + ).scalar_one() + assert dependent_job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == dependent_job.job_function + + # Simulate the next iteration of pipeline lifecycle. We've now entered a new context manager with + # steps identical to those described above but executing in the context of a newly enqueued dependent job. 
+ job_manager = JobManager(session, arq_redis, dependent_job.id) + + # enter pipeline manager decorator: no work + pass + + # enter job manager decorator: set dependent job to RUNNING + dependent_job_manager = JobManager(session, arq_redis, dependent_job.id) + dependent_job_manager.start_job() + session.commit() + + # job runs... Actual job execution is out of scope for this test. Instead, evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # exit job manager decorator: set dependent job to SUCCEEDED + job_manager.succeed_job(JobExecutionOutcome.succeeded()) + session.commit() + + # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify pipeline status is now SUCCEEDED + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify that the dependent job is now SUCCEEDED in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == dependent_job.id)).scalar_one() + assert dependent_job.status == JobStatus.SUCCEEDED + + @pytest.mark.asyncio + async def test_paused_pipeline_lifecycle( + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test lifecycle of a paused pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Simulate job start + 
job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Pause the pipeline. Pausing the pipeline while a job is running DOES NOT affect the job. + await manager.pause_pipeline() + session.commit() + + # Verify that the pipeline is paused + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.PAUSED + + # Evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # Simulate job completion + job_manager.succeed_job(JobExecutionOutcome.succeeded()) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify that the pipeline remains paused + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.PAUSED + + # Verify that no jobs were enqueued in ARQ + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + # Verify that the dependent job remains pending in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.PENDING + + # Unpause the pipeline + await manager.unpause_pipeline() + session.commit() + + # Verify that the pipeline is now running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify that the dependent job is is now queued in ARQ + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_dependent_job_run.job_function + + # Simulate dependent job start + dependent_job_manager = JobManager(session, arq_redis, sample_dependent_job_run.id) + dependent_job_manager.start_job() + session.commit() + + # Evict the dependent job from redis 
to simulate completion. + await arq_redis.flushdb() + + # Simulate dependent job completion + dependent_job_manager.succeed_job(JobExecutionOutcome.succeeded()) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify that the pipeline is now succeeded + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify that the dependent job is now succeeded in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.SUCCEEDED + + @pytest.mark.asyncio + async def test_cancelled_pipeline_lifecycle( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test lifecycle of a cancelled pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Simulate job start + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # Cancel the pipeline. This DOES have an effect on the running job. 
+ await manager.cancel_pipeline() + session.commit() + + # Verify that the pipeline is now cancelled + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.CANCELLED + + # Verify that the job is now cancelled in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + + # Verify that the dependent job is now skipped in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.SKIPPED + + # Verify that no jobs were enqueued in ARQ + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + @pytest.mark.asyncio + async def test_restart_pipeline_lifecycle( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + ): + """Test lifecycle of a restarted pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Start the job + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Evict the job from redis to simulate completion. 
+ await arq_redis.flushdb() + + exc = Exception("Simulated job failure") + job_manager.fail_job(result=JobExecutionOutcome.failed(reason=str(exc))) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify the pipeline failed + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.FAILED + + # Verify that the job is now failed in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # Restart the pipeline + await manager.restart_pipeline() + session.commit() + + # Verify that the pipeline is now created + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + @pytest.mark.asyncio + async def test_retry_pipeline_lifecycle( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + ): + """Test lifecycle of a restarted pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Add a cancelled job to the pipeline + cancelled_job = JobRun( + id=99, + pipeline_id=sample_pipeline.id, + job_function="cancelled_job_function", + job_type="CANCELLED_JOB", + status=JobStatus.CANCELLED, + urn="urn:cancelled_job", + ) + session.add(cancelled_job) + session.commit() + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = 
session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Start the job + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Evict the job from redis to simulate completion. + await arq_redis.flushdb() + + exc = Exception("Simulated job failure") + job_manager.fail_job(result=JobExecutionOutcome.failed(reason=str(exc))) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify the pipeline failed + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.FAILED + + # Verify that the job is now failed in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # Restart the pipeline + await manager.retry_pipeline() + session.commit() + + # Verify that the pipeline is now created + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status of failed job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify the previously cancelled job is now queued + job = session.execute(select(JobRun).where(JobRun.id == cancelled_job.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 2 + + +@pytest.mark.unit +class TestGetDependentsForJobUnit: + """Unit tests for PipelineManager.get_dependents_for_job.""" + + def test_returns_dependent_jobs(self, mock_pipeline_manager): + """Returns jobs in this pipeline that list the given job as a dependency.""" + mock_job = Mock(spec=JobRun, id=10) + 
mock_dependent = Mock(spec=JobRun, id=20) + + mock_pipeline_manager.db.execute.return_value.scalars.return_value.all.return_value = [mock_dependent] + + result = mock_pipeline_manager.get_dependents_for_job(mock_job) + + assert result == [mock_dependent] + + def test_raises_database_connection_error_on_sql_error(self, mock_pipeline_manager): + """Wraps SQLAlchemyError in DatabaseConnectionError.""" + mock_job = Mock(spec=JobRun, id=10) + mock_pipeline_manager.db.execute.side_effect = SQLAlchemyError("db failure") + + with pytest.raises(DatabaseConnectionError): + mock_pipeline_manager.get_dependents_for_job(mock_job) + + +@pytest.mark.integration +class TestGetDependentsForJobIntegration: + """Integration tests for PipelineManager.get_dependents_for_job.""" + + def test_returns_correct_dependents( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Returns the downstream jobs that depend on the given job.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + dependents = manager.get_dependents_for_job(sample_job_run) + assert len(dependents) == 1 + assert dependents[0].id == sample_dependent_job_run.id + + def test_returns_empty_for_leaf_job( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_dependent_job_run, + ): + """Returns empty sequence when no jobs depend on the given job.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + dependents = manager.get_dependents_for_job(sample_dependent_job_run) + assert len(dependents) == 0 + + +@pytest.mark.unit +class TestIsLeafJobUnit: + """Unit tests for PipelineManager.is_leaf_job.""" + + def test_returns_true_when_no_dependents(self, mock_pipeline_manager): + """Returns True when get_dependents_for_job returns empty.""" + mock_job = Mock(spec=JobRun, id=10) + + with patch.object(mock_pipeline_manager, "get_dependents_for_job", return_value=[]): + assert 
mock_pipeline_manager.is_leaf_job(mock_job) is True + + def test_returns_false_when_dependents_exist(self, mock_pipeline_manager): + """Returns False when job has at least one dependent.""" + mock_job = Mock(spec=JobRun, id=10) + mock_dependent = Mock(spec=JobRun, id=20) + + with patch.object(mock_pipeline_manager, "get_dependents_for_job", return_value=[mock_dependent]): + assert mock_pipeline_manager.is_leaf_job(mock_job) is False + + +@pytest.mark.unit +class TestGetFailedLeafJobsUnit: + """Unit tests for PipelineManager.get_failed_leaf_jobs.""" + + def test_excludes_non_leaf_failed_jobs(self, mock_pipeline_manager): + """Returns only failed jobs that have no dependents in this pipeline.""" + leaf_job = Mock(spec=JobRun, id=1) + non_leaf_job = Mock(spec=JobRun, id=2) + + with ( + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[leaf_job, non_leaf_job]), + patch.object( + mock_pipeline_manager.db, + "execute", + return_value=Mock(**{"scalars.return_value.all.return_value": [non_leaf_job.id]}), + ), + ): + result = mock_pipeline_manager.get_failed_leaf_jobs() + + assert result == [leaf_job] + + def test_raises_database_connection_error_on_sql_error(self, mock_pipeline_manager): + """Wraps SQLAlchemyError in DatabaseConnectionError.""" + mock_pipeline_manager.db.execute.side_effect = SQLAlchemyError("db failure") + + with pytest.raises(DatabaseConnectionError): + mock_pipeline_manager.get_failed_leaf_jobs() + + +@pytest.mark.integration +class TestGetFailedLeafJobsIntegration: + """Integration tests for PipelineManager.get_failed_leaf_jobs.""" + + def test_returns_only_leaf_failures( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Returns failed jobs that have no dependents, excluding non-leaf failures.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = 
JobStatus.FAILED + session.commit() + + leaf_failures = manager.get_failed_leaf_jobs() + + assert len(leaf_failures) == 1 + assert leaf_failures[0].id == sample_dependent_job_run.id + + def test_returns_empty_when_no_failed_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Returns empty list when no jobs have FAILED status.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + leaf_failures = manager.get_failed_leaf_jobs() + + assert leaf_failures == [] + + +@pytest.mark.unit +class TestComputeNewStatusUnit: + """Unit tests for PipelineManager._compute_new_status. + + These tests cover the status computation logic in isolation, verifying each + branch of the decision tree and the delegation to _compute_status_with_leaf_failures. + """ + + def test_errored_job_returns_failed(self, mock_pipeline_manager): + """Any ERRORED job always yields FAILED regardless of topology.""" + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 5, JobStatus.ERRORED: 1}, + 6, + ) + assert result == PipelineStatus.FAILED + + def test_errored_job_does_not_check_leaf_status(self, mock_pipeline_manager): + """ERRORED path short-circuits before any leaf topology check.""" + with ( + patch.object(mock_pipeline_manager, "get_failed_jobs") as mock_get_failed, + patch.object(mock_pipeline_manager, "is_leaf_job") as mock_is_leaf, + ): + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.ERRORED: 1}, + 1, + ) + + assert result == PipelineStatus.FAILED + mock_get_failed.assert_not_called() + mock_is_leaf.assert_not_called() + + def test_non_leaf_failed_job_returns_failed(self, mock_pipeline_manager): + """A FAILED non-leaf job always yields FAILED.""" + mock_failed_job = Mock(spec=JobRun, id=1) + with ( + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[mock_failed_job]), + 
patch.object(mock_pipeline_manager, "is_leaf_job", return_value=False), + ): + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 3, JobStatus.FAILED: 1}, + 4, + ) + + assert result == PipelineStatus.FAILED + + def test_leaf_failed_job_delegates_to_leaf_failure_helper(self, mock_pipeline_manager): + """When all failed jobs are leaves, delegates to _compute_status_with_leaf_failures.""" + mock_failed_job = Mock(spec=JobRun, id=1) + expected = PipelineStatus.RUNNING + counts = {JobStatus.SUCCEEDED: 3, JobStatus.FAILED: 1, JobStatus.RUNNING: 2} + total = 6 + + with ( + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[mock_failed_job]), + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=True), + patch.object( + mock_pipeline_manager, + "_compute_status_with_leaf_failures", + return_value=expected, + ) as mock_leaf_helper, + ): + result = mock_pipeline_manager._compute_new_status(PipelineStatus.RUNNING, counts, total) + + assert result == expected + mock_leaf_helper.assert_called_once_with(PipelineStatus.RUNNING, counts, total) + + @pytest.mark.parametrize( + "active_status", + [JobStatus.RUNNING, JobStatus.QUEUED], + ) + def test_active_jobs_without_failures_return_running(self, mock_pipeline_manager, active_status): + """RUNNING or QUEUED jobs (with no failures) yield RUNNING.""" + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 3, active_status: 1}, + 4, + ) + assert result == PipelineStatus.RUNNING + + @pytest.mark.parametrize( + "old_status", + [PipelineStatus.CREATED, PipelineStatus.RUNNING], + ) + def test_pending_jobs_preserve_old_status(self, mock_pipeline_manager, old_status): + """Presence of PENDING jobs preserves the current pipeline status unchanged.""" + result = mock_pipeline_manager._compute_new_status( + old_status, + {JobStatus.SUCCEEDED: 3, JobStatus.PENDING: 2}, + 5, + ) + assert result == old_status + + def 
test_all_succeeded_returns_succeeded(self, mock_pipeline_manager): + """All jobs in SUCCEEDED state yields SUCCEEDED.""" + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 5}, + 5, + ) + assert result == PipelineStatus.SUCCEEDED + + @pytest.mark.parametrize( + "status_counts,total", + [ + ({JobStatus.SUCCEEDED: 3, JobStatus.SKIPPED: 2}, 5), + ({JobStatus.SUCCEEDED: 1, JobStatus.CANCELLED: 1}, 2), + ({JobStatus.SUCCEEDED: 2, JobStatus.SKIPPED: 1, JobStatus.CANCELLED: 1}, 4), + ], + ) + def test_mixed_terminal_with_succeeded_returns_partial(self, mock_pipeline_manager, status_counts, total): + """Mix of terminal states including SUCCEEDED (no FAILED) yields PARTIAL.""" + result = mock_pipeline_manager._compute_new_status(PipelineStatus.RUNNING, status_counts, total) + + assert result == PipelineStatus.PARTIAL + + def test_inconsistent_job_counts_returns_partial_with_slack_alert(self, mock_pipeline_manager): + """Inconsistent total (counts don't sum to total) still yields PARTIAL but fires a Slack warning.""" + # total=10 but counts only sum to 6 — inconsistent + with patch("mavedb.worker.lib.managers.pipeline_manager.send_slack_message") as mock_slack: + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 5, JobStatus.CANCELLED: 1}, + 10, + ) + + assert result == PipelineStatus.PARTIAL + mock_slack.assert_called_once() + + @pytest.mark.parametrize( + "status_counts", + [ + {JobStatus.CANCELLED: 5}, + {JobStatus.SKIPPED: 4}, + {JobStatus.CANCELLED: 2, JobStatus.SKIPPED: 3}, + ], + ) + def test_all_cancelled_or_skipped_returns_cancelled(self, mock_pipeline_manager, status_counts): + """All jobs CANCELLED or SKIPPED (no SUCCEEDED) yields CANCELLED.""" + total = sum(status_counts.values()) + result = mock_pipeline_manager._compute_new_status(PipelineStatus.RUNNING, status_counts, total) + assert result == PipelineStatus.CANCELLED + + +@pytest.mark.unit +class 
TestComputeStatusWithLeafFailuresUnit: + """Unit tests for PipelineManager._compute_status_with_leaf_failures. + + This method determines pipeline status when all failed jobs are leaf jobs. + Leaf failures do not fail the pipeline; siblings continue and the pipeline + settles to PARTIAL rather than FAILED once all jobs are terminal. + """ + + @pytest.mark.parametrize( + "active_status", + [JobStatus.RUNNING, JobStatus.QUEUED], + ) + def test_active_sibling_jobs_keep_pipeline_running(self, mock_pipeline_manager, active_status): + """RUNNING or QUEUED siblings keep the pipeline in RUNNING state.""" + result = mock_pipeline_manager._compute_status_with_leaf_failures( + PipelineStatus.RUNNING, + {JobStatus.FAILED: 1, active_status: 2}, + 3, + ) + assert result == PipelineStatus.RUNNING + + @pytest.mark.parametrize( + "old_status", + [PipelineStatus.CREATED, PipelineStatus.RUNNING], + ) + def test_pending_sibling_jobs_preserve_old_status(self, mock_pipeline_manager, old_status): + """Pending sibling jobs leave the pipeline status unchanged.""" + result = mock_pipeline_manager._compute_status_with_leaf_failures( + old_status, + {JobStatus.FAILED: 1, JobStatus.PENDING: 2}, + 3, + ) + assert result == old_status + + @pytest.mark.parametrize( + "status_counts,total", + [ + ({JobStatus.SUCCEEDED: 3, JobStatus.FAILED: 1}, 4), + ({JobStatus.SUCCEEDED: 1, JobStatus.FAILED: 1, JobStatus.SKIPPED: 1}, 3), + ({JobStatus.SUCCEEDED: 2, JobStatus.FAILED: 2, JobStatus.CANCELLED: 1}, 5), + ], + ) + def test_all_terminal_with_succeeded_yields_partial(self, mock_pipeline_manager, status_counts, total): + """Once all jobs are terminal and SUCCEEDED is present, the pipeline is PARTIAL.""" + result = mock_pipeline_manager._compute_status_with_leaf_failures( + PipelineStatus.RUNNING, + status_counts, + total, + ) + assert result == PipelineStatus.PARTIAL + + def test_inconsistent_job_counts_yields_partial_with_slack_alert(self, mock_pipeline_manager): + """Inconsistent total still yields 
PARTIAL but fires a Slack warning.""" + # total=10 but counts sum to only 4 — inconsistent + with patch("mavedb.worker.lib.managers.pipeline_manager.send_slack_message") as mock_slack: + result = mock_pipeline_manager._compute_status_with_leaf_failures( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 2, JobStatus.FAILED: 2}, + 10, + ) + + assert result == PipelineStatus.PARTIAL + mock_slack.assert_called_once() + + @pytest.mark.parametrize( + "status_counts", + [ + {JobStatus.FAILED: 3, JobStatus.CANCELLED: 2}, + {JobStatus.FAILED: 1, JobStatus.SKIPPED: 2}, + {JobStatus.FAILED: 2}, + ], + ) + def test_no_succeeded_jobs_yields_cancelled(self, mock_pipeline_manager, status_counts): + """When there are only leaf failures and no SUCCEEDED jobs, yield CANCELLED.""" + total = sum(status_counts.values()) + result = mock_pipeline_manager._compute_status_with_leaf_failures( + PipelineStatus.RUNNING, + status_counts, + total, + ) + assert result == PipelineStatus.CANCELLED diff --git a/tests/worker/lib/managers/test_types.py b/tests/worker/lib/managers/test_types.py new file mode 100644 index 000000000..cbea88783 --- /dev/null +++ b/tests/worker/lib/managers/test_types.py @@ -0,0 +1,163 @@ +"""Tests for JobExecutionOutcome dataclass and factory methods.""" + +import pytest + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus + + +@pytest.mark.unit +class TestJobExecutionOutcomeSucceeded: + def test_default(self): + result = JobExecutionOutcome.succeeded() + assert result.status == JobStatus.SUCCEEDED + assert result.data == {} + assert result.error is None + assert result.exception is None + + def test_with_data(self): + result = JobExecutionOutcome.succeeded(data={"variant_count": 42}) + assert result.status == JobStatus.SUCCEEDED + assert result.data == {"variant_count": 42} + assert result.error is None + assert result.exception is None + + def test_none_data_defaults_to_empty_dict(self): + 
result = JobExecutionOutcome.succeeded(data=None) + assert result.data == {} + + +@pytest.mark.unit +class TestJobExecutionOutcomeFailed: + def test_with_reason(self): + result = JobExecutionOutcome.failed(reason="bad input") + assert result.status == JobStatus.FAILED + assert result.error == "bad input" + assert result.exception is None + assert result.data == {} + + def test_with_reason_and_data(self): + result = JobExecutionOutcome.failed(reason="bad input", data={"partial": 5}) + assert result.status == JobStatus.FAILED + assert result.error == "bad input" + assert result.data == {"partial": 5} + assert result.exception is None + + def test_empty_reason_is_valid(self): + result = JobExecutionOutcome.failed(reason="") + assert result.error == "" + + def test_none_data_defaults_to_empty_dict(self): + result = JobExecutionOutcome.failed(reason="x", data=None) + assert result.data == {} + + def test_with_failure_category(self): + result = JobExecutionOutcome.failed(reason="HGVS parse error", failure_category=FailureCategory.DATA_ERROR) + assert result.failure_category == FailureCategory.DATA_ERROR + + def test_without_failure_category_defaults_to_none(self): + result = JobExecutionOutcome.failed(reason="bad input") + assert result.failure_category is None + + +@pytest.mark.unit +class TestJobExecutionOutcomeErrored: + def test_with_exception(self): + exc = RuntimeError("boom") + result = JobExecutionOutcome.errored(exception=exc) + assert result.status == JobStatus.ERRORED + assert result.error == "boom" + assert result.exception is exc + assert result.data == {} + + def test_with_exception_and_data(self): + exc = ValueError("invalid") + result = JobExecutionOutcome.errored(exception=exc, data={"processed": 50}) + assert result.status == JobStatus.ERRORED + assert result.error == "invalid" + assert result.data == {"processed": 50} + assert result.exception is exc + + def test_empty_exception_message(self): + exc = ValueError("") + result = 
JobExecutionOutcome.errored(exception=exc) + assert result.error == "" + + def test_none_data_defaults_to_empty_dict(self): + exc = RuntimeError("x") + result = JobExecutionOutcome.errored(exception=exc, data=None) + assert result.data == {} + + def test_with_failure_category(self): + exc = ConnectionError("timeout") + result = JobExecutionOutcome.errored(exception=exc, failure_category=FailureCategory.NETWORK_ERROR) + assert result.failure_category == FailureCategory.NETWORK_ERROR + + def test_without_failure_category_defaults_to_none(self): + exc = RuntimeError("boom") + result = JobExecutionOutcome.errored(exception=exc) + assert result.failure_category is None + + +@pytest.mark.unit +class TestJobExecutionOutcomeSkipped: + def test_default(self): + result = JobExecutionOutcome.skipped() + assert result.status == JobStatus.SKIPPED + assert result.data == {} + assert result.error is None + assert result.exception is None + + def test_with_data(self): + result = JobExecutionOutcome.skipped(data={"reason": "disabled"}) + assert result.data == {"reason": "disabled"} + + def test_none_data_defaults_to_empty_dict(self): + result = JobExecutionOutcome.skipped(data=None) + assert result.data == {} + + +@pytest.mark.unit +class TestJobExecutionOutcomeDirectConstruction: + """Direct construction bypassing factories is at-your-own-risk but should not raise.""" + + def test_semantically_invalid_combination_is_allowed(self): + result = JobExecutionOutcome( + status=JobStatus.SUCCEEDED, + data={}, + error="oops", + exception=RuntimeError("x"), + ) + assert result.status == JobStatus.SUCCEEDED + assert result.error == "oops" + assert result.exception is not None + + +@pytest.mark.unit +class TestJobExecutionOutcomeToDict: + def test_succeeded(self): + result = JobExecutionOutcome.succeeded(data={"k": 1}) + d = result.to_dict() + assert d == {"status": "succeeded", "data": {"k": 1}, "error": None, "failure_category": None} + + def test_failed(self): + result = 
JobExecutionOutcome.failed(reason="bad", data={"partial": 3}) + d = result.to_dict() + assert d == {"status": "failed", "data": {"partial": 3}, "error": "bad", "failure_category": None} + + def test_failed_with_failure_category(self): + result = JobExecutionOutcome.failed(reason="bad", failure_category=FailureCategory.DATA_ERROR) + d = result.to_dict() + assert d["failure_category"] == "data_error" + + def test_errored_excludes_exception(self): + exc = RuntimeError("crash") + result = JobExecutionOutcome.errored(exception=exc) + d = result.to_dict() + assert d == {"status": "errored", "data": {}, "error": "crash", "failure_category": None} + assert "exception" not in d + + def test_skipped(self): + result = JobExecutionOutcome.skipped() + d = result.to_dict() + assert d == {"status": "skipped", "data": {}, "error": None, "failure_category": None} diff --git a/tests/worker/lib/managers/test_utils.py b/tests/worker/lib/managers/test_utils.py new file mode 100644 index 000000000..70e3ca24b --- /dev/null +++ b/tests/worker/lib/managers/test_utils.py @@ -0,0 +1,150 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import DependencyType, FailureCategory, JobStatus +from mavedb.worker.lib.managers.constants import ( + ACTIVE_JOB_STATUSES, + COMPLETED_JOB_STATUSES, + RETRYABLE_JOB_STATUSES, + STARTABLE_JOB_STATUSES, + TERMINAL_JOB_STATUSES, +) +from mavedb.worker.lib.managers.utils import ( + classify_exception, + construct_bulk_cancellation_result, + job_dependency_is_met, + job_should_be_skipped_due_to_unfulfillable_dependency, +) + + +@pytest.mark.unit +class TestConstructBulkCancellationResultUnit: + def test_construct_bulk_cancellation_result(self): + reason = "Test cancellation reason" + result = construct_bulk_cancellation_result(reason) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.CANCELLED + assert 
result.data["reason"] == reason + assert "timestamp" in result.data + assert result.exception is None + assert result.error == reason + + +@pytest.mark.unit +class TestJobStatusConstantsUnit: + def test_errored_in_completed_statuses(self): + assert JobStatus.ERRORED in COMPLETED_JOB_STATUSES + + def test_errored_in_terminal_statuses(self): + assert JobStatus.ERRORED in TERMINAL_JOB_STATUSES + + def test_errored_in_retryable_statuses(self): + assert JobStatus.ERRORED in RETRYABLE_JOB_STATUSES + + def test_errored_not_in_startable_statuses(self): + assert JobStatus.ERRORED not in STARTABLE_JOB_STATUSES + + def test_errored_not_in_active_statuses(self): + assert JobStatus.ERRORED not in ACTIVE_JOB_STATUSES + + +@pytest.mark.unit +class TestJobDependencyIsMetUnit: + @pytest.mark.parametrize( + "dependency_type, dependent_job_status, expected", + [ + (None, "any_status", True), + # success required dependencies-- should only be met if dependent job succeeded + (DependencyType.SUCCESS_REQUIRED, JobStatus.SUCCEEDED, True), + *[ + (DependencyType.SUCCESS_REQUIRED, dependent_job_status, False) + for dependent_job_status in JobStatus._member_map_.values() + if dependent_job_status != JobStatus.SUCCEEDED + ], + # completion required dependencies-- should be met if dependent job is in any terminal state + *[ + ( + DependencyType.COMPLETION_REQUIRED, + dependent_job_status, + dependent_job_status in COMPLETED_JOB_STATUSES, + ) + for dependent_job_status in JobStatus._member_map_.values() + ], + ], + ) + def test_job_dependency_is_met(self, dependency_type, dependent_job_status, expected): + result = job_dependency_is_met(dependency_type, dependent_job_status) + assert result == expected + + +@pytest.mark.unit +class TestJobShouldBeSkippedDueToUnfulfillableDependencyUnit: + @pytest.mark.parametrize( + "dependency_type, dependent_job_status, expected", + [ + # No dependency-- should not be skipped + (None, "any_status", False), + # success required dependencies-- should be 
skipped if dependent job in terminal non-success state + (DependencyType.SUCCESS_REQUIRED, JobStatus.SUCCEEDED, False), + *[ + ( + DependencyType.SUCCESS_REQUIRED, + dependent_job_status, + dependent_job_status + in (JobStatus.FAILED, JobStatus.ERRORED, JobStatus.SKIPPED, JobStatus.CANCELLED), + ) + for dependent_job_status in JobStatus._member_map_.values() + ], + # completion required dependencies-- should be skipped if dependent job is not in a terminal state + *[ + ( + DependencyType.COMPLETION_REQUIRED, + dependent_job_status, + dependent_job_status in (JobStatus.CANCELLED, JobStatus.SKIPPED), + ) + for dependent_job_status in JobStatus._member_map_.values() + ], + ], + ) + def test_job_should_be_skipped_due_to_unfulfillable_dependency( + self, dependency_type, dependent_job_status, expected + ): + result = job_should_be_skipped_due_to_unfulfillable_dependency(dependency_type, dependent_job_status) + + if expected: + assert result[0] is True + assert isinstance(result[1], str) + else: + assert result == (False, None) + + +@pytest.mark.unit +class TestClassifyException: + """Tests for classify_exception mapping.""" + + def test_connection_error_returns_network_error(self): + assert classify_exception(ConnectionError("connection refused")) == FailureCategory.NETWORK_ERROR + + def test_timeout_error_returns_timeout(self): + assert classify_exception(TimeoutError("timed out")) == FailureCategory.TIMEOUT + + def test_os_error_returns_network_error(self): + assert classify_exception(OSError("socket error")) == FailureCategory.NETWORK_ERROR + + def test_connection_reset_error_returns_network_error(self): + """ConnectionResetError is a subclass of ConnectionError, so isinstance matches.""" + assert classify_exception(ConnectionResetError("reset by peer")) == FailureCategory.NETWORK_ERROR + + def test_value_error_returns_unknown(self): + assert classify_exception(ValueError("bad value")) == FailureCategory.UNKNOWN + + def test_runtime_error_returns_unknown(self): + 
assert classify_exception(RuntimeError("unexpected")) == FailureCategory.UNKNOWN + + def test_generic_exception_returns_unknown(self): + assert classify_exception(Exception("generic")) == FailureCategory.UNKNOWN diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py deleted file mode 100644 index e7fd0b39f..000000000 --- a/tests/worker/test_jobs.py +++ /dev/null @@ -1,3479 +0,0 @@ -# ruff: noqa: E402 - -import json -from asyncio.unix_events import _UnixSelectorEventLoop -from copy import deepcopy -from datetime import date -from unittest.mock import patch -from uuid import uuid4 - -import jsonschema -import pandas as pd -import pytest -from requests import HTTPError -from sqlalchemy import not_, select - -arq = pytest.importorskip("arq") -cdot = pytest.importorskip("cdot") -fastapi = pytest.importorskip("fastapi") -pyathena = pytest.importorskip("pyathena") - -from mavedb.data_providers.services import VRSMap -from mavedb.lib.clingen.services import ( - ClinGenAlleleRegistryService, - ClinGenLdhService, - clingen_allele_id_from_ldh_variation, -) -from mavedb.lib.mave.constants import HGVS_NT_COLUMN -from mavedb.lib.score_sets import csv_data_to_df -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.lib.validation.exceptions import ValidationError -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.enums.processing_state import ProcessingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.variant import Variant -from mavedb.view_models.experiment import Experiment, ExperimentCreate -from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate -from mavedb.worker.jobs import ( - BACKOFF_LIMIT, - MAPPING_CURRENT_ID_NAME, - MAPPING_QUEUE_NAME, - create_variants_for_score_set, - link_clingen_variants, - link_gnomad_variants, - map_variants_for_score_set, - poll_uniprot_mapping_jobs_for_score_set, - 
submit_score_set_mappings_to_car, - submit_score_set_mappings_to_ldh, - submit_uniprot_mapping_jobs_for_score_set, - variant_mapper_manager, -) -from tests.helpers.constants import ( - TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_CLINGEN_ALLELE_OBJECT, - TEST_CLINGEN_LDH_LINKING_RESPONSE, - TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, - TEST_CLINGEN_SUBMISSION_RESPONSE, - TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE, - TEST_GNOMAD_DATA_VERSION, - TEST_MINIMAL_ACC_SCORESET, - TEST_MINIMAL_EXPERIMENT, - TEST_MINIMAL_MULTI_TARGET_SCORESET, - TEST_MINIMAL_SEQ_SCORESET, - TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_NT_CDOT_TRANSCRIPT, - TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, - TEST_UNIPROT_JOB_SUBMISSION_RESPONSE, - TEST_UNIPROT_SWISS_PROT_TYPE, - TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, - TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, - VALID_CHR_ACCESSION, - VALID_CLINGEN_CA_ID, - VALID_NT_ACCESSION, - VALID_UNIPROT_ACCESSION, -) -from tests.helpers.util.exceptions import awaitable_exception -from tests.helpers.util.experiment import create_experiment -from tests.helpers.util.score_set import create_acc_score_set, create_multi_target_score_set, create_seq_score_set - - -@pytest.fixture -def populate_worker_db(data_files, client): - # create score set via API. 
In production, the API would invoke this worker job - experiment = create_experiment(client) - seq_score_set = create_seq_score_set(client, experiment["urn"]) - acc_score_set = create_acc_score_set(client, experiment["urn"]) - multi_target_score_set = create_multi_target_score_set(client, experiment["urn"]) - - return [seq_score_set["urn"], acc_score_set["urn"], multi_target_score_set["urn"]] - - -async def setup_records_and_files(async_client, data_files, input_score_set): - experiment_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) - jsonschema.validate(instance=experiment_payload, schema=ExperimentCreate.model_json_schema()) - experiment_response = await async_client.post("/api/v1/experiments/", json=experiment_payload) - assert experiment_response.status_code == 200 - experiment = experiment_response.json() - jsonschema.validate(instance=experiment, schema=Experiment.model_json_schema()) - - score_set_payload = deepcopy(input_score_set) - score_set_payload["experimentUrn"] = experiment["urn"] - jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.model_json_schema()) - score_set_response = await async_client.post("/api/v1/score-sets/", json=score_set_payload) - assert score_set_response.status_code == 200 - score_set = score_set_response.json() - jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) - - scores_fp = ( - "scores_multi_target.csv" - if len(score_set["targetGenes"]) > 1 - else ("scores.csv" if "targetSequence" in score_set["targetGenes"][0] else "scores_acc.csv") - ) - counts_fp = ( - "counts_multi_target.csv" - if len(score_set["targetGenes"]) > 1 - else ("counts.csv" if "targetSequence" in score_set["targetGenes"][0] else "counts_acc.csv") - ) - with ( - open(data_files / scores_fp, "rb") as score_file, - open(data_files / counts_fp, "rb") as count_file, - open(data_files / "score_columns_metadata.json", "rb") as score_columns_file, - open(data_files / "count_columns_metadata.json", "rb") as count_columns_file, 
- ): - scores = csv_data_to_df(score_file) - counts = csv_data_to_df(count_file) - score_columns_metadata = json.load(score_columns_file) - count_columns_metadata = json.load(count_columns_file) - - return score_set["urn"], scores, counts, score_columns_metadata, count_columns_metadata - - -async def setup_records_files_and_variants(session, async_client, data_files, input_score_set, worker_ctx): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # Patch CDOT `_get_transcript`, in the event this function is called on an accesssion based scoreset. - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ): - result = await create_variants_for_score_set( - worker_ctx, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata - ) - - score_set_with_variants = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - assert result["success"] - assert score_set.processing_state is ProcessingState.success - assert score_set_with_variants.num_variants == 3 - - return score_set_with_variants - - -async def setup_records_files_and_variants_with_mapping( - session, async_client, data_files, input_score_set, standalone_worker_context -): - score_set = await setup_records_files_and_variants( - session, async_client, data_files, input_score_set, standalone_worker_context - ) - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", False), 
- ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - return session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - - -async def sanitize_mapping_queue(standalone_worker_context, score_set): - queued_job = await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME) - assert int(queued_job.decode("utf-8")) == score_set.id - - -async def setup_mapping_output( - async_client, session, score_set, score_set_is_seq_based=True, score_set_is_multi_target=False, empty=False -): - score_set_response = await async_client.get(f"/api/v1/score-sets/{score_set.urn}") - - if score_set_is_seq_based: - if score_set_is_multi_target: - # If this is a multi-target sequence based score set, use the scaffold for that. - mapping_output = deepcopy(TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD) - else: - mapping_output = deepcopy(TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD) - else: - # there is not currently a multi-target accession-based score set test - mapping_output = deepcopy(TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD) - mapping_output["metadata"] = score_set_response.json() - - if empty: - return mapping_output - - variants = session.scalars(select(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).all() - for variant in variants: - mapped_score = { - "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, - "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, - "mavedb_id": variant.urn, - } - - mapping_output["mapped_scores"].append(mapped_score) - - return mapping_output - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set,validation_error", - [ - ( - TEST_MINIMAL_SEQ_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": ["target sequence mismatch for 'c.1T>A' at row 0 for sequence TEST1"], - }, - ), - ( - 
TEST_MINIMAL_ACC_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": [ - "Failed to parse row 0 with HGVS exception: NM_001637.3:c.1T>A: Variant reference (T) does not agree with reference sequence (G)." - ], - }, - ), - ( - TEST_MINIMAL_MULTI_TARGET_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": ["target sequence mismatch for 'n.1T>A' at row 0 for sequence TEST3"], - }, - ), - ], -) -async def test_create_variants_for_score_set_with_validation_error( - input_score_set, - validation_error, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - if input_score_set == TEST_MINIMAL_SEQ_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "c.1T>A" - elif input_score_set == TEST_MINIMAL_ACC_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_NT_ACCESSION}:c.1T>A" - elif input_score_set == TEST_MINIMAL_MULTI_TARGET_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "TEST3:n.1T>A" - - with ( - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. 
- if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == validation_error - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_caught_exception( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some exception will be raised no matter what in the async job. 
- with ( - patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == {"detail": [], "exception": ""} - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_caught_base_exception( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat (extra) dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some base exception will be handled no matter what in the async job. 
- with ( - patch.object(pd.DataFrame, "isnull", side_effect=BaseException), - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_existing_variants( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. 
- if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - await sanitize_mapping_queue(standalone_worker_context, score_set) - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert score_set.processing_errors is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_existing_exceptions( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to 
guarantee - # some exception will be raised no matter what in the async job. - with ( - patch.object( - pd.DataFrame, - "isnull", - side_effect=ValidationError("Test Exception", triggers=["exc_1", "exc_2"]), - ) as mocked_exc, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == { - "exception": "Test Exception", - "detail": ["exc_1", "exc_2"], - } - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. 
- if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert score_set.processing_errors is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. 
- if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_enqueues_manager_and_successful_mapping( - input_score_set, - setup_worker_db, - session, - async_client, - data_files, - arq_worker, - arq_redis, -): - score_set_is_seq = all(["targetSequence" in target for target in input_score_set["targetGenes"]]) - score_set_is_multi_target = len(input_score_set["targetGenes"]) > 1 - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set, score_set_is_seq, score_set_is_multi_target) - - async def dummy_car_submission_job(): - return TEST_CLINGEN_ALLELE_OBJECT - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # Variants have not yet been created, so infer their URNs. 
- async def dummy_linking_job(): - return [(f"{score_set_urn}#{i}", TEST_CLINGEN_LDH_LINKING_RESPONSE) for i in range(1, len(scores) + 1)] - - with ( - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp, - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[ - dummy_mapping_job(), - dummy_car_submission_job(), - dummy_ldh_submission_job(), - dummy_linking_job(), - ], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - await arq_redis.enqueue_job( - "create_variants_for_score_set", - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - await arq_worker.async_run() - await arq_worker.run_check() - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. 
- if score_set_is_seq: - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_exception_skips_mapping( - input_score_set, - setup_worker_db, - session, - async_client, - data_files, - arq_worker, - arq_redis, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc: - await arq_redis.enqueue_job( - "create_variants_for_score_set", - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - await arq_worker.async_run() - await arq_worker.run_check() - - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - mapped_variants_for_score_set = 
session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == {"detail": [], "exception": ""} - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.not_attempted - assert score_set.mapping_errors is None - - -# NOTE: These tests operate under the assumption that mapping output is consistent between accession based and sequence based score sets. If -# this assumption changes in the future, tests reflecting this difference in output should be added for accession based score sets. - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset( - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_with_existing_mapped_variants( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. 
Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - existing_variant = session.scalars(select(Variant)).first() - - if not existing_variant: - raise ValueError - - session.add( - MappedVariant( - pre_mapped={"preexisting": "variant"}, - post_mapped={"preexisting": "variant"}, - variant_id=existing_variant.id, - modification_date=date.today(), - mapped_date=date.today(), - vrs_version="2.0", - mapping_api_version="0.0.0", - current=True, - ) - ) - session.commit() - - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - preexisting_variants = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, not_(MappedVariant.current)) - ).all() - new_variants = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.current) - ).all() - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == score_set.num_variants + 1 - assert len(preexisting_variants) == 1 - assert len(new_variants) == score_set.num_variants - assert score_set.mapping_state == 
MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_exception_in_mapping_setup_score_set_selection( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id + 5, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # When we cannot fetch a score set, these fields are unable to be updated. 
- assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_exception_in_mapping_setup_vrs_object( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - with patch.object( - VRSMap, - "__init__", - return_value=Exception(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_mapping_exception( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - 
standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_mapping_exception_retry_limit_reached( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above 
`setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ): - result = await map_variants_for_score_set( - standalone_worker_context, uuid4().hex, score_set.id, 1, BACKOFF_LIMIT + 1 - ) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_mapping_exception_retry_failed( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. 
Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ), - patch.object(arq.ArqRedis, "lpush", awaitable_exception()), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # Behavior for exception in mapping is retried job - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_parsing_exception_with_retry( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. 
Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - mapping_test_output_for_score_set = await setup_mapping_output(async_client, session, score_set) - mapping_test_output_for_score_set.pop("computed_genomic_reference_sequence") - return mapping_test_output_for_score_set - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_parsing_exception_retry_failed( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to 
`create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - mapping_test_output_for_score_set = await setup_mapping_output(async_client, session, score_set) - mapping_test_output_for_score_set.pop("computed_genomic_reference_sequence") - return mapping_test_output_for_score_set - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch.object(arq.ArqRedis, "lpush", awaitable_exception()), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # Behavior for exception outside mapping is failed job - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def 
test_create_mapped_variants_for_scoreset_parsing_exception_retry_limit_reached( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - mapping_test_output_for_score_set = await setup_mapping_output(async_client, session, score_set) - mapping_test_output_for_score_set.pop("computed_genomic_reference_sequence") - return mapping_test_output_for_score_set - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. 
- with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ): - result = await map_variants_for_score_set( - standalone_worker_context, uuid4().hex, score_set.id, 1, BACKOFF_LIMIT + 1 - ) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # Behavior for exception outside mapping is failed job - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_no_mapping_output( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # Do not await, we need a co-routine object to be the return value of our `run_in_executor` mock. - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set, empty=True) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. 
Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - - -@pytest.mark.asyncio -async def test_mapping_manager_empty_queue(setup_worker_db, standalone_worker_context): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. 
- assert result["enqueued_job"] is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - - -@pytest.mark.asyncio -async def test_mapping_manager_empty_queue_error_during_setup(setup_worker_db, standalone_worker_context): - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.ArqRedis, "rpop", Exception()): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. 
- assert result["enqueued_job"] is not None - assert ( - await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set.id) - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_not_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Mapping job should be queued if none is currently running, and the queue should now be empty. - assert result["enqueued_job"] is not None - assert ( - await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.queued - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - # We don't actually start processing these score sets. 
- assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_in_progress_error_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with ( - patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_not_in_progress_error_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with ( - patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Enqueue would 
have failed, the job is unsuccessful, and we remove the queued item. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set_id_1 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_2 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_3 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # All three jobs should complete successfully... - assert result1["success"] - assert result2["success"] - assert result3["success"] - - # ...with a new job enqueued... - assert result1["enqueued_job"] is not None - assert result2["enqueued_job"] is not None - assert result3["enqueued_job"] is not None - - # ...of which all should be deferred jobs of the "variant_mapper_manager" variety... 
- assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - - # ...and the queue state should have three jobs, each of our three created score sets. - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 3 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_1) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) - - score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() - score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() - score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() - # Each score set should remain queued with no mapping errors. 
- assert score_set1.mapping_state == MappingState.queued - assert score_set2.mapping_state == MappingState.queued - assert score_set3.mapping_state == MappingState.queued - assert score_set1.mapping_errors is None - assert score_set2.mapping_errors is None - assert score_set3.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_not_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set_id_1 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_2 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_3 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): - result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Mock the first job being in-progress - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, str(score_set_id_1)) - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # All three jobs should complete successfully... - assert result1["success"] - assert result2["success"] - assert result3["success"] - - # ...with a new job enqueued... 
- assert result1["enqueued_job"] is not None - assert result2["enqueued_job"] is not None - assert result3["enqueued_job"] is not None - - # ...of which the first should be a queued job of the "map_variants_for_score_set" variety and the other two should be - # deferred jobs of the "variant_mapper_manager" variety... - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.queued - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "map_variants_for_score_set" - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - - # ...and the queue state should have two jobs, neither of which should be the first score set. - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 2 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) - - score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() - score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() - score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() - # We don't actually process any score sets in the manager job, and each should have no mapping errors. 
- assert score_set1.mapping_state == MappingState.queued - assert score_set2.mapping_state == MappingState.queued - assert score_set3.mapping_state == MappingState.queued - assert score_set1.mapping_errors is None - assert score_set2.mapping_errors is None - assert score_set3.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", True), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed all jobs exactly once. 
- assert num_completed_jobs == 8 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_disabled( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager and mapping jobs, but not the submission, linking, or uniprot mapping jobs. - assert num_completed_jobs == 2 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_enabled( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. 
Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", True), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager, mapping, and uniprot jobs, but not the submission or linking jobs. 
- assert num_completed_jobs == 4 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_enabled_uniprot_disabled( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job(), dummy_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", return_value=[mocked_gnomad_variant_row]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager, mapping, submission, and linking jobs, but not the uniprot jobs. 
- assert num_completed_jobs == 6 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_retried_mapping_successful_mapping_on_retry( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_mapping_job(): - return Exception() - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[failed_mapping_job(), dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the mapping manager job twice, the mapping job twice, the two submission jobs, and both linking jobs. 
- assert num_completed_jobs == 8 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_unsuccessful_mapping( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_mapping_job(): - return Exception() - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[failed_mapping_job()] * 5, - ), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed 6 mapping jobs and 6 management jobs. 
- assert num_completed_jobs == 12 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -############################################################################################################################################ -# ClinGen CAR Submission -############################################################################################################################################ - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - mapped_variants_with_caid_for_score_set = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) - ).all() - - assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants - - assert result["success"] - assert not result["retried"] - assert 
result["enqueued_job"] is not None - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_no_variants_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_hgvs_dict_creation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.get_hgvs_from_post_mapped", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def 
test_submit_score_set_mappings_to_car_exception_during_submission( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", side_effect=Exception()), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_allele_association( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.get_allele_registry_associations", side_effect=Exception()), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_during_ldh_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", 
"https://reg.test.genome.network/pytest"), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - mapped_variants_with_caid_for_score_set = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) - ).all() - - assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -############################################################################################################################################ -# ClinGen LDH Submission -############################################################################################################################################ - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] is not None - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_auth( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch.object( - ClinGenLdhService, - "_existing_jwt", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_no_variants_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - 
session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_hgvs_generation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.lib.variants.get_hgvs_from_post_mapped", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_ldh_submission_construction( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.lib.clingen.content_constructors.construct_ldh_submission", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_during_submission( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, 
arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_submission_job(): - return Exception() - - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=failed_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "error_response", [TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE] -) -async def test_submit_score_set_mappings_to_ldh_submission_failures_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis, error_response -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [None, error_response] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_during_linking_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_linking_not_queued_when_expected( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(arq.ArqRedis, "enqueue_job", return_value=None), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -############################################################################################################################################## -## ClinGen Linkage -############################################################################################################################################## - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. 
- with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.clingen_allele_id == clingen_allele_id_from_ldh_variation(TEST_CLINGEN_LDH_LINKING_RESPONSE) - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.clingen_allele_id is None - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_no_variants_to_link( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def 
test_link_score_set_mappings_to_ldh_objects_exception_during_linkage( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=Exception(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_while_parsing_linkages( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.clingen_allele_id_from_ldh_variation", - side_effect=Exception(), - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_but_do_not_eclipse_retry_threshold( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 2, - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch( - "mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", - 0, - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert result["retried"] - assert result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_cant_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_retries_exceeded( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch( - "mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", - 0, - ), - patch( - "mavedb.worker.jobs.BACKOFF_LIMIT", - 1, - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 2) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_error_in_gnomad_job_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. 
- with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -################################################################################################################################################## -# UniProt ID mapping -################################################################################################################################################## - -### Test Submission - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] is not None - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_targets( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - score_set.target_genes = [] - session.add(score_set) - session.commit() - - with patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message: - result = await 
submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called_once() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_while_spawning_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", side_effect=HTTPError()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_too_many_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", return_value=["AC1", "AC2"]), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_accessions( - setup_worker_db, 
standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message: - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_error_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.setup_job_state", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_during_submission_generation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await 
submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_spawned_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=None), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -### Test Polling - - -@pytest.mark.asyncio 
-async def test_poll_uniprot_id_mapping_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_targets( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - score_set.target_genes = [] - session.add(score_set) - session.commit() - - with patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message: - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = 
session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_too_many_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", return_value=["AC1", "AC2"]), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", return_value=[]), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - 
-@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_jobs_not_ready( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=False), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # This case does not get sent to slack - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {}, - score_set.id, - uuid4().hex, - ) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def 
test_poll_uniprot_id_mapping_no_ids_mapped( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value={"failedIDs": [VALID_CHR_ACCESSION]}), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_too_many_mapped_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # Simulate a response with too many mapped IDs - too_many_mapped_ids_response = TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE.copy() - too_many_mapped_ids_response["results"].append( - {"from": "AC3", "to": {"primaryAccession": "AC3", "entryType": TEST_UNIPROT_SWISS_PROT_TYPE}} - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object(UniProtIDMappingAPI, 
"get_id_mapping_results", return_value=too_many_mapped_ids_response), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_error_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.setup_job_state", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_exception_during_polling( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await 
poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -################################################################################################################################################## -# gnomAD Linking -################################################################################################################################################## - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_success( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. - mapped_variants = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID - session.commit() - - # Patch Athena connection with mock object which returns a mocked gnomAD variant row w/ CAID=VALID_CLINGEN_CA_ID. 
- with ( - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", return_value=[mocked_gnomad_variant_row]), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_no_variants_to_link( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( 
- select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_while_fetching_variant_data( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ), - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", side_effect=Exception()), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_while_linking_variants( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. 
- mapped_variants = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID - session.commit() - - with ( - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", return_value=[mocked_gnomad_variant_row]), - patch("mavedb.worker.jobs.link_gnomad_variants_to_mapped_variants", side_effect=Exception()), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants