From f713472530161208d2660653b2fde621500c3e1c Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Fri, 3 Jul 2026 17:45:29 -0500 Subject: [PATCH] feat: index-ceiling CLI parity and zero-size shape checks (CIPHER-04) Expose --max-index-artifact-bytes on the ordvec-manifest CLI LimitArgs, wiring it to ResourceLimits::max_index_artifact_bytes so the opt-in primary-artifact read ceiling reaches feature parity with the existing --max-auxiliary-artifact-bytes flag. Close the deferred CIPHER-04 reason-code symmetry: validate_manifest_shape now rejects a zero manifest-declared artifact.file_size_bytes (artifact_file_size_zero) and validate_auxiliary_artifact_shape rejects zero-size declarations on required auxiliary artifacts (auxiliary_artifact_file_size_zero), mirroring the calibration and encoder-distortion *_file_size_zero checks. Optional artifacts keep the established zero-size absent-placeholder convention. --- ordvec-manifest/README.md | 6 +++-- ordvec-manifest/src/lib.rs | 17 ++++++++++++ ordvec-manifest/src/main.rs | 44 ++++++++++++++++++++++++++++++- ordvec-manifest/tests/manifest.rs | 43 ++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 3 deletions(-) diff --git a/ordvec-manifest/README.md b/ordvec-manifest/README.md index 03c0eef..6a58c54 100644 --- a/ordvec-manifest/README.md +++ b/ordvec-manifest/README.md @@ -159,7 +159,8 @@ Stable limit codes are part of the contract: size on create; the flat cap is an opt-in ceiling, unbounded by default (`auxiliary_artifact_file_too_large`); - primary index artifact bytes: bounded by the manifest-declared - `file_size_bytes` on verify (`artifact_file_too_large`); + `file_size_bytes` on verify; the flat cap is an opt-in ceiling, unbounded + by default (`artifact_file_too_large`); - calibration profile artifact bytes: bounded by the declared `file_size_bytes`; flat cap opt-in, unbounded by default (`calibration_profile_too_large`); @@ -174,7 +175,7 @@ The CLI exposes matching override flags on `inspect`, `verify`, `create`, `sqlite verify`, and `sqlite activate`: `--max-manifest-bytes`, `--max-row-map-line-bytes`, `--max-row-map-rows`, `--max-row-map-tracked-id-bytes`, `--max-auxiliary-artifacts`, -`--max-auxiliary-artifact-bytes`, +`--max-auxiliary-artifact-bytes`, `--max-index-artifact-bytes`, `--max-calibration-profile-bytes`, `--max-encoder-distortion-profile-bytes`, `--max-report-issues`, and `--max-cached-report-bytes`. Library callers can override the same ceilings @@ -190,6 +191,7 @@ Stable limit codes: | row-identity duplicate-tracking `db_id` bytes | `row_identity_duplicate_tracking_limit_exceeded` | `row_identity_duplicate_tracking_limit_exceeded` | | auxiliary artifact declarations | `auxiliary_artifact_count_limit_exceeded` | n/a | | auxiliary artifact bytes per declared file | `auxiliary_artifact_file_too_large` | n/a | +| primary index artifact bytes | `artifact_file_too_large` | n/a | | calibration profile artifact bytes | `calibration_profile_too_large` | n/a | | encoder distortion profile artifact bytes | `encoder_distortion_profile_too_large` | n/a | | collected verification report issues | `verification_report_issue_limit_exceeded` | n/a | diff --git a/ordvec-manifest/src/lib.rs b/ordvec-manifest/src/lib.rs index ceea67c..14f7919 100644 --- a/ordvec-manifest/src/lib.rs +++ b/ordvec-manifest/src/lib.rs @@ -363,6 +363,12 @@ fn validate_manifest_shape( "artifact.sha256 must be a lowercase 64-character hex SHA-256 digest", ); } + if manifest.artifact.file_size_bytes == 0 { + report.error( + "artifact_file_size_zero", + "artifact.file_size_bytes must be greater than zero", + ); + } if manifest.artifact.bytes_per_vec == 0 { report.error( "artifact_bytes_per_vec_zero", @@ -565,6 +571,17 @@ fn validate_auxiliary_artifact_shape( ), ); } + // Optional artifacts may legitimately be declared absent with a + // zero-size placeholder (see `AuxiliaryArtifactState::OptionalAbsent`); + // only required declarations must carry a real size. + if artifact.required && artifact.file_size_bytes == 0 { + report.error( + "auxiliary_artifact_file_size_zero", + format!( + "required auxiliary artifact {name:?} file_size_bytes must be greater than zero" + ), + ); + } } } diff --git a/ordvec-manifest/src/main.rs b/ordvec-manifest/src/main.rs index 6236878..02df85c 100644 --- a/ordvec-manifest/src/main.rs +++ b/ordvec-manifest/src/main.rs @@ -103,7 +103,8 @@ fn parse_auxiliary_artifact_arg(value: &str) -> Result { + assert_eq!(limits.max_index_artifact_bytes, Some(8)); + assert_eq!(limits.resource_limits().max_index_artifact_bytes, 8); + } + _ => panic!("expected verify command"), + } + } } #[cfg(feature = "sqlite")] @@ -174,6 +211,8 @@ struct LimitArgs { #[arg(long)] max_auxiliary_artifact_bytes: Option, #[arg(long)] + max_index_artifact_bytes: Option, + #[arg(long)] max_calibration_profile_bytes: Option, #[arg(long)] max_encoder_distortion_profile_bytes: Option, @@ -204,6 +243,9 @@ impl LimitArgs { if let Some(value) = self.max_auxiliary_artifact_bytes { limits.max_auxiliary_artifact_bytes = value; } + if let Some(value) = self.max_index_artifact_bytes { + limits.max_index_artifact_bytes = value; + } if let Some(value) = self.max_calibration_profile_bytes { limits.max_calibration_profile_bytes = value; } diff --git a/ordvec-manifest/tests/manifest.rs b/ordvec-manifest/tests/manifest.rs index 2096cd9..71f9b57 100644 --- a/ordvec-manifest/tests/manifest.rs +++ b/ordvec-manifest/tests/manifest.rs @@ -2639,6 +2639,49 @@ fn auxiliary_artifacts_fail_closed_on_tamper_missing_and_path_escape() { .ends_with("missing.bin")); } +#[test] +fn manifest_shape_rejects_zero_declared_file_sizes_for_required_artifacts() { + let root = tempfile::tempdir().unwrap(); + let (temp, mut manifest, _manifest_path) = identity_manifest(root.path()); + fs::write(temp.path().join("extra.bin"), b"extra").unwrap(); + let extra_hash = sha256_file(temp.path().join("extra.bin")).unwrap(); + + manifest.artifact.file_size_bytes = 0; + manifest.auxiliary_artifacts = vec![AuxiliaryArtifact { + name: "extra".to_string(), + path: "extra.bin".to_string(), + sha256: extra_hash.sha256, + file_size_bytes: 0, + required: true, + }]; + + let report = verify_manifest_with_base(manifest, temp.path(), VerifyOptions::default()); + assert!(!report.ok); + let codes = error_codes(&report); + assert!(codes.contains(&"artifact_file_size_zero"), "{codes:?}"); + assert!( + codes.contains(&"auxiliary_artifact_file_size_zero"), + "{codes:?}" + ); +} + +#[test] +fn optional_absent_zero_size_placeholder_is_not_flagged_zero_size() { + let root = tempfile::tempdir().unwrap(); + let (temp, mut manifest, _manifest_path) = identity_manifest(root.path()); + manifest.auxiliary_artifacts = vec![AuxiliaryArtifact { + name: "optional-model".to_string(), + path: "missing-model.json".to_string(), + sha256: "0".repeat(64), + file_size_bytes: 0, + required: false, + }]; + + let report = verify_manifest_with_base(manifest, temp.path(), VerifyOptions::default()); + assert!(report.ok, "{:?}", report.errors); + assert!(!error_codes(&report).contains(&"auxiliary_artifact_file_size_zero")); +} + #[test] fn auxiliary_artifact_schema_rejects_unknown_fields_and_duplicate_names() { let root = tempfile::tempdir().unwrap();