From 76a975f3d5f322f95f20111968edba9dcbe35cab Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 00:10:18 -0500 Subject: [PATCH 001/287] feat: add connector catalog metadata --- README.md | 14 ++ docs/package-surface.md | 11 ++ src/extended_data/__init__.py | 3 + src/extended_data/connectors/__init__.py | 2 + src/extended_data/connectors/_optional.py | 63 +++++++- src/extended_data/connectors/cli.py | 65 ++++++-- src/extended_data/connectors/connectors.py | 25 +++- src/extended_data/connectors/registry.py | 165 ++++++++++++++++++--- tests/connectors/test_cli.py | 35 ++++- tests/connectors/test_connectors.py | 29 ++++ tests/core/test_package_surface.py | 2 + 11 files changed, 370 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index bba6e12..1bfdb27 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,20 @@ Connector names are normalized before lookup. If a known built-in connector is requested without its optional extra installed, the registry raises an `ImportError` with the matching `extended-data[...]` install target. +Inspect connector availability before wiring vendor workflows: + +```python +catalog = connectors.list_connector_info() +github_info = connectors.get_connector_info("github") +``` + +The same catalog is available from the CLI: + +```bash +extended-data list +extended-data info github --json +``` + ## Package Shape ```text diff --git a/docs/package-surface.md b/docs/package-surface.md index cf2a6b8..295fe41 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -51,6 +51,17 @@ Both paths share the same input provider and lifecycle logger, and both cache instances by connector type and constructor inputs. Generic connector names are stripped and lowercased before lookup. 
+Use the catalog helpers when a workflow needs to inspect which integrations can +run in the current environment: + +```python +catalog = fabric.list_connector_info() +github_info = fabric.get_connector_info("github") +``` + +Each catalog entry includes availability, source, extra name, install command, +required packages, missing packages, module, class, and description fields. + ## Optional Integrations Install only the vendor or AI layers you need: diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index c95bbea..a415929 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -115,6 +115,7 @@ if TYPE_CHECKING: from extended_data.connectors import ( ConnectorFabric, + ConnectorInfo, VendorConnectorBase, get_connector, get_connector_class, @@ -128,6 +129,7 @@ _LAZY_EXPORTS = { "ConnectorFabric": ("extended_data.connectors", "ConnectorFabric"), + "ConnectorInfo": ("extended_data.connectors", "ConnectorInfo"), "ExitRunError": ("extended_data.logging", "ExitRunError"), "InputProvider": ("extended_data.inputs", "InputProvider"), "KeyTransform": ("extended_data.logging", "KeyTransform"), @@ -156,6 +158,7 @@ def __getattr__(name: str): __all__ = [ "ConnectorFabric", + "ConnectorInfo", "ExitRunError", "FilePath", "InputProvider", diff --git a/src/extended_data/connectors/__init__.py b/src/extended_data/connectors/__init__.py index 80d1cac..b1f699f 100644 --- a/src/extended_data/connectors/__init__.py +++ b/src/extended_data/connectors/__init__.py @@ -137,6 +137,7 @@ class MyConnector(AWSConnector, AWSOrganizationsMixin): "AWSSSOmixin", "AnthropicConnector", "ConnectorFabric", + "ConnectorInfo", "CursorConnector", "GitHubConnector", "GoogleBillingMixin", @@ -163,6 +164,7 @@ class MyConnector(AWSConnector, AWSOrganizationsMixin): # Registry - unified access to all connectors from extended_data.connectors.registry import ( + ConnectorInfo, get_connector, get_connector_class, get_connector_info, diff --git 
a/src/extended_data/connectors/_optional.py b/src/extended_data/connectors/_optional.py index bc3b496..0647f45 100644 --- a/src/extended_data/connectors/_optional.py +++ b/src/extended_data/connectors/_optional.py @@ -165,18 +165,68 @@ def get_available_ai_frameworks() -> list[str]: CONNECTOR_REQUIREMENTS: dict[str, list[str]] = { # Core-only connectors (always available) + "cursor": [], # httpx is in core "meshy": [], # httpx, pydantic, tenacity are in core + "secrets": [], # pyyaml is in core "zoom": [], # requests is in core - "cursor": [], # httpx is in core # Connectors requiring extras + "anthropic": ["anthropic"], "aws": ["boto3"], "google": ["googleapiclient"], + "google_billing": ["googleapiclient"], + "google_cloud": ["googleapiclient"], + "google_workspace": ["googleapiclient"], "github": ["github"], + "jules": ["googleapiclient"], "slack": ["slack_sdk"], "vault": ["hvac"], - "anthropic": ["anthropic"], } +CONNECTOR_EXTRAS: dict[str, str] = { + "anthropic": "anthropic", + "aws": "aws", + "cursor": "cursor", + "google": "google", + "google_billing": "google", + "google_cloud": "google", + "google_workspace": "google", + "github": "github", + "jules": "google", + "meshy": "meshy", + "secrets": "secrets", + "slack": "slack", + "vault": "vault", + "zoom": "zoom", +} + + +def _normalize_connector_name(connector: str) -> str: + """Normalize connector names for optional dependency lookup.""" + return connector.strip().lower() + + +def get_extra_for_connector(connector: str) -> str | None: + """Get the optional dependency extra for a connector.""" + return CONNECTOR_EXTRAS.get(_normalize_connector_name(connector)) + + +def get_connector_requirements(connector: str) -> list[str]: + """Get package imports required by a connector.""" + return list(CONNECTOR_REQUIREMENTS.get(_normalize_connector_name(connector), [])) + + +def get_missing_connector_requirements(connector: str) -> list[str]: + """Get missing package imports for a connector.""" + return [pkg for pkg in 
get_connector_requirements(connector) if not is_available(pkg)] + + +def get_connector_install_command(connector: str) -> str | None: + """Get the pip install command for a connector extra.""" + extra = get_extra_for_connector(connector) + if extra is None: + return None + return f"pip install extended-data[{extra}]" + def is_connector_available(connector: str) -> bool: """Check if a connector's dependencies are available. @@ -187,8 +237,7 @@ def is_connector_available(connector: str) -> bool: Returns: True if all required packages are available """ - requirements = CONNECTOR_REQUIREMENTS.get(connector, []) - return all(is_available(pkg) for pkg in requirements) + return not get_missing_connector_requirements(connector) def get_available_connectors() -> list[str]: @@ -209,12 +258,12 @@ def require_connector(connector: str) -> None: Raises: ImportError: With helpful message if dependencies missing """ - requirements = CONNECTOR_REQUIREMENTS.get(connector, []) - missing = [pkg for pkg in requirements if not is_available(pkg)] + missing = get_missing_connector_requirements(connector) if missing: + extra = get_extra_for_connector(connector) or connector raise ImportError( f"The '{connector}' connector requires additional dependencies.\n" f"Missing packages: {', '.join(missing)}\n" - f"Install with: pip install extended-data[{connector}]" + f"Install with: pip install extended-data[{extra}]" ) diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index d57b961..f3f37d9 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -25,7 +25,6 @@ import argparse import json -import os import sys from typing import Any @@ -33,6 +32,7 @@ from extended_data.connectors.registry import ( get_connector, get_connector_class, + get_connector_info, list_connector_info, ) @@ -73,21 +73,43 @@ def _parse_arg_value(value: str) -> Any: return value +def _format_list(values: list[str] | tuple[str, ...] 
| None) -> str: + """Format a list-like metadata field for CLI output.""" + if not values: + return "-" + return ", ".join(values) + + +def _write_stdout(message: str) -> None: + """Write one CLI output line.""" + sys.stdout.write(f"{message}\n") + + +def _write_stderr(message: str) -> None: + """Write one CLI error line.""" + sys.stderr.write(f"{message}\n") + + # ============================================================================= # Commands # ============================================================================= def cmd_list(args: argparse.Namespace) -> int: - """List available connectors.""" - info = list_connector_info() + """List connector catalog entries.""" + info = list_connector_info(include_unavailable=not getattr(args, "available_only", False)) if args.json: + _write_stdout(_json_output(info)) return 0 + _write_stdout(f"{'name':<18} {'status':<11} {'extra':<10} {'class':<28} install") for c in info: - env = c.get("api_key_env") or "-" - "✓" if os.environ.get(env) else " " + status = "available" if c["available"] else "missing" + extra = c.get("extra") or "-" + class_name = c.get("class") or "-" + install = c.get("install") or "-" + _write_stdout(f"{c['name']:<18} {status:<11} {extra:<10} {class_name:<28} {install}") return 0 @@ -134,7 +156,8 @@ def cmd_methods(args: argparse.Namespace) -> int: try: cls = get_connector_class(connector_name) - except ValueError: + except (ImportError, ValueError) as e: + _write_stderr(str(e)) return 1 for name in sorted(dir(cls)): @@ -160,12 +183,32 @@ def cmd_mcp(args: argparse.Namespace) -> int: def cmd_info(args: argparse.Namespace) -> int: """Show info about a specific connector.""" - from extended_data.connectors.registry import get_connector_info - try: - get_connector_info(args.connector) + info = get_connector_info(args.connector) + if args.json: + _write_stdout(_json_output(info)) + return 0 + + for key in ( + "name", + "available", + "source", + "extra", + "install", + "requirements", + 
"missing", + "class", + "module", + "description", + "error", + ): + value = info.get(key) + if isinstance(value, list): + value = _format_list(value) + _write_stdout(f"{key}: {value if value is not None else '-'}") return 0 - except ValueError: + except (ImportError, ValueError) as e: + _write_stderr(str(e)) return 1 @@ -194,6 +237,7 @@ def main() -> int: # List command list_parser = subparsers.add_parser("list", help="List available connectors") list_parser.add_argument("--json", action="store_true", help="JSON output") + list_parser.add_argument("--available-only", action="store_true", help="Hide connectors with missing extras") list_parser.set_defaults(func=cmd_list) # Methods command @@ -204,6 +248,7 @@ def main() -> int: # Info command info_parser = subparsers.add_parser("info", help="Show connector info") info_parser.add_argument("connector", help="Connector name") + info_parser.add_argument("--json", action="store_true", help="JSON output") info_parser.set_defaults(func=cmd_info) # Call command diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index 272164b..5fb53b0 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -9,7 +9,18 @@ from extended_data import get_default_dict, get_unique_signature, make_hashable # Import zoom directly (no extra deps) -from extended_data.connectors.registry import get_connector_class +from extended_data.connectors.registry import ( + get_connector_class, +) +from extended_data.connectors.registry import ( + get_connector_info as get_registered_connector_info, +) +from extended_data.connectors.registry import ( + list_connector_info as list_registered_connector_info, +) +from extended_data.connectors.registry import ( + list_connectors as list_registered_connectors, +) from extended_data.connectors.zoom import ZoomConnector from extended_data.inputs import InputProvider from extended_data.logging import Logging @@ -79,6 
+90,18 @@ def _set_cached_client(self, client_type: str, client: Any, **kwargs) -> None: cache_key = self._get_cache_key(**kwargs) self._client_cache[client_type][cache_key] = client + def list_connectors(self) -> dict[str, Any]: + """List connector classes available in the current environment.""" + return list_registered_connectors() + + def list_connector_info(self, *, include_unavailable: bool = True) -> list[dict[str, Any]]: + """List connector catalog metadata.""" + return list_registered_connector_info(include_unavailable=include_unavailable) + + def get_connector_info(self, name: str, *, include_unavailable: bool = True) -> dict[str, Any]: + """Get catalog metadata for one connector.""" + return get_registered_connector_info(name, include_unavailable=include_unavailable) + def get_connector(self, name: str, **kwargs: Any) -> Any: """Get a cached connector instance by registry name. diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 3853044..ed2397d 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -35,6 +35,13 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Any, NoReturn +from extended_data.connectors._optional import ( + get_connector_install_command, + get_connector_requirements, + get_extra_for_connector, + get_missing_connector_requirements, +) + if TYPE_CHECKING: from extended_data.connectors.base import VendorConnectorBase @@ -49,6 +56,43 @@ class BuiltinConnectorSpec: extra: str +@dataclass(frozen=True) +class ConnectorInfo: + """Registry metadata for a connector.""" + + name: str + available: bool + source: str + extra: str | None + install: str | None + requirements: tuple[str, ...] + missing: tuple[str, ...] 
+ class_name: str | None + module: str | None + base_url: str | None + api_key_env: str | None + description: str | None + error: str | None + + def as_dict(self) -> dict[str, Any]: + """Return JSON-friendly connector metadata.""" + return { + "name": self.name, + "available": self.available, + "source": self.source, + "extra": self.extra, + "install": self.install, + "requirements": list(self.requirements), + "missing": list(self.missing), + "class": self.class_name, + "module": self.module, + "base_url": self.base_url, + "api_key_env": self.api_key_env, + "description": self.description, + "error": self.error, + } + + BUILTIN_CONNECTORS: dict[str, BuiltinConnectorSpec] = { # Google connectors "jules": BuiltinConnectorSpec("extended_data.connectors.google.jules", "JulesConnector", "google"), @@ -60,6 +104,7 @@ class BuiltinConnectorSpec: "cursor": BuiltinConnectorSpec("extended_data.connectors.cursor", "CursorConnector", "cursor"), "github": BuiltinConnectorSpec("extended_data.connectors.github", "GitHubConnector", "github"), "meshy": BuiltinConnectorSpec("extended_data.connectors.meshy", "MeshyConnector", "meshy"), + "secrets": BuiltinConnectorSpec("extended_data.connectors.secrets", "SecretsConnector", "secrets"), "anthropic": BuiltinConnectorSpec("extended_data.connectors.anthropic", "AnthropicConnector", "anthropic"), "aws": BuiltinConnectorSpec("extended_data.connectors.aws", "AWSConnector", "aws"), "slack": BuiltinConnectorSpec("extended_data.connectors.slack", "SlackConnector", "slack"), @@ -93,8 +138,17 @@ def _discover_connectors() -> dict[str, builtins.type[VendorConnectorBase]]: eps = entry_points(group="extended_data.connectors") for ep in eps: + connector_name = _normalize_connector_name(ep.name) try: - connectors[ep.name] = ep.load() + connectors[connector_name] = ep.load() + _missing_builtin_connectors.pop(connector_name, None) + except ImportError as e: + if connector_name in BUILTIN_CONNECTORS: + _missing_builtin_connectors[connector_name] = e + 
continue + import warnings + + warnings.warn(f"Failed to load connector '{ep.name}': {e}", stacklevel=2) except Exception as e: # Log but don't fail - allow partial loading import warnings @@ -123,17 +177,24 @@ def _register_builtins(connectors: dict[str, builtins.type[VendorConnectorBase]] if cls is not None: connectors[name] = cls _missing_builtin_connectors.pop(name, None) + else: + _missing_builtin_connectors[name] = ImportError( + f"Could not find {spec.class_name} in {spec.module_path}" + ) except ImportError as e: _missing_builtin_connectors[name] = e # Optional dependency not installed def _raise_missing_builtin_connector(name: str, error: ImportError) -> NoReturn: """Raise a clear install hint for a known built-in connector.""" - spec = BUILTIN_CONNECTORS[name] + install = get_connector_install_command(name) or f"pip install extended-data[{BUILTIN_CONNECTORS[name].extra}]" + missing = get_missing_connector_requirements(name) msg = ( f"The '{name}' connector is built in but its optional dependencies are not installed.\n" - f"Install with: pip install extended-data[{spec.extra}]" + f"Install with: {install}" ) + if missing: + msg = f"{msg}\nMissing packages: {', '.join(missing)}" if str(error): msg = f"{msg}\nOriginal import error: {error}" raise ImportError(msg) from error @@ -200,29 +261,93 @@ def clear_cache() -> None: _missing_builtin_connectors.clear() +def _get_description(cls: builtins.type[VendorConnectorBase]) -> str | None: + """Get the first useful line from a connector docstring.""" + if not cls.__doc__: + return None + for line in cls.__doc__.splitlines(): + description = line.strip() + if description: + return description + return None + + +def _available_connector_info(name: str, cls: builtins.type[VendorConnectorBase]) -> ConnectorInfo: + """Build metadata for a loadable connector.""" + spec = BUILTIN_CONNECTORS.get(name) + source = "builtin" if spec else "entry_point" + extra = spec.extra if spec else get_extra_for_connector(name) + 
requirements = tuple(get_connector_requirements(name)) + missing = tuple(get_missing_connector_requirements(name)) + + return ConnectorInfo( + name=name, + available=True, + source=source, + extra=extra, + install=get_connector_install_command(name), + requirements=requirements, + missing=missing, + class_name=cls.__name__, + module=cls.__module__, + base_url=getattr(cls, "BASE_URL", None), + api_key_env=getattr(cls, "API_KEY_ENV", None), + description=_get_description(cls), + error=None, + ) + + +def _missing_builtin_connector_info(name: str, error: ImportError | None) -> ConnectorInfo: + """Build metadata for a known built-in connector that cannot be loaded.""" + spec = BUILTIN_CONNECTORS[name] + + return ConnectorInfo( + name=name, + available=False, + source="builtin", + extra=spec.extra, + install=get_connector_install_command(name), + requirements=tuple(get_connector_requirements(name)), + missing=tuple(get_missing_connector_requirements(name)), + class_name=spec.class_name, + module=spec.module_path, + base_url=None, + api_key_env=None, + description=None, + error=str(error) if error else "Connector class could not be loaded.", + ) + + # ============================================================================= # Connector Info Helpers # ============================================================================= -def get_connector_info(name: str) -> dict[str, Any]: - """Get metadata about a connector. +def get_connector_info(name: str, *, include_unavailable: bool = True) -> dict[str, Any]: + """Get registry metadata about a connector.""" + connector_name = _normalize_connector_name(name) + connectors = _discover_connectors() + + if connector_name in connectors: + return _available_connector_info(connector_name, connectors[connector_name]).as_dict() - Returns: - Dict with name, module, env_vars, description, etc. 
- """ - cls = get_connector_class(name) + if connector_name in _missing_builtin_connectors: + if include_unavailable: + return _missing_builtin_connector_info(connector_name, _missing_builtin_connectors[connector_name]).as_dict() + _raise_missing_builtin_connector(connector_name, _missing_builtin_connectors[connector_name]) + + if include_unavailable and connector_name in BUILTIN_CONNECTORS: + return _missing_builtin_connector_info(connector_name, None).as_dict() - return { - "name": name, - "class": cls.__name__, - "module": cls.__module__, - "base_url": getattr(cls, "BASE_URL", None), - "api_key_env": getattr(cls, "API_KEY_ENV", None), - "description": cls.__doc__.split("\n")[0] if cls.__doc__ else None, - } + available = ", ".join(sorted(connectors.keys())) + raise ValueError(f"Unknown connector: {name}. Available: {available}") -def list_connector_info() -> list[dict[str, Any]]: - """Get metadata for all connectors.""" - return [get_connector_info(name) for name in sorted(list_connectors().keys())] +def list_connector_info(*, include_unavailable: bool = True) -> list[dict[str, Any]]: + """Get registry metadata for known connectors.""" + connectors = _discover_connectors() + names = set(connectors) + if include_unavailable: + names.update(BUILTIN_CONNECTORS) + names.update(_missing_builtin_connectors) + return [get_connector_info(name, include_unavailable=include_unavailable) for name in sorted(names)] diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index f283b75..e3d1622 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -8,23 +8,46 @@ import pytest -from extended_data.connectors.cli import cmd_list, main +from extended_data.connectors.cli import cmd_info, cmd_list, main -@pytest.mark.xfail(reason="Pre-existing mock issue: cmd_list uses logging instead of print") def test_cli_list(): """Test the list command.""" - args = argparse.Namespace(json=False) - with patch("builtins.print") as mock_print: + args = 
argparse.Namespace(json=False, available_only=False) + with patch("sys.stdout.write") as mock_write: exit_code = cmd_list(args) assert exit_code == 0 - mock_print.assert_called() + mock_write.assert_called() # Verify it lists some connectors - output = "\n".join(call.args[0] for call in mock_print.call_args_list if call.args) + output = "".join(call.args[0] for call in mock_write.call_args_list if call.args) assert "aws" in output assert "google" in output +def test_cli_list_json(): + """List command can emit machine-readable connector metadata.""" + args = argparse.Namespace(json=True, available_only=False) + with patch("sys.stdout.write") as mock_write: + exit_code = cmd_list(args) + + assert exit_code == 0 + output = mock_write.call_args.args[0] + assert '"name": "github"' in output + assert '"available":' in output + + +def test_cli_info(): + """Info command prints connector metadata.""" + args = argparse.Namespace(connector=" github ", json=False) + with patch("sys.stdout.write") as mock_write: + exit_code = cmd_info(args) + + assert exit_code == 0 + output = "".join(call.args[0] for call in mock_write.call_args_list if call.args) + assert "name: github" in output + assert "install: pip install extended-data[github]" in output + + def test_cli_main_help(): """Test main CLI entry point with help.""" with patch("sys.argv", ["extended-data", "--help"]): diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index 0767213..55e62ca 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -133,6 +133,18 @@ def __init__(self, **kwargs): assert third is not first assert mock_get_connector_class.call_count == 2 + def test_connector_fabric_exposes_catalog_info(self): + """ConnectorFabric exposes registry-backed catalog metadata.""" + vc = ConnectorFabric(from_environment=False) + + info = vc.list_connector_info() + names = {connector["name"] for connector in info} + + assert "cursor" in names + assert 
"github" in names + assert vc.get_connector_info(" github ")["name"] == "github" + assert isinstance(vc.list_connectors(), dict) + @requires_boto3 @patch("extended_data.connectors.aws.AWSConnector") def test_get_aws_connector(self, mock_aws): @@ -329,6 +341,23 @@ def test_get_connector_class_known_missing_builtin_has_install_hint(self, monkey with pytest.raises(ImportError, match=r"extended-data\[github\]"): registry.get_connector_class(" github ") + def test_get_connector_info_includes_known_missing_builtin(self, monkeypatch): + """Registry metadata includes unavailable known connectors.""" + monkeypatch.setattr(registry, "_connector_cache", {}) + monkeypatch.setitem( + registry._missing_builtin_connectors, + "github", + ImportError("No module named 'github'"), + ) + + info = registry.get_connector_info(" github ") + + assert info["name"] == "github" + assert info["available"] is False + assert info["extra"] == "github" + assert info["install"] == "pip install extended-data[github]" + assert info["class"] == "GitHubConnector" + def test_register_builtins_tracks_missing_optional_dependency(self, monkeypatch): """Built-in discovery remembers optional dependency import failures.""" monkeypatch.setattr(registry, "_missing_builtin_connectors", {}) diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 8405e22..ce18619 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -36,5 +36,7 @@ def test_root_exports_first_class_integrated_primitives() -> None: assert extended_data.InputProvider is InputProvider assert extended_data.Logging is Logging assert extended_data.ConnectorFabric is ConnectorFabric + assert extended_data.ConnectorInfo.__name__ == "ConnectorInfo" assert callable(extended_data.directed_inputs) assert callable(extended_data.get_connector) + assert callable(extended_data.list_connector_info) From ecc2fa852fe428c6dd827af056314e092bebbf92 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: 
Wed, 10 Jun 2026 00:19:47 -0500 Subject: [PATCH 002/287] refactor: organize extended data into tiers --- README.md | 21 ++- docs/package-surface.md | 38 ++-- examples/core/composed_workflows.py | 2 +- src/extended_data/__init__.py | 71 +++---- src/extended_data/containers/__init__.py | 13 ++ src/extended_data/containers/mappings.py | 64 +++++++ src/extended_data/containers/sequences.py | 112 +++++++++++ src/extended_data/containers/strings.py | 104 +++++++++++ src/extended_data/io/__init__.py | 46 +++++ .../{base64_utils.py => io/base64.py} | 4 +- .../{export_utils.py => io/exporters.py} | 12 +- .../{file_data_type.py => io/files.py} | 8 +- .../{import_utils.py => io/importers.py} | 12 +- src/extended_data/primitives/__init__.py | 175 ++++++++++++++++++ .../primitives/formats/__init__.py | 45 +++++ .../formats/hcl.py} | 4 +- .../formats/json.py} | 0 .../formats/toml.py} | 4 +- .../formats/yaml}/__init__.py | 12 +- .../formats/yaml}/constructors.py | 2 +- .../formats/yaml}/dumpers.py | 4 +- .../formats/yaml}/loaders.py | 2 +- .../formats/yaml}/representers.py | 2 +- .../formats/yaml}/tag_classes.py | 0 .../formats/yaml}/utils.py | 8 +- .../introspection.py} | 0 .../mappings.py} | 5 +- .../matching.py} | 4 +- .../numbers.py} | 0 .../sequences.py} | 0 .../serialization.py} | 0 .../splitting.py} | 2 +- .../{state_utils.py => primitives/state.py} | 0 .../string_transforms.py} | 0 .../strings.py} | 0 .../transformations/__init__.py | 4 +- .../transformations/numbers/__init__.py | 4 +- .../transformations/numbers/notation.py | 2 +- .../transformations/numbers/words.py | 0 .../transformations/strings/__init__.py | 2 +- .../transformations/strings/inflection.py | 0 .../{type_utils.py => primitives/types.py} | 6 +- tests/core/test_base64_utils.py | 4 +- tests/core/test_containers.py | 62 +++++++ tests/core/test_export_utils.py | 4 +- tests/core/test_file_data_type.py | 16 +- tests/core/test_hcl2_utils.py | 4 +- tests/core/test_import_utils.py | 2 +- 
tests/core/test_json_utils.py | 2 +- tests/core/test_list_data_type.py | 2 +- tests/core/test_map_data_type.py | 10 +- tests/core/test_matcher_utils.py | 2 +- tests/core/test_number_transformations.py | 2 +- tests/core/test_serialization_utils.py | 2 +- tests/core/test_splitter_utils.py | 2 +- tests/core/test_stack_utils.py | 2 +- tests/core/test_state_utils.py | 4 +- tests/core/test_string_data_type.py | 2 +- tests/core/test_string_transformations.py | 2 +- tests/core/test_toml_utils.py | 2 +- tests/core/test_type_utils.py | 12 +- tests/core/test_workflows.py | 2 +- tests/core/test_yaml_utils.py | 2 +- .../transformations/numbers/test_notation.py | 2 +- .../transformations/numbers/test_words.py | 4 +- .../strings/test_inflection.py | 2 +- 66 files changed, 801 insertions(+), 143 deletions(-) create mode 100644 src/extended_data/containers/__init__.py create mode 100644 src/extended_data/containers/mappings.py create mode 100644 src/extended_data/containers/sequences.py create mode 100644 src/extended_data/containers/strings.py create mode 100644 src/extended_data/io/__init__.py rename src/extended_data/{base64_utils.py => io/base64.py} (93%) rename src/extended_data/{export_utils.py => io/exporters.py} (93%) rename src/extended_data/{file_data_type.py => io/files.py} (98%) rename src/extended_data/{import_utils.py => io/importers.py} (75%) create mode 100644 src/extended_data/primitives/__init__.py create mode 100644 src/extended_data/primitives/formats/__init__.py rename src/extended_data/{hcl2_utils.py => primitives/formats/hcl.py} (98%) rename src/extended_data/{json_utils.py => primitives/formats/json.py} (100%) rename src/extended_data/{toml_utils.py => primitives/formats/toml.py} (90%) rename src/extended_data/{yaml_utils => primitives/formats/yaml}/__init__.py (64%) rename src/extended_data/{yaml_utils => primitives/formats/yaml}/constructors.py (95%) rename src/extended_data/{yaml_utils => primitives/formats/yaml}/dumpers.py (94%) rename 
src/extended_data/{yaml_utils => primitives/formats/yaml}/loaders.py (93%) rename src/extended_data/{yaml_utils => primitives/formats/yaml}/representers.py (97%) rename src/extended_data/{yaml_utils => primitives/formats/yaml}/tag_classes.py (100%) rename src/extended_data/{yaml_utils => primitives/formats/yaml}/utils.py (86%) rename src/extended_data/{stack_utils.py => primitives/introspection.py} (100%) rename src/extended_data/{map_data_type.py => primitives/mappings.py} (98%) rename src/extended_data/{matcher_utils.py => primitives/matching.py} (95%) rename src/extended_data/{number_transformations.py => primitives/numbers.py} (100%) rename src/extended_data/{list_data_type.py => primitives/sequences.py} (100%) rename src/extended_data/{serialization_utils.py => primitives/serialization.py} (100%) rename src/extended_data/{splitter_utils.py => primitives/splitting.py} (97%) rename src/extended_data/{state_utils.py => primitives/state.py} (100%) rename src/extended_data/{string_transformations.py => primitives/string_transforms.py} (100%) rename src/extended_data/{string_data_type.py => primitives/strings.py} (100%) rename src/extended_data/{ => primitives}/transformations/__init__.py (52%) rename src/extended_data/{ => primitives}/transformations/numbers/__init__.py (82%) rename src/extended_data/{ => primitives}/transformations/numbers/notation.py (98%) rename src/extended_data/{ => primitives}/transformations/numbers/words.py (100%) rename src/extended_data/{ => primitives}/transformations/strings/__init__.py (83%) rename src/extended_data/{ => primitives}/transformations/strings/inflection.py (100%) rename src/extended_data/{type_utils.py => primitives/types.py} (98%) create mode 100644 tests/core/test_containers.py diff --git a/README.md b/README.md index 1bfdb27..fdfa69a 100644 --- a/README.md +++ b/README.md @@ -26,14 +26,15 @@ pip install "extended-data[secrets]" ## Usage ```python -from extended_data import ConnectorFabric, InputProvider, Logging, 
decode_json, encode_yaml +from extended_data import ConnectorFabric, ExtendedDict, InputProvider, Logging, decode_json, encode_yaml logger = Logging(logger_name="example") inputs = InputProvider(inputs={"GITHUB_OWNER": "jbcom"}, from_environment=False) connectors = ConnectorFabric(inputs=inputs.inputs, logger=logger) data = decode_json('{"status": "ok"}') +payload = ExtendedDict(data).deep_merge({"source": "example"}) -print(encode_yaml(data)) +print(encode_yaml(payload.data)) ``` The fabric can also instantiate any registered connector by name: @@ -68,12 +69,14 @@ extended-data info github --json ```text extended_data/ - core serialization, files, types, transforms + primitives/ Tier 1 pure functions and codecs + containers/ Tier 2 ExtendedString/Dict/List/Set wrappers + io/ Tier 3 file, import, export, and base64 processors inputs/ InputProvider and decorator-based input injection logging/ structured lifecycle logging - connectors/ ConnectorFabric and vendor adapters + connectors/ Tier 3 ConnectorFabric and vendor adapters secrets/ Python access to secret sync primitives - workflows/ higher-order workflow composition + workflows/ Tier 3 higher-order workflow composition ``` Vendor connectors are first-class adapters in the data fabric. `ConnectorFabric` @@ -81,6 +84,14 @@ uses the registry to resolve connectors by name, injects shared input/logging context, caches connector instances, and lets specialized helpers coexist with generic vendor lookup. +The package is intentionally tiered: + +- Tier 1 functions stay stateless and composable. +- Tier 2 containers inherit Python's user container types and expose ergonomic + methods over Tier 1 functions. +- Tier 3 processors use the first two tiers to handle files, inputs, API data, + vendor integrations, and workflows. + More detail lives in [`docs/package-surface.md`](docs/package-surface.md). 
## Development diff --git a/docs/package-surface.md b/docs/package-surface.md index 295fe41..6ac85bd 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -6,6 +6,10 @@ namespace. The root package exposes the primitives users need most often: ```python from extended_data import ( ConnectorFabric, + ExtendedDict, + ExtendedList, + ExtendedSet, + ExtendedString, InputProvider, Logging, decode_json, @@ -14,17 +18,29 @@ from extended_data import ( ) ``` -## Layers - -- Core data primitives handle serialization, file decoding, type coercion, - string transforms, map/list transforms, and export-safe values. -- `InputProvider` loads input data from explicit mappings, environment - variables, and stdin, then decodes or coerces values through the same core - primitives. -- `Logging` provides structured lifecycle logging for applications and - connector workflows. -- `ConnectorFabric` caches and coordinates vendor connectors while sharing - input loading, logging, data normalization, retry behavior, and serialization. +## Tiers + +- Tier 1 `extended_data.primitives` modules are pure functions and codecs for + strings, numbers, maps, lists, matching, state, type coercion, and structured + formats. +- Tier 2 `extended_data.containers` classes wrap Python user containers as + `ExtendedString`, `ExtendedDict`, `ExtendedList`, and `ExtendedSet` with + ergonomic methods over Tier 1 primitives. +- Tier 3 processors use the first two tiers to handle files, imports, exports, + inputs, API data, vendor integrations, and workflows. + +```python +name = ExtendedString("API Response Value").to_snake_case() +payload = ExtendedDict({"outer": {"inner": 1}}).flatten() +items = ExtendedList([1, [2, [3]]]).flatten() +tags = ExtendedSet({"prod", "prod", ""}).compact() +``` + +`InputProvider` loads input data from explicit mappings, environment variables, +and stdin, then decodes or coerces values through the primitive layer. 
`Logging` +provides structured lifecycle logging for applications and connector workflows. +`ConnectorFabric` caches and coordinates vendor connectors while sharing input +loading, logging, data normalization, retry behavior, and serialization. ## Connector Fabric diff --git a/examples/core/composed_workflows.py b/examples/core/composed_workflows.py index ff19efd..1534aca 100644 --- a/examples/core/composed_workflows.py +++ b/examples/core/composed_workflows.py @@ -23,7 +23,7 @@ to_snake_case, write_file, ) -from extended_data.yaml_utils import YamlTagged +from extended_data.primitives.formats.yaml import YamlTagged def demonstrate_layered_config_workflow() -> None: diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index a415929..c830b12 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -11,12 +11,13 @@ from typing import TYPE_CHECKING from extended_data._version import __version__ -from extended_data.base64_utils import base64_decode, base64_encode -from extended_data.export_utils import ( +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString +from extended_data.io.base64 import base64_decode, base64_encode +from extended_data.io.exporters import ( make_raw_data_export_safe, wrap_raw_data_for_export, ) -from extended_data.file_data_type import ( +from extended_data.io.files import ( FilePath, clone_repository_to_temp, decode_file, @@ -33,11 +34,20 @@ resolve_local_path, write_file, ) -from extended_data.hcl2_utils import decode_hcl2, encode_hcl2 -from extended_data.import_utils import unwrap_raw_data_from_import -from extended_data.json_utils import decode_json, encode_json -from extended_data.list_data_type import filter_list, flatten_list -from extended_data.map_data_type import ( +from extended_data.io.importers import unwrap_raw_data_from_import +from extended_data.primitives.formats.hcl import decode_hcl2, encode_hcl2 +from extended_data.primitives.formats.json 
import decode_json, encode_json +from extended_data.primitives.formats.toml import decode_toml, encode_toml +from extended_data.primitives.formats.yaml import decode_yaml, encode_yaml, is_yaml_data +from extended_data.primitives.introspection import ( + filter_methods, + get_available_methods, + get_caller, + get_inputs_from_docstring, + get_unique_signature, + update_docstring, +) +from extended_data.primitives.mappings import ( SortedDefaultDict, all_values_from_map, create_merger, @@ -50,17 +60,10 @@ unhump_map, zipmap, ) -from extended_data.matcher_utils import is_non_empty_match, is_partial_match -from extended_data.splitter_utils import split_dict_by_type, split_list_by_type -from extended_data.stack_utils import ( - filter_methods, - get_available_methods, - get_caller, - get_inputs_from_docstring, - get_unique_signature, - update_docstring, -) -from extended_data.state_utils import ( +from extended_data.primitives.matching import is_non_empty_match, is_partial_match +from extended_data.primitives.sequences import filter_list, flatten_list +from extended_data.primitives.splitting import split_dict_by_type, split_list_by_type +from extended_data.primitives.state import ( all_non_empty, all_non_empty_in_dict, all_non_empty_in_list, @@ -70,17 +73,7 @@ is_nothing, yield_non_empty, ) -from extended_data.string_data_type import ( - bytestostr, - lower_first_char, - removeprefix, - removesuffix, - sanitize_key, - titleize_name, - truncate, - upper_first_char, -) -from extended_data.string_transformations import ( +from extended_data.primitives.string_transforms import ( humanize, ordinalize, pluralize, @@ -91,8 +84,17 @@ to_pascal_case, to_snake_case, ) -from extended_data.toml_utils import decode_toml, encode_toml -from extended_data.type_utils import ( +from extended_data.primitives.strings import ( + bytestostr, + lower_first_char, + removeprefix, + removesuffix, + sanitize_key, + titleize_name, + truncate, + upper_first_char, +) +from 
extended_data.primitives.types import ( convert_special_type, convert_special_types, get_default_value_for_type, @@ -109,7 +111,6 @@ strtotime, typeof, ) -from extended_data.yaml_utils import decode_yaml, encode_yaml, is_yaml_data if TYPE_CHECKING: @@ -160,6 +161,10 @@ def __getattr__(name: str): "ConnectorFabric", "ConnectorInfo", "ExitRunError", + "ExtendedDict", + "ExtendedList", + "ExtendedSet", + "ExtendedString", "FilePath", "InputProvider", "KeyTransform", diff --git a/src/extended_data/containers/__init__.py b/src/extended_data/containers/__init__.py new file mode 100644 index 0000000..e48324a --- /dev/null +++ b/src/extended_data/containers/__init__.py @@ -0,0 +1,13 @@ +"""Tier 2 extended container classes.""" + +from extended_data.containers.mappings import ExtendedDict +from extended_data.containers.sequences import ExtendedList, ExtendedSet +from extended_data.containers.strings import ExtendedString + + +__all__ = [ + "ExtendedDict", + "ExtendedList", + "ExtendedSet", + "ExtendedString", +] diff --git a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py new file mode 100644 index 0000000..6c70fec --- /dev/null +++ b/src/extended_data/containers/mappings.py @@ -0,0 +1,64 @@ +"""Extended mapping container built on Tier 1 primitives.""" + +from __future__ import annotations + +from collections import UserDict +from collections.abc import Mapping +from typing import Any + +from extended_data.primitives.mappings import ( + all_values_from_map, + deduplicate_map, + deep_merge, + filter_map, + first_non_empty_value_from_map, + flatten_map, + unhump_map, +) +from extended_data.primitives.state import all_non_empty_in_dict + + +class ExtendedDict(UserDict[str, Any]): + """Dictionary wrapper with chainable primitive operations.""" + + def __init__(self, initialdata: Mapping[str, Any] | None = None, **kwargs: Any) -> None: + """Initialize the extended dictionary.""" + super().__init__(dict(initialdata or {}, **kwargs)) + + def 
deep_merge(self, *mappings: Mapping[str, Any]) -> ExtendedDict: + """Return a deeply merged copy.""" + return ExtendedDict(deep_merge(self.data, *mappings)) + + def flatten(self, *, separator: str = ".") -> ExtendedDict: + """Return a flattened copy.""" + return ExtendedDict(flatten_map(self.data, separator=separator)) + + def filter( + self, + *, + allowlist: list[str] | None = None, + denylist: list[str] | None = None, + ) -> tuple[ExtendedDict, ExtendedDict]: + """Return accepted and rejected mapping entries.""" + accepted, rejected = filter_map(self.data, allowlist=allowlist, denylist=denylist) + return ExtendedDict(accepted), ExtendedDict(rejected) + + def compact(self) -> ExtendedDict: + """Return a copy without values considered empty.""" + return ExtendedDict(all_non_empty_in_dict(self.data)) + + def deduplicate(self) -> ExtendedDict: + """Return a copy with nested duplicate list values removed.""" + return ExtendedDict(deduplicate_map(self.data)) + + def unhump(self, *, drop_without_prefix: str | None = None) -> ExtendedDict: + """Return a copy with camelCase keys converted to snake_case.""" + return ExtendedDict(unhump_map(self.data, drop_without_prefix=drop_without_prefix)) + + def all_values(self) -> list[Any]: + """Return all values from the nested mapping.""" + return all_values_from_map(self.data) + + def first_non_empty_value(self, *keys: str) -> Any: + """Return the first non-empty value for the provided keys.""" + return first_non_empty_value_from_map(self.data, *keys) diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py new file mode 100644 index 0000000..14857c3 --- /dev/null +++ b/src/extended_data/containers/sequences.py @@ -0,0 +1,112 @@ +"""Extended sequence containers built on Tier 1 primitives.""" + +from __future__ import annotations + +from collections import UserList +from collections.abc import Callable, Iterable, Iterator, MutableSet +from typing import Any, TypeVar + +from 
extended_data.primitives.sequences import flatten_list +from extended_data.primitives.state import is_nothing +from extended_data.primitives.types import make_hashable + + +T = TypeVar("T") +U = TypeVar("U") + + +class ExtendedList(UserList[T]): + """List wrapper with chainable primitive operations.""" + + def __init__(self, initlist: Iterable[T] | None = None) -> None: + """Initialize the extended list.""" + super().__init__(list(initlist or [])) + + def flatten(self) -> ExtendedList[Any]: + """Return a recursively flattened copy.""" + return ExtendedList(flatten_list(list(self.data))) + + def compact(self) -> ExtendedList[T]: + """Return a copy without values considered empty.""" + return ExtendedList(item for item in self.data if not is_nothing(item)) + + def map(self, func: Callable[[T], U]) -> ExtendedList[U]: + """Return a copy with a callable applied to each item.""" + return ExtendedList(func(item) for item in self.data) + + def filter(self, predicate: Callable[[T], bool]) -> ExtendedList[T]: + """Return a copy containing items accepted by a predicate.""" + return ExtendedList(item for item in self.data if predicate(item)) + + def unique(self) -> ExtendedList[T]: + """Return a copy with duplicate values removed while preserving order.""" + seen: set[Any] = set() + values: list[T] = [] + for item in self.data: + marker = make_hashable(item) + if marker in seen: + continue + seen.add(marker) + values.append(item) + return ExtendedList(values) + + +class ExtendedSet(MutableSet[T]): + """Set wrapper with explicit chainable operations.""" + + def __init__(self, values: Iterable[T] | None = None) -> None: + """Initialize the extended set.""" + self._data: set[T] = set(values or []) + + def __contains__(self, value: object) -> bool: + """Return whether the set contains a value.""" + return value in self._data + + def __iter__(self) -> Iterator[T]: + """Iterate over set values.""" + return iter(self._data) + + def __len__(self) -> int: + """Return the number of set 
values.""" + return len(self._data) + + def add(self, value: T) -> None: + """Add a value to the set.""" + self._data.add(value) + + def discard(self, value: T) -> None: + """Remove a value from the set if present.""" + self._data.discard(value) + + def copy(self) -> ExtendedSet[T]: + """Return a shallow copy.""" + return ExtendedSet(self._data) + + def compact(self) -> ExtendedSet[T]: + """Return a copy without values considered empty.""" + return ExtendedSet(item for item in self._data if not is_nothing(item)) + + def union(self, *others: Iterable[T]) -> ExtendedSet[T]: + """Return a union with other iterables.""" + result = set(self._data) + for other in others: + result.update(other) + return ExtendedSet(result) + + def intersection(self, *others: Iterable[T]) -> ExtendedSet[T]: + """Return an intersection with other iterables.""" + result = set(self._data) + for other in others: + result.intersection_update(other) + return ExtendedSet(result) + + def difference(self, *others: Iterable[T]) -> ExtendedSet[T]: + """Return a difference against other iterables.""" + result = set(self._data) + for other in others: + result.difference_update(other) + return ExtendedSet(result) + + def to_set(self) -> set[T]: + """Return a plain set copy.""" + return set(self._data) diff --git a/src/extended_data/containers/strings.py b/src/extended_data/containers/strings.py new file mode 100644 index 0000000..8465429 --- /dev/null +++ b/src/extended_data/containers/strings.py @@ -0,0 +1,104 @@ +"""Extended string container built on Tier 1 primitives.""" + +from __future__ import annotations + +from collections import UserString + +from extended_data.primitives.string_transforms import ( + humanize, + ordinalize, + pluralize, + singularize, + titleize, + to_camel_case, + to_kebab_case, + to_pascal_case, + to_snake_case, +) +from extended_data.primitives.strings import ( + is_url, + lower_first_char, + removeprefix, + removesuffix, + sanitize_key, + titleize_name, + truncate, + 
upper_first_char, +) +from extended_data.primitives.types import strtobool + + +class ExtendedString(UserString): + """String wrapper with chainable primitive operations.""" + + def lower_first(self) -> ExtendedString: + """Return a copy with the first character lowercased.""" + return ExtendedString(lower_first_char(self.data)) + + def upper_first(self) -> ExtendedString: + """Return a copy with the first character uppercased.""" + return ExtendedString(upper_first_char(self.data)) + + def remove_prefix(self, prefix: str) -> ExtendedString: + """Return a copy with a leading prefix removed.""" + return ExtendedString(removeprefix(self.data, prefix)) + + def remove_suffix(self, suffix: str) -> ExtendedString: + """Return a copy with a trailing suffix removed.""" + return ExtendedString(removesuffix(self.data, suffix)) + + def sanitize(self, delim: str = "_") -> ExtendedString: + """Return a key-safe copy.""" + return ExtendedString(sanitize_key(self.data, delim=delim)) + + def truncate(self, max_length: int, ender: str = "...") -> ExtendedString: + """Return a truncated copy.""" + return ExtendedString(truncate(self.data, max_length=max_length, ender=ender)) + + def titleize_name(self) -> ExtendedString: + """Return a titleized name copy.""" + return ExtendedString(titleize_name(self.data)) + + def to_snake_case(self) -> ExtendedString: + """Return a snake_case copy.""" + return ExtendedString(to_snake_case(self.data)) + + def to_camel_case(self, *, uppercase_first: bool = False) -> ExtendedString: + """Return a camelCase copy.""" + return ExtendedString(to_camel_case(self.data, uppercase_first=uppercase_first)) + + def to_pascal_case(self) -> ExtendedString: + """Return a PascalCase copy.""" + return ExtendedString(to_pascal_case(self.data)) + + def to_kebab_case(self) -> ExtendedString: + """Return a kebab-case copy.""" + return ExtendedString(to_kebab_case(self.data)) + + def pluralize(self) -> ExtendedString: + """Return a pluralized copy.""" + return 
ExtendedString(pluralize(self.data)) + + def singularize(self) -> ExtendedString: + """Return a singularized copy.""" + return ExtendedString(singularize(self.data)) + + def humanize(self) -> ExtendedString: + """Return a human-readable copy.""" + return ExtendedString(humanize(self.data)) + + def titleize(self) -> ExtendedString: + """Return a title-case copy.""" + return ExtendedString(titleize(self.data)) + + def ordinalize(self) -> ExtendedString: + """Return an ordinalized copy.""" + return ExtendedString(ordinalize(self.data)) + + def is_url(self) -> bool: + """Return whether the string is a URL.""" + return is_url(self.data) + + def to_bool(self, *, raise_on_error: bool = False) -> bool | None: + """Return a boolean parsed from the string.""" + return strtobool(self.data, raise_on_error=raise_on_error) diff --git a/src/extended_data/io/__init__.py b/src/extended_data/io/__init__.py new file mode 100644 index 0000000..306d65b --- /dev/null +++ b/src/extended_data/io/__init__.py @@ -0,0 +1,46 @@ +"""Tier 3 input/output processors built from primitives.""" + +from extended_data.io.base64 import base64_decode, base64_encode +from extended_data.io.exporters import make_raw_data_export_safe, wrap_raw_data_for_export +from extended_data.io.files import ( + FilePath, + clone_repository_to_temp, + decode_file, + delete_file, + file_path_depth, + file_path_rel_to_root, + get_encoding_for_file_path, + get_parent_repository, + get_repository_name, + get_tld, + is_url, + match_file_extensions, + read_file, + resolve_local_path, + write_file, +) +from extended_data.io.importers import unwrap_raw_data_from_import + + +__all__ = [ + "FilePath", + "base64_decode", + "base64_encode", + "clone_repository_to_temp", + "decode_file", + "delete_file", + "file_path_depth", + "file_path_rel_to_root", + "get_encoding_for_file_path", + "get_parent_repository", + "get_repository_name", + "get_tld", + "is_url", + "make_raw_data_export_safe", + "match_file_extensions", + "read_file", + 
"resolve_local_path", + "unwrap_raw_data_from_import", + "wrap_raw_data_for_export", + "write_file", +] diff --git a/src/extended_data/base64_utils.py b/src/extended_data/io/base64.py similarity index 93% rename from src/extended_data/base64_utils.py rename to src/extended_data/io/base64.py index 3b07543..b6adfbe 100644 --- a/src/extended_data/base64_utils.py +++ b/src/extended_data/io/base64.py @@ -9,8 +9,8 @@ from base64 import b64decode, b64encode from typing import Any -from extended_data.export_utils import wrap_raw_data_for_export -from extended_data.import_utils import unwrap_raw_data_from_import +from extended_data.io.exporters import wrap_raw_data_for_export +from extended_data.io.importers import unwrap_raw_data_from_import def base64_encode(raw_data: str | bytes, wrap_raw_data: bool = True) -> str: diff --git a/src/extended_data/export_utils.py b/src/extended_data/io/exporters.py similarity index 93% rename from src/extended_data/export_utils.py rename to src/extended_data/io/exporters.py index f612ef5..5499ccc 100644 --- a/src/extended_data/export_utils.py +++ b/src/extended_data/io/exporters.py @@ -8,18 +8,18 @@ from collections.abc import Mapping from typing import Any -from extended_data.hcl2_utils import encode_hcl2 -from extended_data.json_utils import encode_json -from extended_data.serialization_utils import normalize_data_encoding -from extended_data.toml_utils import encode_toml -from extended_data.type_utils import convert_special_types, strtobool -from extended_data.yaml_utils import ( +from extended_data.primitives.formats.hcl import encode_hcl2 +from extended_data.primitives.formats.json import encode_json +from extended_data.primitives.formats.toml import encode_toml +from extended_data.primitives.formats.yaml import ( LiteralScalarString, YamlPairs, YamlTagged, encode_yaml, is_yaml_data, ) +from extended_data.primitives.serialization import normalize_data_encoding +from extended_data.primitives.types import convert_special_types, 
strtobool def wrap_raw_data_for_export( diff --git a/src/extended_data/file_data_type.py b/src/extended_data/io/files.py similarity index 98% rename from src/extended_data/file_data_type.py rename to src/extended_data/io/files.py index ce28915..d29d608 100644 --- a/src/extended_data/file_data_type.py +++ b/src/extended_data/io/files.py @@ -14,7 +14,7 @@ from git import GitCommandError, InvalidGitRepositoryError, NoSuchPathError, Repo -from extended_data.serialization_utils import normalize_data_encoding +from extended_data.primitives.serialization import normalize_data_encoding FilePath: TypeAlias = str | os.PathLike[str] @@ -315,7 +315,7 @@ def decode_file( Any: The decoded data structure, or the original string if format is unknown. """ # Lazy imports to avoid circular dependencies - from extended_data.import_utils import unwrap_raw_data_from_import + from extended_data.io.importers import unwrap_raw_data_from_import if suffix is None and file_path is not None: suffix = get_encoding_for_file_path(file_path) @@ -349,8 +349,8 @@ def write_file( Returns: Path | None: The path that was written to, or None if data was empty and not allowed. 
""" - from extended_data.export_utils import wrap_raw_data_for_export - from extended_data.state_utils import is_nothing + from extended_data.io.exporters import wrap_raw_data_for_export + from extended_data.primitives.state import is_nothing if is_nothing(data) and not allow_empty: return None diff --git a/src/extended_data/import_utils.py b/src/extended_data/io/importers.py similarity index 75% rename from src/extended_data/import_utils.py rename to src/extended_data/io/importers.py index 6f16ddb..23008d3 100644 --- a/src/extended_data/import_utils.py +++ b/src/extended_data/io/importers.py @@ -4,12 +4,12 @@ from typing import Any -from extended_data.hcl2_utils import decode_hcl2 -from extended_data.json_utils import decode_json -from extended_data.serialization_utils import normalize_data_encoding -from extended_data.string_data_type import bytestostr -from extended_data.toml_utils import decode_toml -from extended_data.yaml_utils import decode_yaml +from extended_data.primitives.formats.hcl import decode_hcl2 +from extended_data.primitives.formats.json import decode_json +from extended_data.primitives.formats.toml import decode_toml +from extended_data.primitives.formats.yaml import decode_yaml +from extended_data.primitives.serialization import normalize_data_encoding +from extended_data.primitives.strings import bytestostr def unwrap_raw_data_from_import( diff --git a/src/extended_data/primitives/__init__.py b/src/extended_data/primitives/__init__.py new file mode 100644 index 0000000..7487fd1 --- /dev/null +++ b/src/extended_data/primitives/__init__.py @@ -0,0 +1,175 @@ +"""Tier 1 pure data primitives.""" + +from extended_data.primitives.formats import ( + decode_hcl2, + decode_json, + decode_toml, + decode_yaml, + encode_hcl2, + encode_json, + encode_toml, + encode_yaml, + is_yaml_data, +) +from extended_data.primitives.introspection import ( + filter_methods, + get_available_methods, + get_caller, + get_inputs_from_docstring, + get_unique_signature, + 
update_docstring, +) +from extended_data.primitives.mappings import ( + SortedDefaultDict, + all_values_from_map, + create_merger, + deduplicate_map, + deep_merge, + filter_map, + first_non_empty_value_from_map, + flatten_map, + get_default_dict, + unhump_map, + zipmap, +) +from extended_data.primitives.matching import is_non_empty_match, is_partial_match +from extended_data.primitives.numbers import ( + from_roman, + number_to_currency, + number_to_ordinal, + number_to_words, + to_roman, +) +from extended_data.primitives.sequences import filter_list, flatten_list +from extended_data.primitives.serialization import normalize_data_encoding +from extended_data.primitives.splitting import split_dict_by_type, split_list_by_type +from extended_data.primitives.state import ( + all_non_empty, + all_non_empty_in_dict, + all_non_empty_in_list, + any_non_empty, + are_nothing, + first_non_empty, + is_nothing, + yield_non_empty, +) +from extended_data.primitives.string_transforms import ( + humanize, + ordinalize, + pluralize, + singularize, + titleize, + to_camel_case, + to_kebab_case, + to_pascal_case, + to_snake_case, +) +from extended_data.primitives.strings import ( + bytestostr, + lower_first_char, + removeprefix, + removesuffix, + sanitize_key, + titleize_name, + truncate, + upper_first_char, +) +from extended_data.primitives.types import ( + convert_special_type, + convert_special_types, + get_default_value_for_type, + get_primitive_type_for_instance_type, + make_hashable, + reconstruct_special_type, + reconstruct_special_types, + strtobool, + strtodate, + strtodatetime, + strtofloat, + strtoint, + strtopath, + strtotime, + typeof, +) + + +__all__ = [ + "SortedDefaultDict", + "all_non_empty", + "all_non_empty_in_dict", + "all_non_empty_in_list", + "all_values_from_map", + "any_non_empty", + "are_nothing", + "bytestostr", + "convert_special_type", + "convert_special_types", + "create_merger", + "decode_hcl2", + "decode_json", + "decode_toml", + "decode_yaml", + 
"deduplicate_map", + "deep_merge", + "encode_hcl2", + "encode_json", + "encode_toml", + "encode_yaml", + "filter_list", + "filter_map", + "filter_methods", + "first_non_empty", + "first_non_empty_value_from_map", + "flatten_list", + "flatten_map", + "from_roman", + "get_available_methods", + "get_caller", + "get_default_dict", + "get_default_value_for_type", + "get_inputs_from_docstring", + "get_primitive_type_for_instance_type", + "get_unique_signature", + "humanize", + "is_non_empty_match", + "is_nothing", + "is_partial_match", + "is_yaml_data", + "lower_first_char", + "make_hashable", + "normalize_data_encoding", + "number_to_currency", + "number_to_ordinal", + "number_to_words", + "ordinalize", + "pluralize", + "reconstruct_special_type", + "reconstruct_special_types", + "removeprefix", + "removesuffix", + "sanitize_key", + "singularize", + "split_dict_by_type", + "split_list_by_type", + "strtobool", + "strtodate", + "strtodatetime", + "strtofloat", + "strtoint", + "strtopath", + "strtotime", + "titleize", + "titleize_name", + "to_camel_case", + "to_kebab_case", + "to_pascal_case", + "to_roman", + "to_snake_case", + "truncate", + "typeof", + "unhump_map", + "update_docstring", + "upper_first_char", + "yield_non_empty", + "zipmap", +] diff --git a/src/extended_data/primitives/formats/__init__.py b/src/extended_data/primitives/formats/__init__.py new file mode 100644 index 0000000..2206c55 --- /dev/null +++ b/src/extended_data/primitives/formats/__init__.py @@ -0,0 +1,45 @@ +"""Tier 1 serialization codecs.""" + +from extended_data.primitives.formats.hcl import decode_hcl2, encode_hcl2 +from extended_data.primitives.formats.json import decode_json, encode_json +from extended_data.primitives.formats.toml import decode_toml, encode_toml +from extended_data.primitives.formats.yaml import ( + LiteralScalarString, + PureDumper, + PureLoader, + YamlPairs, + YamlTagged, + decode_yaml, + encode_yaml, + is_yaml_data, + yaml_construct_pairs, + yaml_construct_undefined, + 
yaml_literal_str_representer, + yaml_represent_pairs, + yaml_represent_tagged, + yaml_str_representer, +) + + +__all__ = [ + "LiteralScalarString", + "PureDumper", + "PureLoader", + "YamlPairs", + "YamlTagged", + "decode_hcl2", + "decode_json", + "decode_toml", + "decode_yaml", + "encode_hcl2", + "encode_json", + "encode_toml", + "encode_yaml", + "is_yaml_data", + "yaml_construct_pairs", + "yaml_construct_undefined", + "yaml_literal_str_representer", + "yaml_represent_pairs", + "yaml_represent_tagged", + "yaml_str_representer", +] diff --git a/src/extended_data/hcl2_utils.py b/src/extended_data/primitives/formats/hcl.py similarity index 98% rename from src/extended_data/hcl2_utils.py rename to src/extended_data/primitives/formats/hcl.py index 1eb1a86..0167ad1 100644 --- a/src/extended_data/hcl2_utils.py +++ b/src/extended_data/primitives/formats/hcl.py @@ -13,8 +13,8 @@ from lark.exceptions import ParseError -from extended_data.string_data_type import bytestostr -from extended_data.type_utils import convert_special_types +from extended_data.primitives.strings import bytestostr +from extended_data.primitives.types import convert_special_types _HCL_METADATA_KEYS = frozenset({"__is_block__"}) diff --git a/src/extended_data/json_utils.py b/src/extended_data/primitives/formats/json.py similarity index 100% rename from src/extended_data/json_utils.py rename to src/extended_data/primitives/formats/json.py diff --git a/src/extended_data/toml_utils.py b/src/extended_data/primitives/formats/toml.py similarity index 90% rename from src/extended_data/toml_utils.py rename to src/extended_data/primitives/formats/toml.py index 943444d..fbe51d3 100644 --- a/src/extended_data/toml_utils.py +++ b/src/extended_data/primitives/formats/toml.py @@ -11,8 +11,8 @@ from tomlkit.exceptions import TOMLKitError -from extended_data.string_data_type import bytestostr -from extended_data.type_utils import convert_special_types +from extended_data.primitives.strings import bytestostr +from 
extended_data.primitives.types import convert_special_types def decode_toml(toml_data: str | memoryview | bytes | bytearray) -> Any: diff --git a/src/extended_data/yaml_utils/__init__.py b/src/extended_data/primitives/formats/yaml/__init__.py similarity index 64% rename from src/extended_data/yaml_utils/__init__.py rename to src/extended_data/primitives/formats/yaml/__init__.py index 1f3c07c..424cd48 100644 --- a/src/extended_data/yaml_utils/__init__.py +++ b/src/extended_data/primitives/formats/yaml/__init__.py @@ -5,24 +5,24 @@ from __future__ import annotations -from extended_data.yaml_utils.constructors import ( +from extended_data.primitives.formats.yaml.constructors import ( yaml_construct_pairs, yaml_construct_undefined, ) -from extended_data.yaml_utils.dumpers import PureDumper -from extended_data.yaml_utils.loaders import PureLoader -from extended_data.yaml_utils.representers import ( +from extended_data.primitives.formats.yaml.dumpers import PureDumper +from extended_data.primitives.formats.yaml.loaders import PureLoader +from extended_data.primitives.formats.yaml.representers import ( yaml_literal_str_representer, yaml_represent_pairs, yaml_represent_tagged, yaml_str_representer, ) -from extended_data.yaml_utils.tag_classes import ( +from extended_data.primitives.formats.yaml.tag_classes import ( LiteralScalarString, YamlPairs, YamlTagged, ) -from extended_data.yaml_utils.utils import decode_yaml, encode_yaml, is_yaml_data +from extended_data.primitives.formats.yaml.utils import decode_yaml, encode_yaml, is_yaml_data __all__ = [ diff --git a/src/extended_data/yaml_utils/constructors.py b/src/extended_data/primitives/formats/yaml/constructors.py similarity index 95% rename from src/extended_data/yaml_utils/constructors.py rename to src/extended_data/primitives/formats/yaml/constructors.py index af057fb..4d1ba03 100644 --- a/src/extended_data/yaml_utils/constructors.py +++ b/src/extended_data/primitives/formats/yaml/constructors.py @@ -9,7 +9,7 @@ from 
yaml import MappingNode, SafeLoader, ScalarNode, SequenceNode -from extended_data.yaml_utils.tag_classes import YamlPairs, YamlTagged +from extended_data.primitives.formats.yaml.tag_classes import YamlPairs, YamlTagged def yaml_construct_undefined( diff --git a/src/extended_data/yaml_utils/dumpers.py b/src/extended_data/primitives/formats/yaml/dumpers.py similarity index 94% rename from src/extended_data/yaml_utils/dumpers.py rename to src/extended_data/primitives/formats/yaml/dumpers.py index acb4aaa..ac8acd8 100644 --- a/src/extended_data/yaml_utils/dumpers.py +++ b/src/extended_data/primitives/formats/yaml/dumpers.py @@ -13,13 +13,13 @@ from yaml import SafeDumper -from extended_data.yaml_utils.representers import ( +from extended_data.primitives.formats.yaml.representers import ( yaml_literal_str_representer, yaml_represent_pairs, yaml_represent_tagged, yaml_str_representer, ) -from extended_data.yaml_utils.tag_classes import ( +from extended_data.primitives.formats.yaml.tag_classes import ( LiteralScalarString, YamlPairs, YamlTagged, diff --git a/src/extended_data/yaml_utils/loaders.py b/src/extended_data/primitives/formats/yaml/loaders.py similarity index 93% rename from src/extended_data/yaml_utils/loaders.py rename to src/extended_data/primitives/formats/yaml/loaders.py index 756b9d7..eb49317 100644 --- a/src/extended_data/yaml_utils/loaders.py +++ b/src/extended_data/primitives/formats/yaml/loaders.py @@ -9,7 +9,7 @@ from yaml import SafeLoader -from extended_data.yaml_utils.constructors import ( +from extended_data.primitives.formats.yaml.constructors import ( yaml_construct_pairs, yaml_construct_undefined, ) diff --git a/src/extended_data/yaml_utils/representers.py b/src/extended_data/primitives/formats/yaml/representers.py similarity index 97% rename from src/extended_data/yaml_utils/representers.py rename to src/extended_data/primitives/formats/yaml/representers.py index 0691599..4c87c55 100644 --- a/src/extended_data/yaml_utils/representers.py +++ 
b/src/extended_data/primitives/formats/yaml/representers.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: from yaml import MappingNode, Node, SafeDumper, ScalarNode -from extended_data.yaml_utils.tag_classes import ( +from extended_data.primitives.formats.yaml.tag_classes import ( LiteralScalarString, YamlPairs, YamlTagged, diff --git a/src/extended_data/yaml_utils/tag_classes.py b/src/extended_data/primitives/formats/yaml/tag_classes.py similarity index 100% rename from src/extended_data/yaml_utils/tag_classes.py rename to src/extended_data/primitives/formats/yaml/tag_classes.py diff --git a/src/extended_data/yaml_utils/utils.py b/src/extended_data/primitives/formats/yaml/utils.py similarity index 86% rename from src/extended_data/yaml_utils/utils.py rename to src/extended_data/primitives/formats/yaml/utils.py index 038a57f..f7d4dee 100644 --- a/src/extended_data/yaml_utils/utils.py +++ b/src/extended_data/primitives/formats/yaml/utils.py @@ -10,10 +10,10 @@ import yaml -from extended_data.string_data_type import bytestostr -from extended_data.yaml_utils.dumpers import PureDumper -from extended_data.yaml_utils.loaders import PureLoader -from extended_data.yaml_utils.tag_classes import YamlPairs, YamlTagged +from extended_data.primitives.formats.yaml.dumpers import PureDumper +from extended_data.primitives.formats.yaml.loaders import PureLoader +from extended_data.primitives.formats.yaml.tag_classes import YamlPairs, YamlTagged +from extended_data.primitives.strings import bytestostr def decode_yaml(yaml_data: str | memoryview | bytes | bytearray) -> Any: diff --git a/src/extended_data/stack_utils.py b/src/extended_data/primitives/introspection.py similarity index 100% rename from src/extended_data/stack_utils.py rename to src/extended_data/primitives/introspection.py diff --git a/src/extended_data/map_data_type.py b/src/extended_data/primitives/mappings.py similarity index 98% rename from src/extended_data/map_data_type.py rename to src/extended_data/primitives/mappings.py 
index d78dad9..b5c609d 100644 --- a/src/extended_data/map_data_type.py +++ b/src/extended_data/primitives/mappings.py @@ -7,6 +7,7 @@ from __future__ import annotations import builtins +import copy from collections import defaultdict from collections.abc import Callable, Mapping, MutableMapping @@ -17,7 +18,7 @@ from deepmerge.merger import Merger from sortedcontainers import SortedDict -from extended_data.type_utils import convert_special_types +from extended_data.primitives.types import convert_special_types # Default merger configuration: @@ -59,7 +60,7 @@ def deep_merge(*mappings: Mapping[str, Any]) -> dict[str, Any]: result: dict[str, Any] = {} for mapping in mappings: if mapping: - result = _DEFAULT_MERGER.merge(result, dict(mapping)) + result = _DEFAULT_MERGER.merge(result, copy.deepcopy(dict(mapping))) return result diff --git a/src/extended_data/matcher_utils.py b/src/extended_data/primitives/matching.py similarity index 95% rename from src/extended_data/matcher_utils.py rename to src/extended_data/primitives/matching.py index 1d51671..10571e0 100644 --- a/src/extended_data/matcher_utils.py +++ b/src/extended_data/primitives/matching.py @@ -9,8 +9,8 @@ from collections.abc import Mapping from typing import Any -from extended_data.state_utils import is_nothing -from extended_data.type_utils import make_hashable +from extended_data.primitives.state import is_nothing +from extended_data.primitives.types import make_hashable def is_partial_match( diff --git a/src/extended_data/number_transformations.py b/src/extended_data/primitives/numbers.py similarity index 100% rename from src/extended_data/number_transformations.py rename to src/extended_data/primitives/numbers.py diff --git a/src/extended_data/list_data_type.py b/src/extended_data/primitives/sequences.py similarity index 100% rename from src/extended_data/list_data_type.py rename to src/extended_data/primitives/sequences.py diff --git a/src/extended_data/serialization_utils.py 
b/src/extended_data/primitives/serialization.py similarity index 100% rename from src/extended_data/serialization_utils.py rename to src/extended_data/primitives/serialization.py diff --git a/src/extended_data/splitter_utils.py b/src/extended_data/primitives/splitting.py similarity index 97% rename from src/extended_data/splitter_utils.py rename to src/extended_data/primitives/splitting.py index 94ad35b..aecaab6 100644 --- a/src/extended_data/splitter_utils.py +++ b/src/extended_data/primitives/splitting.py @@ -15,7 +15,7 @@ from collections import defaultdict from typing import Any -from extended_data.type_utils import typeof +from extended_data.primitives.types import typeof def split_list_by_type( diff --git a/src/extended_data/state_utils.py b/src/extended_data/primitives/state.py similarity index 100% rename from src/extended_data/state_utils.py rename to src/extended_data/primitives/state.py diff --git a/src/extended_data/string_transformations.py b/src/extended_data/primitives/string_transforms.py similarity index 100% rename from src/extended_data/string_transformations.py rename to src/extended_data/primitives/string_transforms.py diff --git a/src/extended_data/string_data_type.py b/src/extended_data/primitives/strings.py similarity index 100% rename from src/extended_data/string_data_type.py rename to src/extended_data/primitives/strings.py diff --git a/src/extended_data/transformations/__init__.py b/src/extended_data/primitives/transformations/__init__.py similarity index 52% rename from src/extended_data/transformations/__init__.py rename to src/extended_data/primitives/transformations/__init__.py index 25822ec..9a3549c 100644 --- a/src/extended_data/transformations/__init__.py +++ b/src/extended_data/primitives/transformations/__init__.py @@ -2,8 +2,8 @@ from __future__ import annotations -from extended_data.transformations.numbers import notation, words -from extended_data.transformations.strings import inflection +from 
extended_data.primitives.transformations.numbers import notation, words +from extended_data.primitives.transformations.strings import inflection __all__ = [ diff --git a/src/extended_data/transformations/numbers/__init__.py b/src/extended_data/primitives/transformations/numbers/__init__.py similarity index 82% rename from src/extended_data/transformations/numbers/__init__.py rename to src/extended_data/primitives/transformations/numbers/__init__.py index bb1beb7..c0bfe50 100644 --- a/src/extended_data/transformations/numbers/__init__.py +++ b/src/extended_data/primitives/transformations/numbers/__init__.py @@ -2,7 +2,7 @@ from __future__ import annotations -from extended_data.transformations.numbers.notation import ( +from extended_data.primitives.transformations.numbers.notation import ( from_fraction, from_ordinal, from_roman, @@ -12,7 +12,7 @@ to_roman, to_words, ) -from extended_data.transformations.numbers.words import ( +from extended_data.primitives.transformations.numbers.words import ( fraction_to_words, number_to_words, ordinal_to_words, diff --git a/src/extended_data/transformations/numbers/notation.py b/src/extended_data/primitives/transformations/numbers/notation.py similarity index 98% rename from src/extended_data/transformations/numbers/notation.py rename to src/extended_data/primitives/transformations/numbers/notation.py index 3af2f53..8cfd068 100644 --- a/src/extended_data/transformations/numbers/notation.py +++ b/src/extended_data/primitives/transformations/numbers/notation.py @@ -7,7 +7,7 @@ from num2words import num2words -from extended_data.transformations.numbers import words as words_module +from extended_data.primitives.transformations.numbers import words as words_module _ROMAN_VALUES: Final[dict[str, int]] = { diff --git a/src/extended_data/transformations/numbers/words.py b/src/extended_data/primitives/transformations/numbers/words.py similarity index 100% rename from src/extended_data/transformations/numbers/words.py rename to 
src/extended_data/primitives/transformations/numbers/words.py diff --git a/src/extended_data/transformations/strings/__init__.py b/src/extended_data/primitives/transformations/strings/__init__.py similarity index 83% rename from src/extended_data/transformations/strings/__init__.py rename to src/extended_data/primitives/transformations/strings/__init__.py index ff810f9..1b2be27 100644 --- a/src/extended_data/transformations/strings/__init__.py +++ b/src/extended_data/primitives/transformations/strings/__init__.py @@ -2,7 +2,7 @@ from __future__ import annotations -from extended_data.transformations.strings.inflection import ( +from extended_data.primitives.transformations.strings.inflection import ( camelize, humanize, ordinalize, diff --git a/src/extended_data/transformations/strings/inflection.py b/src/extended_data/primitives/transformations/strings/inflection.py similarity index 100% rename from src/extended_data/transformations/strings/inflection.py rename to src/extended_data/primitives/transformations/strings/inflection.py diff --git a/src/extended_data/type_utils.py b/src/extended_data/primitives/types.py similarity index 98% rename from src/extended_data/type_utils.py rename to src/extended_data/primitives/types.py index e73088e..ab0e246 100644 --- a/src/extended_data/type_utils.py +++ b/src/extended_data/primitives/types.py @@ -44,9 +44,9 @@ from orjson import JSONDecodeError from yaml.error import YAMLError -from extended_data.json_utils import decode_json -from extended_data.string_data_type import removesuffix -from extended_data.yaml_utils import YamlPairs, YamlTagged, decode_yaml +from extended_data.primitives.formats.json import decode_json +from extended_data.primitives.formats.yaml import YamlPairs, YamlTagged, decode_yaml +from extended_data.primitives.strings import removesuffix # Patterns for matching date, datetime, and time strings diff --git a/tests/core/test_base64_utils.py b/tests/core/test_base64_utils.py index 886411d..9e73185 100644 --- 
a/tests/core/test_base64_utils.py +++ b/tests/core/test_base64_utils.py @@ -22,8 +22,8 @@ import pytest -from extended_data.base64_utils import base64_decode, base64_encode -from extended_data.export_utils import wrap_raw_data_for_export +from extended_data.io.base64 import base64_decode, base64_encode +from extended_data.io.exporters import wrap_raw_data_for_export def test_base64_encode_string() -> None: diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py new file mode 100644 index 0000000..34903a8 --- /dev/null +++ b/tests/core/test_containers.py @@ -0,0 +1,62 @@ +"""Tests for Tier 2 extended containers.""" + +from __future__ import annotations + +import extended_data + +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString + + +def test_extended_string_chains_primitive_transforms() -> None: + """ExtendedString composes Tier 1 string primitives.""" + value = ExtendedString("API Response Value") + + assert value.to_snake_case().remove_suffix("_value") == "api_response" + assert value.to_kebab_case() == "api-response-value" + assert ExtendedString("1").ordinalize() == "1st" + assert ExtendedString("yes").to_bool() is True + + +def test_extended_dict_composes_mapping_primitives() -> None: + """ExtendedDict composes Tier 1 mapping primitives.""" + value = ExtendedDict({"outer": {"inner": 1}, "items": [1, 1, 2], "empty": ""}) + + merged = value.deep_merge({"outer": {"other": 2}}) + accepted, rejected = merged.filter(allowlist=["outer"]) + + assert merged["outer"] == {"inner": 1, "other": 2} + assert value["outer"] == {"inner": 1} + assert value.flatten() == {"outer.inner": 1, "items.0": 1, "items.1": 1, "items.2": 2, "empty": ""} + assert value.deduplicate()["items"] == [1, 2] + assert value.compact() == {"outer": {"inner": 1}, "items": [1, 1, 2]} + assert accepted == {"outer": {"inner": 1, "other": 2}} + assert "items" in rejected + + +def test_extended_list_composes_sequence_primitives() -> None: + 
"""ExtendedList composes Tier 1 sequence primitives.""" + value = ExtendedList([1, [2, [3]], "", 2]) + + assert value.flatten() == [1, 2, 3, "", 2] + assert value.compact() == [1, [2, [3]], 2] + assert value.unique() == [1, [2, [3]], "", 2] + assert value.filter(lambda item: isinstance(item, int)) == [1, 2] + assert ExtendedList([1, 2]).map(lambda item: item * 2) == [2, 4] + + +def test_extended_set_composes_set_operations() -> None: + """ExtendedSet provides chainable set operations.""" + value = ExtendedSet({1, 2, 3, None}) + + assert value.compact().to_set() == {1, 2, 3} + assert value.union({4}).to_set() == {1, 2, 3, 4, None} + assert value.intersection({2, 3, 5}).to_set() == {2, 3} + assert value.difference({1, None}).to_set() == {2, 3} + + +def test_container_classes_are_root_exports() -> None: + """Tier 2 containers are root-level convenience exports.""" + assert extended_data.ExtendedString is ExtendedString + assert extended_data.ExtendedDict is ExtendedDict + assert extended_data.ExtendedList is ExtendedList + assert extended_data.ExtendedSet is ExtendedSet diff --git a/tests/core/test_export_utils.py b/tests/core/test_export_utils.py index 7a5b101..fb445c2 100644 --- a/tests/core/test_export_utils.py +++ b/tests/core/test_export_utils.py @@ -21,11 +21,11 @@ import pytest -from extended_data.export_utils import ( +from extended_data.io.exporters import ( make_raw_data_export_safe, wrap_raw_data_for_export, ) -from extended_data.yaml_utils import ( +from extended_data.primitives.formats.yaml import ( LiteralScalarString, YamlPairs, YamlTagged, diff --git a/tests/core/test_file_data_type.py b/tests/core/test_file_data_type.py index 97d57a3..4e0478d 100644 --- a/tests/core/test_file_data_type.py +++ b/tests/core/test_file_data_type.py @@ -24,7 +24,7 @@ from git import GitCommandError, InvalidGitRepositoryError, NoSuchPathError, Repo -from extended_data.file_data_type import ( +from extended_data.io.files import ( FilePath, clone_repository_to_temp, 
decode_file, @@ -80,7 +80,7 @@ def test_get_parent_repository(mocker) -> None: The result of get_parent_repository is either a valid Repo object or None if invalid. """ # Mock the Repo constructor to return a mock Repo instance - mock_repo_constructor = mocker.patch("extended_data.file_data_type.Repo") + mock_repo_constructor = mocker.patch("extended_data.io.files.Repo") mock_repo_instance = mocker.Mock(spec=Repo) mock_repo_constructor.return_value = mock_repo_instance @@ -124,7 +124,7 @@ def test_clone_repository_to_temp(mocker, valid_repo_data: dict) -> None: valid_repo_data: Dictionary containing valid repository data. """ # Mock the Repo.clone_from method to return a mock Repo instance - mock_clone_from = mocker.patch("extended_data.file_data_type.Repo.clone_from") + mock_clone_from = mocker.patch("extended_data.io.files.Repo.clone_from") mock_repo_instance = mocker.Mock(spec=Repo) mock_clone_from.return_value = mock_repo_instance @@ -156,7 +156,7 @@ def test_clone_repository_to_temp_additional_errors( mocker, valid_repo_data: dict, side_effect: Exception, message: str ) -> None: """Map additional git clone failures to consistent OSError messages.""" - mocker.patch("extended_data.file_data_type.Repo.clone_from", side_effect=side_effect) + mocker.patch("extended_data.io.files.Repo.clone_from", side_effect=side_effect) with pytest.raises(OSError, match=message): clone_repository_to_temp(**valid_repo_data) @@ -171,7 +171,7 @@ def test_get_tld(mocker) -> None: The result of get_tld matches the expected top-level directory or None if not a repository. 
""" # Mock get_parent_repository to return a mock Repo instance - mock_get_parent_repo = mocker.patch("extended_data.file_data_type.get_parent_repository") + mock_get_parent_repo = mocker.patch("extended_data.io.files.get_parent_repository") mock_repo_instance = mocker.Mock(spec=Repo) mock_repo_instance.working_tree_dir = "/valid/repo" mock_get_parent_repo.return_value = mock_repo_instance @@ -341,7 +341,7 @@ def test_resolve_local_path_relative_no_tld(mocker) -> None: Asserts: RuntimeError is raised when no tld is available. """ - mocker.patch("extended_data.file_data_type.get_tld", return_value=None) + mocker.patch("extended_data.io.files.get_tld", return_value=None) with pytest.raises(RuntimeError, match="Cannot resolve relative path"): resolve_local_path("relative/file.txt") @@ -392,7 +392,7 @@ def read(self) -> bytes: return b"hello from url" mock_urlopen = mocker.patch( - "extended_data.file_data_type.urllib.request.urlopen", + "extended_data.io.files.urllib.request.urlopen", return_value=MockResponse(), ) @@ -418,7 +418,7 @@ def read(self) -> bytes: return b"\x00\x01\x02" mocker.patch( - "extended_data.file_data_type.urllib.request.urlopen", + "extended_data.io.files.urllib.request.urlopen", return_value=MockResponse(), ) diff --git a/tests/core/test_hcl2_utils.py b/tests/core/test_hcl2_utils.py index 2357a01..a3a3e63 100644 --- a/tests/core/test_hcl2_utils.py +++ b/tests/core/test_hcl2_utils.py @@ -6,8 +6,8 @@ from lark.exceptions import ParseError, UnexpectedToken -from extended_data import hcl2_utils -from extended_data.hcl2_utils import decode_hcl2, encode_hcl2 +from extended_data.primitives.formats import hcl as hcl2_utils +from extended_data.primitives.formats.hcl import decode_hcl2, encode_hcl2 @pytest.fixture diff --git a/tests/core/test_import_utils.py b/tests/core/test_import_utils.py index f26237f..7cc0e55 100644 --- a/tests/core/test_import_utils.py +++ b/tests/core/test_import_utils.py @@ -4,7 +4,7 @@ import pytest -from 
extended_data.import_utils import unwrap_raw_data_from_import +from extended_data.io.importers import unwrap_raw_data_from_import @pytest.mark.parametrize( diff --git a/tests/core/test_json_utils.py b/tests/core/test_json_utils.py index 0184c24..ee785a2 100644 --- a/tests/core/test_json_utils.py +++ b/tests/core/test_json_utils.py @@ -15,7 +15,7 @@ import pytest -from extended_data.json_utils import decode_json, encode_json +from extended_data.primitives.formats.json import decode_json, encode_json @pytest.fixture diff --git a/tests/core/test_list_data_type.py b/tests/core/test_list_data_type.py index f0e1733..f94fe23 100644 --- a/tests/core/test_list_data_type.py +++ b/tests/core/test_list_data_type.py @@ -22,7 +22,7 @@ import pytest -from extended_data.list_data_type import filter_list, flatten_list +from extended_data.primitives.sequences import filter_list, flatten_list @pytest.fixture diff --git a/tests/core/test_map_data_type.py b/tests/core/test_map_data_type.py index 2510984..d5068b6 100644 --- a/tests/core/test_map_data_type.py +++ b/tests/core/test_map_data_type.py @@ -37,7 +37,7 @@ import pytest -from extended_data.map_data_type import ( +from extended_data.primitives.mappings import ( SortedDefaultDict, all_values_from_map, create_merger, @@ -226,10 +226,12 @@ def test_first_non_empty_value_from_map_returns_none_for_falsy_values() -> None: def test_deep_merge_merges_nested_dicts_lists_and_sets() -> None: """Merge nested structures using the default strategies.""" + first = {"config": {"enabled": True}, "items": [1], "tags": {"a"}} + second = {"config": {"threshold": 2}, "items": [2], "tags": {"b"}} result = deep_merge( {}, - {"config": {"enabled": True}, "items": [1], "tags": {"a"}}, - {"config": {"threshold": 2}, "items": [2], "tags": {"b"}}, + first, + second, ) assert result == { @@ -237,6 +239,8 @@ def test_deep_merge_merges_nested_dicts_lists_and_sets() -> None: "items": [1, 2], "tags": {"a", "b"}, } + assert first == {"config": {"enabled": True}, 
"items": [1], "tags": {"a"}} + assert second == {"config": {"threshold": 2}, "items": [2], "tags": {"b"}} def test_create_merger_can_override_list_values() -> None: diff --git a/tests/core/test_matcher_utils.py b/tests/core/test_matcher_utils.py index 7fbdcce..f7e7942 100644 --- a/tests/core/test_matcher_utils.py +++ b/tests/core/test_matcher_utils.py @@ -12,7 +12,7 @@ import pytest -from extended_data.matcher_utils import is_non_empty_match, is_partial_match +from extended_data.primitives.matching import is_non_empty_match, is_partial_match @pytest.mark.parametrize( diff --git a/tests/core/test_number_transformations.py b/tests/core/test_number_transformations.py index b00a296..a8000ba 100644 --- a/tests/core/test_number_transformations.py +++ b/tests/core/test_number_transformations.py @@ -4,7 +4,7 @@ import pytest -from extended_data.number_transformations import ( +from extended_data.primitives.numbers import ( from_roman, number_to_currency, number_to_ordinal, diff --git a/tests/core/test_serialization_utils.py b/tests/core/test_serialization_utils.py index bd79ace..d457d9b 100644 --- a/tests/core/test_serialization_utils.py +++ b/tests/core/test_serialization_utils.py @@ -2,7 +2,7 @@ from __future__ import annotations -from extended_data.serialization_utils import normalize_data_encoding +from extended_data.primitives.serialization import normalize_data_encoding def test_normalize_data_encoding_aliases_and_passthrough() -> None: diff --git a/tests/core/test_splitter_utils.py b/tests/core/test_splitter_utils.py index 0f1dcf7..5cec76e 100644 --- a/tests/core/test_splitter_utils.py +++ b/tests/core/test_splitter_utils.py @@ -15,7 +15,7 @@ import pytest -from extended_data.splitter_utils import split_dict_by_type, split_list_by_type +from extended_data.primitives.splitting import split_dict_by_type, split_list_by_type @pytest.mark.parametrize( diff --git a/tests/core/test_stack_utils.py b/tests/core/test_stack_utils.py index b21fadf..e4806da 100644 --- 
a/tests/core/test_stack_utils.py +++ b/tests/core/test_stack_utils.py @@ -19,7 +19,7 @@ import pytest -from extended_data.stack_utils import ( +from extended_data.primitives.introspection import ( current_python_version_is_at_least, filter_methods, get_available_methods, diff --git a/tests/core/test_state_utils.py b/tests/core/test_state_utils.py index d883d99..d6ee079 100644 --- a/tests/core/test_state_utils.py +++ b/tests/core/test_state_utils.py @@ -23,7 +23,7 @@ import pytest -from extended_data.state_utils import ( +from extended_data.primitives.state import ( all_non_empty, any_non_empty, are_nothing, @@ -96,7 +96,7 @@ def test_are_nothing_with_no_inputs_returns_true() -> None: def test_are_nothing_fallback_branch_returns_false(mocker) -> None: """Defensively return False for unexpected all_non_empty output types.""" - mocker.patch("extended_data.state_utils.all_non_empty", return_value="unexpected") + mocker.patch("extended_data.primitives.state.all_non_empty", return_value="unexpected") assert are_nothing("value") is False diff --git a/tests/core/test_string_data_type.py b/tests/core/test_string_data_type.py index 3a355e6..1cebad8 100644 --- a/tests/core/test_string_data_type.py +++ b/tests/core/test_string_data_type.py @@ -47,7 +47,7 @@ import pytest -from extended_data.string_data_type import ( +from extended_data.primitives.strings import ( bytestostr, is_url, lower_first_char, diff --git a/tests/core/test_string_transformations.py b/tests/core/test_string_transformations.py index 5fc8b93..1ce3ffa 100644 --- a/tests/core/test_string_transformations.py +++ b/tests/core/test_string_transformations.py @@ -4,7 +4,7 @@ import pytest -from extended_data.string_transformations import ( +from extended_data.primitives.string_transforms import ( humanize, ordinalize, pluralize, diff --git a/tests/core/test_toml_utils.py b/tests/core/test_toml_utils.py index 1105f05..37a1b6f 100644 --- a/tests/core/test_toml_utils.py +++ b/tests/core/test_toml_utils.py @@ -17,7 
+17,7 @@ import pytest import tomlkit -from extended_data.toml_utils import decode_toml, encode_toml +from extended_data.primitives.formats.toml import decode_toml, encode_toml def test_decode_toml_invalid_format() -> None: diff --git a/tests/core/test_type_utils.py b/tests/core/test_type_utils.py index 1a6ebe1..e907175 100644 --- a/tests/core/test_type_utils.py +++ b/tests/core/test_type_utils.py @@ -1,4 +1,4 @@ -"""Test suite for extended_data.type_utils module. +"""Test suite for extended_data.primitives.types module. This module contains unit tests for various utility functions provided by the type_utils module, ensuring correct functionality of type conversions, @@ -14,7 +14,8 @@ import pytest -from extended_data.type_utils import ( +from extended_data.primitives.formats.yaml import YamlPairs, YamlTagged +from extended_data.primitives.types import ( ConversionError, convert_special_type, convert_special_types, @@ -32,7 +33,6 @@ strtotime, typeof, ) -from extended_data.yaml_utils import YamlPairs, YamlTagged # Constants for expected test values @@ -246,7 +246,7 @@ def test_strtoint(strtoint_data: tuple[str, int | None]) -> None: def test_strtoint_wraps_nested_conversion_errors(mocker) -> None: """Map nested float conversion failures to integer conversion failures.""" mocker.patch( - "extended_data.type_utils.strtofloat", + "extended_data.primitives.types.strtofloat", side_effect=ConversionError(float, "3.14"), ) @@ -257,7 +257,7 @@ def test_strtoint_wraps_nested_conversion_errors(mocker) -> None: def test_strtoint_swallows_nested_conversion_errors_when_not_requested(mocker) -> None: """Return None when nested conversion fails and raise_on_error is disabled.""" mocker.patch( - "extended_data.type_utils.strtofloat", + "extended_data.primitives.types.strtofloat", side_effect=ConversionError(float, "3.14"), ) @@ -266,7 +266,7 @@ def test_strtoint_swallows_nested_conversion_errors_when_not_requested(mocker) - def 
test_strtoint_raises_when_nested_conversion_returns_none(mocker) -> None: """Raise an integer conversion error when nested conversion returns no value.""" - mocker.patch("extended_data.type_utils.strtofloat", return_value=None) + mocker.patch("extended_data.primitives.types.strtofloat", return_value=None) with pytest.raises(ConversionError, match=r"Invalid value: '3.14'"): strtoint("3.14", raise_on_error=True) diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index 9fab626..d9d5e7b 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -17,7 +17,7 @@ to_snake_case, write_file, ) -from extended_data.yaml_utils import YamlTagged +from extended_data.primitives.formats.yaml import YamlTagged def test_layered_config_workflow_round_trip(tmp_path: Path) -> None: diff --git a/tests/core/test_yaml_utils.py b/tests/core/test_yaml_utils.py index 4a45679..9663988 100644 --- a/tests/core/test_yaml_utils.py +++ b/tests/core/test_yaml_utils.py @@ -23,7 +23,7 @@ from yaml import MappingNode, ScalarNode, SequenceNode -from extended_data.yaml_utils import ( +from extended_data.primitives.formats.yaml import ( LiteralScalarString, YamlPairs, YamlTagged, diff --git a/tests/core/transformations/numbers/test_notation.py b/tests/core/transformations/numbers/test_notation.py index da6b646..e7eb2ea 100644 --- a/tests/core/transformations/numbers/test_notation.py +++ b/tests/core/transformations/numbers/test_notation.py @@ -4,7 +4,7 @@ import pytest -from extended_data.transformations.numbers.notation import ( +from extended_data.primitives.transformations.numbers.notation import ( from_fraction, from_ordinal, from_roman, diff --git a/tests/core/transformations/numbers/test_words.py b/tests/core/transformations/numbers/test_words.py index 04947c7..6f7c7d0 100644 --- a/tests/core/transformations/numbers/test_words.py +++ b/tests/core/transformations/numbers/test_words.py @@ -4,8 +4,8 @@ import pytest -from extended_data.transformations.numbers 
import words as words_module -from extended_data.transformations.numbers.words import ( +from extended_data.primitives.transformations.numbers import words as words_module +from extended_data.primitives.transformations.numbers.words import ( fraction_to_words, number_to_words, ordinal_to_words, diff --git a/tests/core/transformations/strings/test_inflection.py b/tests/core/transformations/strings/test_inflection.py index 1fe541e..cf331d7 100644 --- a/tests/core/transformations/strings/test_inflection.py +++ b/tests/core/transformations/strings/test_inflection.py @@ -4,7 +4,7 @@ import pytest -from extended_data.transformations.strings.inflection import ( +from extended_data.primitives.transformations.strings.inflection import ( camelize, humanize, ordinalize, From 50b6d4b6a024c155da1689075ce1911291ef628e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 00:30:48 -0500 Subject: [PATCH 003/287] feat: bridge decoders to extended containers --- README.md | 8 ++- docs/package-surface.md | 24 +++++++-- examples/core/composed_workflows.py | 26 ++++----- examples/core/file_operations.py | 5 +- examples/inputs/encoding_decoding.py | 4 +- src/extended_data/__init__.py | 4 +- src/extended_data/containers/__init__.py | 3 ++ src/extended_data/containers/factory.py | 53 +++++++++++++++++++ src/extended_data/containers/mappings.py | 34 ++++++++---- src/extended_data/inputs/__main__.py | 29 +++++----- src/extended_data/inputs/decorators.py | 2 + src/extended_data/io/base64.py | 5 +- src/extended_data/io/exporters.py | 12 +++-- src/extended_data/io/files.py | 5 +- src/extended_data/io/importers.py | 32 ++++++----- tests/connectors/test_aws_codedeploy.py | 3 ++ tests/connectors/test_aws_connector.py | 4 ++ tests/connectors/test_aws_organizations.py | 4 ++ tests/connectors/test_aws_s3.py | 4 ++ tests/connectors/test_aws_sso.py | 4 ++ tests/connectors/test_aws_tools.py | 4 ++ tests/connectors/test_github_connector.py | 5 ++ tests/connectors/test_github_tools.py | 3 ++ 
.../test_github_workflow_builder.py | 5 ++ tests/connectors/test_google_billing.py | 6 +++ tests/connectors/test_google_cloud.py | 4 ++ tests/connectors/test_google_connector.py | 6 +++ tests/connectors/test_google_services.py | 4 ++ tests/connectors/test_google_tools.py | 4 ++ tests/connectors/test_google_workspace.py | 4 ++ tests/connectors/test_slack_connector.py | 5 ++ tests/connectors/test_vault_connector.py | 3 ++ tests/connectors/test_vault_tools.py | 3 ++ tests/core/test_base64_utils.py | 12 +++++ tests/core/test_containers.py | 40 +++++++++++++- tests/core/test_file_data_type.py | 15 ++++++ tests/core/test_import_utils.py | 23 ++++++++ tests/core/test_workflows.py | 46 ++++++++++++---- tests/inputs/test_decorators.py | 13 +++++ tests/inputs/test_main.py | 23 ++++++++ 40 files changed, 421 insertions(+), 72 deletions(-) create mode 100644 src/extended_data/containers/factory.py diff --git a/README.md b/README.md index fdfa69a..c3d5d0e 100644 --- a/README.md +++ b/README.md @@ -26,15 +26,17 @@ pip install "extended-data[secrets]" ## Usage ```python -from extended_data import ConnectorFabric, ExtendedDict, InputProvider, Logging, decode_json, encode_yaml +from extended_data import ConnectorFabric, ExtendedDict, InputProvider, Logging, decode_file, decode_json, encode_yaml logger = Logging(logger_name="example") inputs = InputProvider(inputs={"GITHUB_OWNER": "jbcom"}, from_environment=False) connectors = ConnectorFabric(inputs=inputs.inputs, logger=logger) data = decode_json('{"status": "ok"}') payload = ExtendedDict(data).deep_merge({"source": "example"}) +decoded_file = decode_file('{"service": {"name": "api"}}', suffix="json", as_extended=True) print(encode_yaml(payload.data)) +print(decoded_file["service"]["name"].upper_first()) ``` The fabric can also instantiate any registered connector by name: @@ -92,6 +94,10 @@ The package is intentionally tiered: - Tier 3 processors use the first two tiers to handle files, inputs, API data, vendor integrations, and 
workflows. +Tier 3 decoders can opt into Tier 2 containers with `as_extended=True`, so +decoded files, Base64 payloads, and directed inputs can immediately use +`ExtendedDict`, `ExtendedList`, `ExtendedSet`, and `ExtendedString` methods. + More detail lives in [`docs/package-surface.md`](docs/package-surface.md). ## Development diff --git a/docs/package-surface.md b/docs/package-surface.md index 6ac85bd..e54f916 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -13,8 +13,10 @@ from extended_data import ( InputProvider, Logging, decode_json, + extend_data, encode_yaml, flatten_map, + to_builtin, ) ``` @@ -36,11 +38,25 @@ items = ExtendedList([1, [2, [3]]]).flatten() tags = ExtendedSet({"prod", "prod", ""}).compact() ``` +Tier 3 decode surfaces can promote plain decoded values into Tier 2 containers: + +```python +from extended_data import decode_file + +payload = decode_file('{"service": {"name": "api"}}', suffix="json", as_extended=True) +assert payload["service"]["name"].upper_first() == "Api" +``` + +Use `extend_data(value)` to promote existing plain data and `to_builtin(value)` +to lower extended containers back to standard Python data. + `InputProvider` loads input data from explicit mappings, environment variables, -and stdin, then decodes or coerces values through the primitive layer. `Logging` -provides structured lifecycle logging for applications and connector workflows. -`ConnectorFabric` caches and coordinates vendor connectors while sharing input -loading, logging, data normalization, retry behavior, and serialization. +and stdin, then decodes or coerces values through the primitive layer. Its +`decode_input(..., as_extended=True)` path gives input-driven workflows the same +container bridge as file and Base64 decoding. `Logging` provides structured +lifecycle logging for applications and connector workflows. 
`ConnectorFabric` +caches and coordinates vendor connectors while sharing input loading, logging, +data normalization, retry behavior, and serialization. ## Connector Fabric diff --git a/examples/core/composed_workflows.py b/examples/core/composed_workflows.py index 1534aca..1dfdc9c 100644 --- a/examples/core/composed_workflows.py +++ b/examples/core/composed_workflows.py @@ -11,16 +11,14 @@ from tempfile import TemporaryDirectory from extended_data import ( + ExtendedDict, base64_decode, base64_encode, decode_file, decode_hcl2, - deduplicate_map, - deep_merge, encode_hcl2, filter_list, read_file, - to_snake_case, write_file, ) from extended_data.primitives.formats.yaml import YamlTagged @@ -49,9 +47,9 @@ def demonstrate_layered_config_workflow() -> None: base_text = read_file("config/base.yaml", tld=tld) env_text = read_file("config/dev.yaml", tld=tld) - base_data = decode_file(base_text, file_path="config/base.yaml") - env_data = decode_file(env_text, file_path="config/dev.yaml") - merged = deep_merge(base_data, env_data) + base_data = decode_file(base_text, file_path="config/base.yaml", as_extended=True) + env_data = decode_file(env_text, file_path="config/dev.yaml", as_extended=True) + merged = base_data.deep_merge(env_data) write_file("build/config.yaml", merged, tld=tld) merged_text = read_file("build/config.yaml", tld=tld) @@ -91,13 +89,15 @@ def demonstrate_api_payload_workflow() -> None: """Normalize and serialize an API-style payload.""" print("\n=== API Payload Workflow ===\n") - payload = { - "HTTPResponseCode": 200, - "SelectedServices": filter_list(["api", "worker", "db"], denylist=["db"]), - "Tags": ["api", "api", "docs"], - } + payload = ExtendedDict( + { + "HTTPResponseCode": 200, + "SelectedServices": filter_list(["api", "worker", "db"], denylist=["db"]), + "Tags": ["api", "api", "docs"], + } + ) - normalized = {to_snake_case(key): value for key, value in deduplicate_map(payload).items()} + normalized = payload.deduplicate().unhump() with 
TemporaryDirectory() as tmpdir: tld = Path(tmpdir) @@ -120,7 +120,7 @@ def demonstrate_yaml_native_workflow() -> None: tld = Path(tmpdir) write_file("template.yaml", template, tld=tld) rendered = read_file("template.yaml", tld=tld) - decoded = decode_file(rendered, file_path="template.yaml") + decoded = decode_file(rendered, file_path="template.yaml", as_extended=True) print(rendered) print(f"\nDecoded tag: {decoded['bucket_name'].tag}") diff --git a/examples/core/file_operations.py b/examples/core/file_operations.py index 7783f0e..72b205b 100755 --- a/examples/core/file_operations.py +++ b/examples/core/file_operations.py @@ -90,8 +90,9 @@ def demonstrate_file_operations() -> None: write_file(yaml_file, yaml_content) yaml_text = read_file(yaml_file) - data = decode_file(yaml_text, file_path=yaml_file) + data = decode_file(yaml_text, file_path=yaml_file, as_extended=True) print(f"\nDecoded YAML file: {data}") + print(f"YAML service keys: {data.flatten().keys()}") # Write and read JSON json_file = Path(tmpdir) / "data.json" @@ -99,7 +100,7 @@ def demonstrate_file_operations() -> None: write_file(json_file, json_content) json_text = read_file(json_file) - data = decode_file(json_text, file_path=json_file) + data = decode_file(json_text, file_path=json_file, as_extended=True) print(f"Decoded JSON file: {data}") diff --git a/examples/inputs/encoding_decoding.py b/examples/inputs/encoding_decoding.py index 28c3a0f..3ea5879 100644 --- a/examples/inputs/encoding_decoding.py +++ b/examples/inputs/encoding_decoding.py @@ -38,10 +38,10 @@ def main() -> None: ) # JSON decoding - inputs.decode_input("json_config", decode_from_json=True) + inputs.decode_input("json_config", decode_from_json=True, as_extended=True) # YAML decoding - inputs.decode_input("yaml_config", decode_from_yaml=True) + inputs.decode_input("yaml_config", decode_from_yaml=True, as_extended=True) # Base64 + JSON decoding inputs.decode_input( diff --git a/src/extended_data/__init__.py 
b/src/extended_data/__init__.py index c830b12..b596b00 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -11,7 +11,7 @@ from typing import TYPE_CHECKING from extended_data._version import __version__ -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, extend_data, to_builtin from extended_data.io.base64 import base64_decode, base64_encode from extended_data.io.exporters import ( make_raw_data_export_safe, @@ -198,6 +198,7 @@ def __getattr__(name: str): "encode_json", "encode_toml", "encode_yaml", + "extend_data", "file_path_depth", "file_path_rel_to_root", "filter_list", @@ -255,6 +256,7 @@ def __getattr__(name: str): "strtotime", "titleize", "titleize_name", + "to_builtin", "to_camel_case", "to_kebab_case", "to_pascal_case", diff --git a/src/extended_data/containers/__init__.py b/src/extended_data/containers/__init__.py index e48324a..0f830c2 100644 --- a/src/extended_data/containers/__init__.py +++ b/src/extended_data/containers/__init__.py @@ -1,5 +1,6 @@ """Tier 2 extended container classes.""" +from extended_data.containers.factory import extend_data, to_builtin from extended_data.containers.mappings import ExtendedDict from extended_data.containers.sequences import ExtendedList, ExtendedSet from extended_data.containers.strings import ExtendedString @@ -10,4 +11,6 @@ "ExtendedList", "ExtendedSet", "ExtendedString", + "extend_data", + "to_builtin", ] diff --git a/src/extended_data/containers/factory.py b/src/extended_data/containers/factory.py new file mode 100644 index 0000000..897bb93 --- /dev/null +++ b/src/extended_data/containers/factory.py @@ -0,0 +1,53 @@ +"""Factories for moving between plain data and extended containers.""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any + +from extended_data.containers.mappings import ExtendedDict 
+from extended_data.containers.sequences import ExtendedList, ExtendedSet +from extended_data.containers.strings import ExtendedString +from extended_data.primitives.formats.yaml import LiteralScalarString, YamlPairs, YamlTagged + + +def extend_data(value: Any) -> Any: + """Recursively wrap built-in containers in Extended Data containers.""" + if isinstance(value, YamlTagged | YamlPairs | LiteralScalarString): + return value + if isinstance(value, ExtendedString | ExtendedDict | ExtendedList | ExtendedSet): + return value + if isinstance(value, str): + return ExtendedString(value) + if isinstance(value, Mapping): + return ExtendedDict({key: extend_data(item) for key, item in value.items()}) + if isinstance(value, list | tuple): + return ExtendedList(extend_data(item) for item in value) + if isinstance(value, set | frozenset): + return ExtendedSet(extend_data(item) for item in value) + return value + + +def to_builtin(value: Any) -> Any: + """Recursively unwrap Extended Data containers to built-in Python values.""" + if isinstance(value, YamlTagged | YamlPairs | LiteralScalarString): + return value + if isinstance(value, ExtendedString): + return str(value) + if isinstance(value, ExtendedDict): + return {key: to_builtin(item) for key, item in value.items()} + if isinstance(value, ExtendedList): + return [to_builtin(item) for item in value] + if isinstance(value, ExtendedSet): + return {to_builtin(item) for item in value} + if isinstance(value, Mapping): + return {key: to_builtin(item) for key, item in value.items()} + if isinstance(value, list): + return [to_builtin(item) for item in value] + if isinstance(value, tuple): + return tuple(to_builtin(item) for item in value) + if isinstance(value, set): + return {to_builtin(item) for item in value} + if isinstance(value, frozenset): + return frozenset(to_builtin(item) for item in value) + return value diff --git a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py index 6c70fec..d139a9c 
100644 --- a/src/extended_data/containers/mappings.py +++ b/src/extended_data/containers/mappings.py @@ -27,11 +27,15 @@ def __init__(self, initialdata: Mapping[str, Any] | None = None, **kwargs: Any) def deep_merge(self, *mappings: Mapping[str, Any]) -> ExtendedDict: """Return a deeply merged copy.""" - return ExtendedDict(deep_merge(self.data, *mappings)) + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(deep_merge(to_builtin(self.data), *(to_builtin(mapping) for mapping in mappings))) def flatten(self, *, separator: str = ".") -> ExtendedDict: """Return a flattened copy.""" - return ExtendedDict(flatten_map(self.data, separator=separator)) + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(flatten_map(to_builtin(self.data), separator=separator)) def filter( self, @@ -40,25 +44,37 @@ def filter( denylist: list[str] | None = None, ) -> tuple[ExtendedDict, ExtendedDict]: """Return accepted and rejected mapping entries.""" - accepted, rejected = filter_map(self.data, allowlist=allowlist, denylist=denylist) - return ExtendedDict(accepted), ExtendedDict(rejected) + from extended_data.containers.factory import extend_data, to_builtin + + accepted, rejected = filter_map(to_builtin(self.data), allowlist=allowlist, denylist=denylist) + return extend_data(accepted), extend_data(rejected) def compact(self) -> ExtendedDict: """Return a copy without values considered empty.""" - return ExtendedDict(all_non_empty_in_dict(self.data)) + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(all_non_empty_in_dict(to_builtin(self.data))) def deduplicate(self) -> ExtendedDict: """Return a copy with nested duplicate list values removed.""" - return ExtendedDict(deduplicate_map(self.data)) + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(deduplicate_map(to_builtin(self.data))) def unhump(self, *, drop_without_prefix: 
str | None = None) -> ExtendedDict: """Return a copy with camelCase keys converted to snake_case.""" - return ExtendedDict(unhump_map(self.data, drop_without_prefix=drop_without_prefix)) + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(unhump_map(to_builtin(self.data), drop_without_prefix=drop_without_prefix)) def all_values(self) -> list[Any]: """Return all values from the nested mapping.""" - return all_values_from_map(self.data) + from extended_data.containers.factory import to_builtin + + return all_values_from_map(to_builtin(self.data)) def first_non_empty_value(self, *keys: str) -> Any: """Return the first non-empty value for the provided keys.""" - return first_non_empty_value_from_map(self.data, *keys) + from extended_data.containers.factory import to_builtin + + return first_non_empty_value_from_map(to_builtin(self.data), *keys) diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index aaf93ab..0a08372 100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -20,17 +20,12 @@ from deepmerge import Merger # type: ignore[attr-defined] from yaml import YAMLError -from extended_data import ( - base64_decode, - decode_json, - decode_yaml, - is_nothing, - strtobool, - strtodatetime, - strtofloat, - strtoint, - strtopath, -) +from extended_data.containers.factory import extend_data +from extended_data.io.base64 import base64_decode +from extended_data.primitives.formats.json import decode_json +from extended_data.primitives.formats.yaml import decode_yaml +from extended_data.primitives.state import is_nothing +from extended_data.primitives.types import strtobool, strtodatetime, strtofloat, strtoint, strtopath if TYPE_CHECKING: @@ -229,6 +224,7 @@ def decode_input( decode_from_yaml: bool = False, decode_from_base64: bool = False, allow_none: bool = True, + as_extended: bool = False, ) -> Any: """Decodes an input value, optionally from Base64, 
JSON, or YAML. @@ -241,6 +237,7 @@ def decode_input( decode_from_yaml (bool): Whether to decode the input from YAML format. decode_from_base64 (bool): Whether to decode the input from Base64. allow_none (bool): Whether to allow None as a valid return value. + as_extended (bool): Wrap decoded container values in Tier 2 Extended Data containers. Returns: Any: The decoded input, potentially converted or defaulted. @@ -253,7 +250,7 @@ def decode_input( conf = self._coerce_text(conf) if not isinstance(conf, str): - return conf + return extend_data(conf) if as_extended else conf if decode_from_base64: try: @@ -266,6 +263,11 @@ def decode_input( message = f"Failed to decode {conf} from base64" raise RuntimeError(message) from exc + if not isinstance(conf, str): + if conf is None and not allow_none: + return default + return extend_data(conf) if as_extended else conf + if decode_from_yaml: try: conf = decode_yaml(conf) @@ -282,6 +284,9 @@ def decode_input( if conf is None and not allow_none: return default + if as_extended: + return extend_data(conf) + return conf def freeze_inputs(self) -> CaseInsensitiveDict[str, Any]: diff --git a/src/extended_data/inputs/decorators.py b/src/extended_data/inputs/decorators.py index e21f9c6..9fb1365 100644 --- a/src/extended_data/inputs/decorators.py +++ b/src/extended_data/inputs/decorators.py @@ -59,6 +59,7 @@ class InputConfig: decode_from_yaml: bool = False decode_from_base64: bool = False allow_none: bool = True + as_extended: bool = False is_bool: bool = False is_integer: bool = False is_float: bool = False @@ -80,6 +81,7 @@ def resolve(self, provider: InputProvider) -> Any | object: decode_from_yaml=self.decode_from_yaml, decode_from_base64=self.decode_from_base64, allow_none=self.allow_none, + as_extended=self.as_extended, ) else: value = provider.get_input( diff --git a/src/extended_data/io/base64.py b/src/extended_data/io/base64.py index b6adfbe..2011290 100644 --- a/src/extended_data/io/base64.py +++ 
b/src/extended_data/io/base64.py @@ -37,6 +37,8 @@ def base64_decode( encoded_data: str, unwrap_raw_data: bool = True, encoding: str = "yaml", + *, + as_extended: bool = False, ) -> Any: """Decodes data from base64 format. @@ -44,6 +46,7 @@ def base64_decode( encoded_data (str): The base64 encoded string to decode. unwrap_raw_data (bool): Whether to unwrap the raw data after decoding. encoding (str): The encoding format used for wrapping (default is 'yaml'). + as_extended (bool): Wrap decoded container values in Tier 2 Extended Data containers. Returns: Any: The decoded bytes when ``unwrap_raw_data`` is ``False``, otherwise @@ -59,4 +62,4 @@ def base64_decode( message = "Decoded Base64 payload is not valid UTF-8 text." raise ValueError(message) from exc - return unwrap_raw_data_from_import(decoded_text, encoding=encoding) + return unwrap_raw_data_from_import(decoded_text, encoding=encoding, as_extended=as_extended) diff --git a/src/extended_data/io/exporters.py b/src/extended_data/io/exporters.py index 5499ccc..c0e3e21 100644 --- a/src/extended_data/io/exporters.py +++ b/src/extended_data/io/exporters.py @@ -8,6 +8,7 @@ from collections.abc import Mapping from typing import Any +from extended_data.containers.factory import to_builtin from extended_data.primitives.formats.hcl import encode_hcl2 from extended_data.primitives.formats.json import encode_json from extended_data.primitives.formats.toml import encode_toml @@ -40,14 +41,15 @@ def wrap_raw_data_for_export( Raises: ValueError: If an invalid or unsupported encoding is provided. 
""" - contains_yaml_data = is_yaml_data(raw_data) - converted_data = convert_special_types(raw_data) + export_data = to_builtin(raw_data) + contains_yaml_data = is_yaml_data(export_data) + converted_data = convert_special_types(export_data) # Check if allow_encoding is a string specifying the format if isinstance(allow_encoding, str): allow_encoding_lower = normalize_data_encoding(allow_encoding) if allow_encoding_lower == "yaml": - return encode_yaml(make_raw_data_export_safe(raw_data, export_to_yaml=True)) + return encode_yaml(make_raw_data_export_safe(export_data, export_to_yaml=True)) if allow_encoding_lower == "json": return encode_json(converted_data, **format_opts) if allow_encoding_lower == "toml": @@ -67,7 +69,7 @@ def wrap_raw_data_for_export( # Determine the encoding based on boolean allow_encoding and YAML data check if allow_encoding: if contains_yaml_data: - return encode_yaml(make_raw_data_export_safe(raw_data, export_to_yaml=True)) + return encode_yaml(make_raw_data_export_safe(export_data, export_to_yaml=True)) # Call encode_json with options unpacked to ensure they are correctly passed return encode_json(converted_data, **format_opts) @@ -103,6 +105,8 @@ def make_raw_data_export_safe(raw_data: Any, export_to_yaml: bool = False) -> An >>> type(result["script"]).__name__ 'LiteralScalarString' """ + raw_data = to_builtin(raw_data) + if export_to_yaml and isinstance(raw_data, YamlTagged): return YamlTagged( raw_data.tag, diff --git a/src/extended_data/io/files.py b/src/extended_data/io/files.py index d29d608..2b1311b 100644 --- a/src/extended_data/io/files.py +++ b/src/extended_data/io/files.py @@ -299,6 +299,8 @@ def decode_file( file_data: str | memoryview | bytes | bytearray, file_path: FilePath | None = None, suffix: str | None = None, + *, + as_extended: bool = False, ) -> Any: """Decodes file data based on file extension or explicit suffix. 
@@ -310,6 +312,7 @@ def decode_file( file_path (FilePath | None): Optional file path to infer format from extension. suffix (str | None): Explicit format suffix (e.g., "yaml", "json", "toml", "hcl"). Takes precedence over file_path extension. + as_extended (bool): Wrap decoded container values in Tier 2 Extended Data containers. Returns: Any: The decoded data structure, or the original string if format is unknown. @@ -323,7 +326,7 @@ def decode_file( suffix = normalize_data_encoding(suffix) if suffix is not None and suffix in {"yaml", "json", "toml", "hcl", "raw"}: - return unwrap_raw_data_from_import(file_data, encoding=suffix) + return unwrap_raw_data_from_import(file_data, encoding=suffix, as_extended=as_extended) return file_data diff --git a/src/extended_data/io/importers.py b/src/extended_data/io/importers.py index 23008d3..d81984b 100644 --- a/src/extended_data/io/importers.py +++ b/src/extended_data/io/importers.py @@ -4,6 +4,7 @@ from typing import Any +from extended_data.containers.factory import extend_data from extended_data.primitives.formats.hcl import decode_hcl2 from extended_data.primitives.formats.json import decode_json from extended_data.primitives.formats.toml import decode_toml @@ -15,12 +16,15 @@ def unwrap_raw_data_from_import( wrapped_data: str | memoryview | bytes | bytearray, encoding: str = "yaml", + *, + as_extended: bool = False, ) -> Any: """Unwraps the data that was wrapped for import. Args: wrapped_data (str | memoryview | bytes | bytearray): The wrapped data. encoding (str): The encoding format (default is 'yaml'). + as_extended (bool): Wrap decoded container values in Tier 2 Extended Data containers. Returns: Any: The unwrapped data. 
@@ -31,15 +35,19 @@ def unwrap_raw_data_from_import( normalized_encoding = normalize_data_encoding(encoding) if normalized_encoding == "yaml": - return decode_yaml(wrapped_data) - if normalized_encoding == "json": - return decode_json(wrapped_data) - if normalized_encoding == "toml": - return decode_toml(wrapped_data) - if normalized_encoding == "hcl": - return decode_hcl2(wrapped_data) - if normalized_encoding == "raw": - return bytestostr(wrapped_data) - - error_message = f"Unsupported encoding format: {encoding}" - raise ValueError(error_message) + decoded = decode_yaml(wrapped_data) + elif normalized_encoding == "json": + decoded = decode_json(wrapped_data) + elif normalized_encoding == "toml": + decoded = decode_toml(wrapped_data) + elif normalized_encoding == "hcl": + decoded = decode_hcl2(wrapped_data) + elif normalized_encoding == "raw": + decoded = bytestostr(wrapped_data) + else: + error_message = f"Unsupported encoding format: {encoding}" + raise ValueError(error_message) + + if as_extended: + return extend_data(decoded) + return decoded diff --git a/tests/connectors/test_aws_codedeploy.py b/tests/connectors/test_aws_codedeploy.py index e20fecb..f3f9ebd 100644 --- a/tests/connectors/test_aws_codedeploy.py +++ b/tests/connectors/test_aws_codedeploy.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for the AWS CodeDeploy helper module.""" from __future__ import annotations @@ -6,6 +7,8 @@ import pytest +pytest.importorskip("botocore") + from botocore.exceptions import ClientError, WaiterError from extended_data.connectors.aws.codedeploy import ( diff --git a/tests/connectors/test_aws_connector.py b/tests/connectors/test_aws_connector.py index bdbc621..895c55d 100644 --- a/tests/connectors/test_aws_connector.py +++ b/tests/connectors/test_aws_connector.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for AWSConnector.""" from __future__ import annotations @@ -6,6 +7,9 @@ import pytest +pytest.importorskip("boto3") +pytest.importorskip("botocore") + from 
botocore.exceptions import ClientError from extended_data.connectors.aws import AWSConnector diff --git a/tests/connectors/test_aws_organizations.py b/tests/connectors/test_aws_organizations.py index ea4f075..c5fec18 100644 --- a/tests/connectors/test_aws_organizations.py +++ b/tests/connectors/test_aws_organizations.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for AWS Organizations helper mixin.""" from __future__ import annotations @@ -6,6 +7,9 @@ import pytest +pytest.importorskip("boto3") +pytest.importorskip("botocore") + from extended_data.connectors.aws.organizations import AWSOrganizationsMixin diff --git a/tests/connectors/test_aws_s3.py b/tests/connectors/test_aws_s3.py index be8d976..a506275 100644 --- a/tests/connectors/test_aws_s3.py +++ b/tests/connectors/test_aws_s3.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for AWS S3 operations.""" from __future__ import annotations @@ -9,6 +10,9 @@ import pytest +pytest.importorskip("boto3") +pytest.importorskip("botocore") + from botocore.exceptions import ClientError from extended_data.connectors.aws import AWSConnectorFull diff --git a/tests/connectors/test_aws_sso.py b/tests/connectors/test_aws_sso.py index b5deac6..61c61c4 100644 --- a/tests/connectors/test_aws_sso.py +++ b/tests/connectors/test_aws_sso.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for AWS SSO/Identity Center operations.""" from __future__ import annotations @@ -6,6 +7,9 @@ import pytest +pytest.importorskip("boto3") +pytest.importorskip("botocore") + from botocore.exceptions import ClientError from extended_data.connectors.aws import AWSConnectorFull diff --git a/tests/connectors/test_aws_tools.py b/tests/connectors/test_aws_tools.py index 2dfdb49..b5dee1f 100644 --- a/tests/connectors/test_aws_tools.py +++ b/tests/connectors/test_aws_tools.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for AWS AI tools.""" from __future__ import annotations @@ -6,6 +7,9 @@ import pytest +pytest.importorskip("boto3") 
+pytest.importorskip("botocore") + # Patch target for AWSConnectorFull - must patch where it's imported AWS_CONNECTOR_PATCH = "extended_data.connectors.aws.AWSConnectorFull" diff --git a/tests/connectors/test_github_connector.py b/tests/connectors/test_github_connector.py index a75fb08..f09b71d 100644 --- a/tests/connectors/test_github_connector.py +++ b/tests/connectors/test_github_connector.py @@ -1,9 +1,14 @@ +# ruff: noqa: I001 """Tests for GitHub connector aliases and behavior.""" from __future__ import annotations from unittest.mock import MagicMock, patch +import pytest + +pytest.importorskip("github") + from extended_data.connectors import GitHubConnector as RootGitHubConnector from extended_data.connectors.github import GitHubConnector diff --git a/tests/connectors/test_github_tools.py b/tests/connectors/test_github_tools.py index 15a4fd4..a65b5ef 100644 --- a/tests/connectors/test_github_tools.py +++ b/tests/connectors/test_github_tools.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for GitHub AI tools.""" from __future__ import annotations @@ -6,6 +7,8 @@ import pytest +pytest.importorskip("github") + # Patch target for GitHubConnector - patch at source since tools.py imports lazily inside functions GITHUB_CONNECTOR_PATCH = "extended_data.connectors.github.GitHubConnector" diff --git a/tests/connectors/test_github_workflow_builder.py b/tests/connectors/test_github_workflow_builder.py index 91775a3..98426a7 100644 --- a/tests/connectors/test_github_workflow_builder.py +++ b/tests/connectors/test_github_workflow_builder.py @@ -1,9 +1,14 @@ +# ruff: noqa: I001 """Tests for GitHub workflow builder utility.""" from __future__ import annotations +import pytest + from ruamel.yaml import YAML +pytest.importorskip("github") + from extended_data.connectors.github import build_github_actions_workflow diff --git a/tests/connectors/test_google_billing.py b/tests/connectors/test_google_billing.py index d199ef5..a7c9754 100644 --- 
a/tests/connectors/test_google_billing.py +++ b/tests/connectors/test_google_billing.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for Google Billing mixin helpers.""" from __future__ import annotations @@ -6,6 +7,11 @@ from collections.abc import Iterable from typing import Any +import pytest + +pytest.importorskip("google.oauth2.service_account") +pytest.importorskip("googleapiclient") + from extended_data.connectors.google.billing import GoogleBillingMixin diff --git a/tests/connectors/test_google_cloud.py b/tests/connectors/test_google_cloud.py index 23338cf..3d05059 100644 --- a/tests/connectors/test_google_cloud.py +++ b/tests/connectors/test_google_cloud.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for Google Cloud Platform resource management operations.""" from __future__ import annotations @@ -6,6 +7,9 @@ import pytest +pytest.importorskip("google.oauth2.service_account") +pytest.importorskip("googleapiclient") + from extended_data.connectors.google import GoogleConnectorFull diff --git a/tests/connectors/test_google_connector.py b/tests/connectors/test_google_connector.py index a363bf6..febf28c 100644 --- a/tests/connectors/test_google_connector.py +++ b/tests/connectors/test_google_connector.py @@ -1,9 +1,15 @@ +# ruff: noqa: I001 """Tests for GoogleConnector.""" from __future__ import annotations from unittest.mock import MagicMock, patch +import pytest + +pytest.importorskip("google.oauth2.service_account") +pytest.importorskip("googleapiclient") + from extended_data.connectors.google import ( GoogleBillingConnector, GoogleCloudConnector, diff --git a/tests/connectors/test_google_services.py b/tests/connectors/test_google_services.py index ac791bf..58f80f3 100644 --- a/tests/connectors/test_google_services.py +++ b/tests/connectors/test_google_services.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for Google Cloud services discovery operations.""" from __future__ import annotations @@ -6,6 +7,9 @@ import pytest 
+pytest.importorskip("google.oauth2.service_account") +pytest.importorskip("googleapiclient") + from extended_data.connectors.google import GoogleConnectorFull diff --git a/tests/connectors/test_google_tools.py b/tests/connectors/test_google_tools.py index 17aa80c..5982703 100644 --- a/tests/connectors/test_google_tools.py +++ b/tests/connectors/test_google_tools.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for Google AI tools.""" from __future__ import annotations @@ -6,6 +7,9 @@ import pytest +pytest.importorskip("google.oauth2.service_account") +pytest.importorskip("googleapiclient") + # Patch target for GoogleConnectorFull - must patch where it's imported GOOGLE_CONNECTOR_PATCH = "extended_data.connectors.google.GoogleConnectorFull" diff --git a/tests/connectors/test_google_workspace.py b/tests/connectors/test_google_workspace.py index f606ca1..4e59089 100644 --- a/tests/connectors/test_google_workspace.py +++ b/tests/connectors/test_google_workspace.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for Google Workspace (Admin Directory) operations.""" from __future__ import annotations @@ -6,6 +7,9 @@ import pytest +pytest.importorskip("google.oauth2.service_account") +pytest.importorskip("googleapiclient") + from extended_data.connectors.google import GoogleConnectorFull diff --git a/tests/connectors/test_slack_connector.py b/tests/connectors/test_slack_connector.py index c2a7026..d488719 100644 --- a/tests/connectors/test_slack_connector.py +++ b/tests/connectors/test_slack_connector.py @@ -1,9 +1,14 @@ +# ruff: noqa: I001 """Tests for SlackConnector.""" from __future__ import annotations from unittest.mock import MagicMock, patch +import pytest + +pytest.importorskip("slack_sdk") + from extended_data.connectors.slack import SlackConnector diff --git a/tests/connectors/test_vault_connector.py b/tests/connectors/test_vault_connector.py index 38a6db8..d3954da 100644 --- a/tests/connectors/test_vault_connector.py +++ b/tests/connectors/test_vault_connector.py 
@@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for VaultConnector.""" from __future__ import annotations @@ -7,6 +8,8 @@ import pytest +pytest.importorskip("hvac") + from hvac.exceptions import VaultError from extended_data.connectors.vault import VaultConnector diff --git a/tests/connectors/test_vault_tools.py b/tests/connectors/test_vault_tools.py index 3612c92..01bc81e 100644 --- a/tests/connectors/test_vault_tools.py +++ b/tests/connectors/test_vault_tools.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 """Tests for Vault AI tools.""" from __future__ import annotations @@ -6,6 +7,8 @@ import pytest +pytest.importorskip("hvac") + # Patch target for VaultConnector - must patch where it's used (in tools.py), not where it's defined VAULT_CONNECTOR_PATCH = "extended_data.connectors.vault.VaultConnector" diff --git a/tests/core/test_base64_utils.py b/tests/core/test_base64_utils.py index 9e73185..0edd04a 100644 --- a/tests/core/test_base64_utils.py +++ b/tests/core/test_base64_utils.py @@ -22,6 +22,7 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.io.base64 import base64_decode, base64_encode from extended_data.io.exporters import wrap_raw_data_for_export @@ -178,6 +179,17 @@ def test_base64_decode_with_tf_alias_unwrap() -> None: assert result == {"locals": [{"region": "us-east-1"}]} +def test_base64_decode_can_return_extended_containers() -> None: + """Base64 decoding can opt into the Tier 2 container layer.""" + encoded_data = base64.b64encode(b'{"service": {"name": "api"}, "ports": [8080]}').decode("utf-8") + result = base64_decode(encoded_data, unwrap_raw_data=True, encoding="json", as_extended=True) + + assert isinstance(result, ExtendedDict) + assert isinstance(result["service"], ExtendedDict) + assert isinstance(result["service"]["name"], ExtendedString) + assert isinstance(result["ports"], ExtendedList) + + def test_base64_decode_rejects_non_utf8_when_unwrapping() -> None: """Raise a clear error when 
wrapped decoding requires non-text bytes to be parsed.""" encoded_data = base64.b64encode(b"\xff\xfe").decode("utf-8") diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 34903a8..99d7ea7 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -4,7 +4,7 @@ import extended_data -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, extend_data, to_builtin def test_extended_string_chains_primitive_transforms() -> None: @@ -54,9 +54,47 @@ def test_extended_set_composes_set_operations() -> None: assert value.difference({1, None}).to_set() == {2, 3} +def test_extend_data_recursively_wraps_builtin_containers() -> None: + """The container factory promotes plain values into the Tier 2 surface.""" + wrapped = extend_data( + { + "service": {"name": "api"}, + "ports": [8080, 8081], + "tags": {"prod", "api"}, + } + ) + + assert isinstance(wrapped, ExtendedDict) + assert isinstance(wrapped["service"], ExtendedDict) + assert isinstance(wrapped["service"]["name"], ExtendedString) + assert isinstance(wrapped["ports"], ExtendedList) + assert isinstance(wrapped["tags"], ExtendedSet) + assert wrapped["service"]["name"].upper_first() == "Api" + + +def test_to_builtin_recursively_unwraps_extended_containers() -> None: + """Extended containers can be lowered back to normal Python data.""" + wrapped = ExtendedDict( + { + "service": ExtendedDict({"name": ExtendedString("api")}), + "ports": ExtendedList([8080, 8081]), + "tags": ExtendedSet({"prod", "api"}), + } + ) + + plain = to_builtin(wrapped) + + assert isinstance(plain, dict) + assert plain["service"] == {"name": "api"} + assert plain["ports"] == [8080, 8081] + assert plain["tags"] == {"prod", "api"} + + def test_container_classes_are_root_exports() -> None: """Tier 2 containers are root-level convenience exports.""" assert 
extended_data.ExtendedString is ExtendedString assert extended_data.ExtendedDict is ExtendedDict assert extended_data.ExtendedList is ExtendedList assert extended_data.ExtendedSet is ExtendedSet + assert extended_data.extend_data is extend_data + assert extended_data.to_builtin is to_builtin diff --git a/tests/core/test_file_data_type.py b/tests/core/test_file_data_type.py index 4e0478d..5648648 100644 --- a/tests/core/test_file_data_type.py +++ b/tests/core/test_file_data_type.py @@ -24,6 +24,7 @@ from git import GitCommandError, InvalidGitRepositoryError, NoSuchPathError, Repo +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.io.files import ( FilePath, clone_repository_to_temp, @@ -506,6 +507,20 @@ def test_decode_file_accepts_bytes_payload() -> None: assert result == {"key": "value"} +def test_decode_file_can_return_extended_containers() -> None: + """File decoding can opt into the Tier 2 container layer.""" + result = decode_file( + '{"service": {"name": "api"}, "ports": [8080]}', + file_path="/path/to/file.json", + as_extended=True, + ) + + assert isinstance(result, ExtendedDict) + assert isinstance(result["service"], ExtendedDict) + assert isinstance(result["service"]["name"], ExtendedString) + assert isinstance(result["ports"], ExtendedList) + + def test_write_file_json(tmp_path: Path) -> None: """Tests writing data as JSON. 
diff --git a/tests/core/test_import_utils.py b/tests/core/test_import_utils.py index 7cc0e55..37d6bef 100644 --- a/tests/core/test_import_utils.py +++ b/tests/core/test_import_utils.py @@ -4,6 +4,7 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.io.importers import unwrap_raw_data_from_import @@ -35,3 +36,25 @@ def test_unwrap_raw_data_from_import_rejects_unsupported_encoding() -> None: """Reject unsupported import encodings.""" with pytest.raises(ValueError, match="Unsupported encoding format: xml"): unwrap_raw_data_from_import("value", "xml") + + +def test_unwrap_raw_data_from_import_can_return_extended_containers() -> None: + """Decoded imports can opt into the Tier 2 container layer.""" + result = unwrap_raw_data_from_import( + '{"service": {"name": "api"}, "ports": [8080]}', + encoding="json", + as_extended=True, + ) + + assert isinstance(result, ExtendedDict) + assert isinstance(result["service"], ExtendedDict) + assert isinstance(result["service"]["name"], ExtendedString) + assert isinstance(result["ports"], ExtendedList) + + +def test_unwrap_raw_data_from_import_can_return_extended_raw_strings() -> None: + """Raw imports can opt into ExtendedString.""" + result = unwrap_raw_data_from_import("plain text", encoding="raw", as_extended=True) + + assert isinstance(result, ExtendedString) + assert result.upper_first() == "Plain text" diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index d9d5e7b..aa536ad 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -5,16 +5,14 @@ from pathlib import Path from extended_data import ( + ExtendedDict, base64_decode, base64_encode, decode_file, decode_hcl2, - deduplicate_map, - deep_merge, encode_hcl2, filter_list, read_file, - to_snake_case, write_file, ) from extended_data.primitives.formats.yaml import YamlTagged @@ -36,12 +34,14 @@ def test_layered_config_workflow_round_trip(tmp_path: Path) -> None: 
write_file("config/base.yaml", base_config, tld=tmp_path) write_file("config/dev.yaml", env_config, tld=tmp_path) - base_data = decode_file(read_file("config/base.yaml", tld=tmp_path), file_path="config/base.yaml") - env_data = decode_file(read_file("config/dev.yaml", tld=tmp_path), file_path="config/dev.yaml") - merged = deep_merge(base_data, env_data) + base_data = decode_file(read_file("config/base.yaml", tld=tmp_path), file_path="config/base.yaml", as_extended=True) + env_data = decode_file(read_file("config/dev.yaml", tld=tmp_path), file_path="config/dev.yaml", as_extended=True) + merged = base_data.deep_merge(env_data) output_path = write_file("build/config.yaml", merged, tld=tmp_path) + assert isinstance(base_data, ExtendedDict) + assert isinstance(merged, ExtendedDict) assert output_path == tmp_path / "build" / "config.yaml" assert decode_file(read_file(output_path), file_path=output_path) == { "service": {"name": "api", "debug": True}, @@ -74,17 +74,44 @@ def test_terraform_handoff_workflow_round_trip() -> None: def test_api_payload_normalization_workflow_round_trip(tmp_path: Path) -> None: """Compose list, map, string, and file helpers into a normalized payload flow.""" - payload = { + payload = ExtendedDict( + { + "HTTPResponseCode": 200, + "SelectedServices": filter_list(["api", "worker", "db"], denylist=["db"]), + "Tags": ["api", "api", "docs"], + } + ) + + normalized = payload.deduplicate().unhump() + + output_path = write_file("build/payload.json", normalized, tld=tmp_path) + + assert output_path == tmp_path / "build" / "payload.json" + assert isinstance(normalized, ExtendedDict) + assert decode_file(read_file(output_path), file_path=output_path) == { + "http_response_code": 200, + "selected_services": ["api", "worker"], + "tags": ["api", "docs"], + } + + +def test_api_payload_factory_workflow_round_trip(tmp_path: Path) -> None: + """Promote decoded API payloads into containers before normalization.""" + raw_payload = { "HTTPResponseCode": 200, 
"SelectedServices": filter_list(["api", "worker", "db"], denylist=["db"]), "Tags": ["api", "api", "docs"], } - normalized = {to_snake_case(key): value for key, value in deduplicate_map(payload).items()} + raw_path = write_file("build/raw-payload.json", raw_payload, tld=tmp_path) + decoded = decode_file(read_file(raw_path), file_path=raw_path, as_extended=True) + normalized = decoded.deduplicate().unhump() output_path = write_file("build/payload.json", normalized, tld=tmp_path) assert output_path == tmp_path / "build" / "payload.json" + assert isinstance(decoded, ExtendedDict) + assert isinstance(normalized, ExtendedDict) assert decode_file(read_file(output_path), file_path=output_path) == { "http_response_code": 200, "selected_services": ["api", "worker"], @@ -100,9 +127,10 @@ def test_yaml_native_workflow_round_trip(tmp_path: Path) -> None: } output_path = write_file("template.yaml", template, tld=tmp_path) - decoded = decode_file(read_file(output_path), file_path=output_path) + decoded = decode_file(read_file(output_path), file_path=output_path, as_extended=True) assert output_path == tmp_path / "template.yaml" + assert isinstance(decoded, ExtendedDict) assert isinstance(decoded["bucket_name"], YamlTagged) assert decoded["bucket_name"].tag == "!Ref" assert decoded["bucket_name"].__wrapped__ == "BucketName" diff --git a/tests/inputs/test_decorators.py b/tests/inputs/test_decorators.py index 91534e3..63b5dd3 100644 --- a/tests/inputs/test_decorators.py +++ b/tests/inputs/test_decorators.py @@ -4,6 +4,7 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedString from extended_data.inputs import directed_inputs, input_config @@ -22,6 +23,10 @@ def secure_call(self, api_key: str) -> str: def parse_config(self, config: dict[str, str]) -> dict[str, str]: return config + @input_config("extended_config", decode_from_json=True, as_extended=True) + def parse_extended_config(self, extended_config: ExtendedDict) -> ExtendedDict: + return 
extended_config + def greet(self, prefix: str = "hello") -> str: return prefix @@ -52,6 +57,14 @@ def test_decode_from_json_input_config() -> None: assert service.parse_config() == {"enabled": True} +def test_decode_from_json_input_config_can_return_extended_containers() -> None: + service = ExampleService(_input_provider_config={"inputs": {"extended_config": '{"name": "api"}'}}) + parsed = service.parse_extended_config() + + assert isinstance(parsed, ExtendedDict) + assert isinstance(parsed["name"], ExtendedString) + + def test_method_default_used_when_input_missing() -> None: service = ExampleService(_input_provider_config={"inputs": {"domain": "acme.io"}}) assert service.greet() == "hello" diff --git a/tests/inputs/test_main.py b/tests/inputs/test_main.py index 6d351e6..762e9ce 100644 --- a/tests/inputs/test_main.py +++ b/tests/inputs/test_main.py @@ -28,6 +28,7 @@ from __future__ import annotations +import base64 import json import os @@ -36,6 +37,7 @@ import pytest from extended_data import base64_encode +from extended_data.containers import ExtendedDict, ExtendedString from extended_data.inputs.__main__ import InputProvider @@ -186,6 +188,27 @@ def test_decode_input_base64_from_bytes(): assert decoded == {"name": "test"} +def test_decode_input_json_can_return_extended_containers(): + """Decoded input payloads can opt into the Tier 2 container layer.""" + dic = InputProvider(inputs={"json_key": '{"name": "test"}'}) + decoded = dic.decode_input("json_key", decode_from_json=True, as_extended=True) + + assert isinstance(decoded, ExtendedDict) + assert isinstance(decoded["name"], ExtendedString) + assert decoded["name"].upper_first() == "Test" + + +def test_decode_input_base64_external_json_can_return_extended_containers(): + """Externally produced Base64 JSON should decode once and then be extended.""" + encoded_value = base64.b64encode(b'{"name": "test"}').decode("utf-8") + dic = InputProvider(inputs={"base64_key": encoded_value}) + decoded = 
dic.decode_input("base64_key", decode_from_base64=True, decode_from_json=True, as_extended=True) + + assert isinstance(decoded, ExtendedDict) + assert isinstance(decoded["name"], ExtendedString) + assert decoded["name"].upper_first() == "Test" + + def test_freeze_inputs(): """Test freezing inputs. From cd0af5e330394a305dc2b1e7148521d60c2f79f7 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 00:33:07 -0500 Subject: [PATCH 004/287] ci: add core typecheck gate --- .github/workflows/ci.yml | 1 + README.md | 1 + src/extended_data/primitives/formats/yaml/constructors.py | 2 +- src/extended_data/primitives/formats/yaml/tag_classes.py | 4 ++-- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 43625d7..c5f85e0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,6 +27,7 @@ jobs: enable-cache: true - run: uv sync --python 3.13 --extra tests --extra typing - run: uvx ruff check src tests + - run: uv run mypy src/extended_data/primitives src/extended_data/containers src/extended_data/io src/extended_data/inputs src/extended_data/logging - run: uv run pytest tests/core tests/logging tests/inputs - run: uv run pytest tests/connectors/test_cloud_params.py tests/connectors/test_connectors.py tests/connectors/test_secrets.py tests/connectors/test_cli.py tests/connectors/test_mcp.py tests/connectors/meshy/test_models.py - run: uv build diff --git a/README.md b/README.md index c3d5d0e..6294275 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,7 @@ More detail lives in [`docs/package-surface.md`](docs/package-surface.md). 
uv sync --extra tests --extra typing uv run pytest uv run ruff check src tests +uv run mypy src/extended_data/primitives src/extended_data/containers src/extended_data/io src/extended_data/inputs src/extended_data/logging uv build ``` diff --git a/src/extended_data/primitives/formats/yaml/constructors.py b/src/extended_data/primitives/formats/yaml/constructors.py index 4d1ba03..78337a4 100644 --- a/src/extended_data/primitives/formats/yaml/constructors.py +++ b/src/extended_data/primitives/formats/yaml/constructors.py @@ -51,7 +51,7 @@ def yaml_construct_pairs( Returns: Union[Dict[Any, Any], YamlPairs]: The constructed YAML pairs. """ - value: list[tuple[Any, Any]] = loader.construct_pairs(node) # type: ignore[no-untyped-call] + value: list[tuple[Any, Any]] = loader.construct_pairs(node) try: return dict(value) except TypeError: diff --git a/src/extended_data/primitives/formats/yaml/tag_classes.py b/src/extended_data/primitives/formats/yaml/tag_classes.py index e212de3..14c323b 100644 --- a/src/extended_data/primitives/formats/yaml/tag_classes.py +++ b/src/extended_data/primitives/formats/yaml/tag_classes.py @@ -13,12 +13,12 @@ if TYPE_CHECKING: from typing import TypeAlias - _ObjectProxyBase: TypeAlias = "wrapt.ObjectProxy[Any]" + _ObjectProxyBase: TypeAlias = wrapt.ObjectProxy[Any] else: _ObjectProxyBase = wrapt.ObjectProxy -class YamlTagged(_ObjectProxyBase): +class YamlTagged(_ObjectProxyBase): # type: ignore[misc] """Wrapper class for YAML tagged objects.""" def __init__(self, tag: str, wrapped: Any) -> None: From 59a7a2468c58837319d0cf9cefcaf94850e4476b Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 00:40:18 -0500 Subject: [PATCH 005/287] fix: lazy load optional vendor sdks --- src/extended_data/connectors/aws/__init__.py | 74 +++++++++++++++---- .../connectors/github/__init__.py | 44 ++++++++++- .../connectors/google/__init__.py | 29 +++++++- src/extended_data/connectors/registry.py | 13 +++- .../connectors/vault/__init__.py | 33 +++++++-- 
tests/connectors/test_aws_tools.py | 17 ++++- tests/connectors/test_connectors.py | 21 ++++++ tests/connectors/test_github_tools.py | 16 +++- .../test_github_workflow_builder.py | 5 -- tests/connectors/test_google_tools.py | 17 ++++- tests/connectors/test_vault_tools.py | 16 +++- 11 files changed, 234 insertions(+), 51 deletions(-) diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index 71499dc..614b346 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -18,19 +18,37 @@ from typing import TYPE_CHECKING, Any -import boto3 - -from boto3.resources.base import ServiceResource -from botocore.config import Config -from botocore.exceptions import ClientError - from extended_data import is_nothing +from extended_data.connectors._optional import is_connector_available, require_extra from extended_data.connectors.base import VendorConnectorBase from extended_data.logging import Logging if TYPE_CHECKING: - pass + import boto3 + + from boto3.resources.base import ServiceResource + from botocore.config import Config + from botocore.exceptions import ClientError +else: + boto3 = None + Config = None + ServiceResource = Any + + class ClientError(Exception): + """Fallback exception used until botocore is imported.""" + + +def _load_aws_sdk() -> Any: + """Load boto3/botocore lazily so tool metadata can import without the aws extra.""" + global ClientError, Config, ServiceResource, boto3 + + if boto3 is None: + boto3 = require_extra("boto3", "aws") + Config = require_extra("botocore.config", "aws").Config + ClientError = require_extra("botocore.exceptions", "aws").ClientError + ServiceResource = require_extra("boto3.resources.base", "aws").ServiceResource + return boto3 class AWSConnector(VendorConnectorBase): @@ -51,9 +69,10 @@ def __init__( **kwargs, ): super().__init__(logger=logger, **kwargs) + self._boto3 = _load_aws_sdk() self.execution_role_arn = 
execution_role_arn self.aws_sessions: dict[str, dict[str, boto3.Session]] = {} - self.default_aws_session = boto3.Session() + self.default_aws_session = self._boto3.Session() # ========================================================================= # Session Management @@ -79,7 +98,7 @@ def assume_role(self, execution_role_arn: str, role_session_name: str) -> boto3. response = sts_client.assume_role(RoleArn=execution_role_arn, RoleSessionName=role_session_name) credentials = response["Credentials"] self.logger.info(f"Successfully assumed role: {execution_role_arn}") - return boto3.Session( + return self._boto3.Session( aws_access_key_id=credentials["AccessKeyId"], aws_secret_access_key=credentials["SecretAccessKey"], aws_session_token=credentials["SessionToken"], @@ -132,6 +151,7 @@ def create_standard_retry_config(max_attempts: int = 5) -> Config: Returns: A botocore Config with retry settings. """ + _load_aws_sdk() return Config(retries={"max_attempts": max_attempts, "mode": "standard"}) def get_aws_client( @@ -553,7 +573,8 @@ def load_vendors_from_asm(prefix: str = "/vendors/") -> dict[str, str]: prefix = os.getenv("TM_VENDORS_PREFIX", prefix) try: - session = boto3.Session() + aws_sdk = _load_aws_sdk() + session = aws_sdk.Session() secretsmanager = session.client("secretsmanager") # List secrets with the prefix @@ -578,11 +599,34 @@ def load_vendors_from_asm(prefix: str = "/vendors/") -> dict[str, str]: return vendors -# Import submodule operations to make them available -from extended_data.connectors.aws.codedeploy import create_codedeploy_deployment, get_aws_codedeploy_deployments -from extended_data.connectors.aws.organizations import AWSOrganizationsMixin -from extended_data.connectors.aws.s3 import AWSS3Mixin -from extended_data.connectors.aws.sso import AWSSSOmixin +if is_connector_available("aws"): + # Import submodule operations to make them available when the AWS SDK is present. 
+ from extended_data.connectors.aws.codedeploy import create_codedeploy_deployment, get_aws_codedeploy_deployments + from extended_data.connectors.aws.organizations import AWSOrganizationsMixin + from extended_data.connectors.aws.s3 import AWSS3Mixin + from extended_data.connectors.aws.sso import AWSSSOmixin +else: + + class AWSOrganizationsMixin: + """Placeholder mixin used when the aws extra is not installed.""" + + + class AWSS3Mixin: + """Placeholder mixin used when the aws extra is not installed.""" + + + class AWSSSOmixin: + """Placeholder mixin used when the aws extra is not installed.""" + + + def create_codedeploy_deployment(*args: Any, **kwargs: Any) -> Any: + """Require the aws extra before creating CodeDeploy deployments.""" + _load_aws_sdk() + + + def get_aws_codedeploy_deployments(*args: Any, **kwargs: Any) -> Any: + """Require the aws extra before listing CodeDeploy deployments.""" + _load_aws_sdk() class AWSConnectorFull(AWSConnector, AWSOrganizationsMixin, AWSSSOmixin, AWSS3Mixin): diff --git a/src/extended_data/connectors/github/__init__.py b/src/extended_data/connectors/github/__init__.py index f791fed..55a6f2d 100644 --- a/src/extended_data/connectors/github/__init__.py +++ b/src/extended_data/connectors/github/__init__.py @@ -6,11 +6,8 @@ import os from copy import deepcopy -from typing import Any +from typing import TYPE_CHECKING, Any -from github import Auth, Github -from github.GithubException import GithubException, UnknownObjectException -from python_graphql_client import GraphqlClient from ruamel.yaml import YAML from extended_data import ( @@ -20,13 +17,51 @@ is_nothing, wrap_raw_data_for_export, ) +from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase from extended_data.logging import Logging +if TYPE_CHECKING: + from github import Auth, Github + from github.GithubException import GithubException, UnknownObjectException + from python_graphql_client import GraphqlClient 
+else: + Auth = None + Github = None + GraphqlClient = None + + class GitHubFallbackError(Exception): + """Fallback exception used until PyGithub is imported.""" + + + GithubException = GitHubFallbackError + UnknownObjectException = GitHubFallbackError + + FilePath = str | bytes | os.PathLike[Any] +def _load_github_sdk() -> None: + """Load GitHub SDK dependencies lazily so tool metadata remains importable.""" + global Auth, Github, GithubException, GraphqlClient, UnknownObjectException + + if Github is None: + try: + github_module = require_extra("github", "github") + github_exceptions = require_extra("github.GithubException", "github") + graphql_module = require_extra("python_graphql_client", "github") + except ImportError as exc: + msg = "PyGithub is required for GitHubConnector. Install with: pip install extended-data[github]" + raise ImportError(msg) from exc + + Auth = github_module.Auth + Github = github_module.Github + GithubException = github_exceptions.GithubException + UnknownObjectException = github_exceptions.UnknownObjectException + GraphqlClient = graphql_module.GraphqlClient + + def get_github_api_error(exc: GithubException) -> str | None: """Extract error message from a GitHub exception.""" data = getattr(exc, "data", {}) @@ -50,6 +85,7 @@ def __init__( **kwargs, ): super().__init__(logger=logger, **kwargs) + _load_github_sdk() self.GITHUB_OWNER = github_owner self.GITHUB_REPO = github_repo diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index 3b4cf02..de4c836 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -5,15 +5,35 @@ import json from collections.abc import Sequence -from typing import Any - -from google.oauth2 import service_account -from googleapiclient.discovery import build +from typing import TYPE_CHECKING, Any +from extended_data.connectors._optional import require_extra from extended_data.connectors.base 
import VendorConnectorBase from extended_data.logging import Logging +if TYPE_CHECKING: + from google.oauth2 import service_account + from googleapiclient.discovery import build +else: + service_account = None + build = None + + +def _load_google_sdk() -> None: + """Load Google SDK dependencies lazily so tool metadata remains importable.""" + global build, service_account + + if service_account is None or build is None: + try: + service_account = require_extra("google.oauth2.service_account", "google") + discovery = require_extra("googleapiclient.discovery", "google") + except ImportError as exc: + msg = "google-api-python-client is required for GoogleConnector. Install with: pip install extended-data[google]" + raise ImportError(msg) from exc + build = discovery.build + + # Default Google scopes DEFAULT_SCOPES = [ "https://www.googleapis.com/auth/cloud-platform", @@ -56,6 +76,7 @@ def __init__( **kwargs: Additional arguments passed to VendorConnectorBase. """ super().__init__(logger=logger, **kwargs) + _load_google_sdk() self.scopes = scopes or DEFAULT_SCOPES self.subject = subject diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index ed2397d..2fd9fbb 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -230,6 +230,12 @@ def get_connector_class(name: str) -> builtins.type[VendorConnectorBase]: available = ", ".join(sorted(connectors.keys())) raise ValueError(f"Unknown connector: {name}. 
Available: {available}") + if name_lower in BUILTIN_CONNECTORS: + missing = get_missing_connector_requirements(name_lower) + if missing: + error = ImportError(f"Missing packages: {', '.join(missing)}") + _raise_missing_builtin_connector(name_lower, error) + return connectors[name_lower] @@ -282,7 +288,7 @@ def _available_connector_info(name: str, cls: builtins.type[VendorConnectorBase] return ConnectorInfo( name=name, - available=True, + available=not missing, source=source, extra=extra, install=get_connector_install_command(name), @@ -350,4 +356,7 @@ def list_connector_info(*, include_unavailable: bool = True) -> list[dict[str, A if include_unavailable: names.update(BUILTIN_CONNECTORS) names.update(_missing_builtin_connectors) - return [get_connector_info(name, include_unavailable=include_unavailable) for name in sorted(names)] + info = [get_connector_info(name, include_unavailable=include_unavailable) for name in sorted(names)] + if not include_unavailable: + return [connector for connector in info if connector["available"]] + return info diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index 17a8a74..c474c03 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -4,17 +4,35 @@ from collections import deque from datetime import datetime, timezone -from typing import Any - -import hvac - -from hvac.exceptions import VaultError +from typing import TYPE_CHECKING, Any from extended_data import is_nothing +from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase from extended_data.logging import Logging +if TYPE_CHECKING: + import hvac + + from hvac.exceptions import VaultError +else: + hvac = None + + class VaultError(Exception): + """Fallback exception used until hvac is imported.""" + + +def _load_hvac() -> Any: + """Load hvac lazily so tool metadata can import without the vault 
extra.""" + global VaultError, hvac + + if hvac is None: + hvac = require_extra("hvac", "vault") + VaultError = require_extra("hvac.exceptions", "vault").VaultError + return hvac + + # Default Vault settings VAULT_URL_ENV_VAR = "VAULT_ADDR" VAULT_NAMESPACE_ENV_VAR = "VAULT_NAMESPACE" @@ -36,6 +54,7 @@ def __init__( ): super().__init__(logger=logger, **kwargs) + self._hvac = _load_hvac() self.vault_url = vault_url self.vault_namespace = vault_namespace self.vault_token = vault_token @@ -63,7 +82,7 @@ def vault_client(self) -> hvac.Client: vault_opts["token"] = vault_token try: - self._vault_client = hvac.Client(**vault_opts) + self._vault_client = self._hvac.Client(**vault_opts) if vault_token and self._vault_client.is_authenticated(): self._set_token_expiration() @@ -86,7 +105,7 @@ def vault_client(self) -> hvac.Client: if vault_namespace: vault_opts["namespace"] = vault_namespace - self._vault_client = hvac.Client(**vault_opts) + self._vault_client = self._hvac.Client(**vault_opts) self._vault_client.auth.approle.login( role_id=role_id, secret_id=secret_id, diff --git a/tests/connectors/test_aws_tools.py b/tests/connectors/test_aws_tools.py index b5dee1f..092be4f 100644 --- a/tests/connectors/test_aws_tools.py +++ b/tests/connectors/test_aws_tools.py @@ -1,20 +1,29 @@ -# ruff: noqa: I001 """Tests for AWS AI tools.""" from __future__ import annotations +import importlib.util + from unittest.mock import MagicMock, patch import pytest -pytest.importorskip("boto3") -pytest.importorskip("botocore") - # Patch target for AWSConnectorFull - must patch where it's imported AWS_CONNECTOR_PATCH = "extended_data.connectors.aws.AWSConnectorFull" +def test_aws_connector_requires_boto3_when_constructed_without_extra() -> None: + """AWS tool metadata imports without boto3, but the connector still requires the extra.""" + if importlib.util.find_spec("boto3") is not None: + pytest.skip("boto3 is installed") + + from extended_data.connectors.aws import AWSConnector + + with 
pytest.raises(ImportError, match=r"extended-data\[aws\]"): + AWSConnector(from_environment=False) + + class TestAWSToolDefinitions: """Test tool definitions and metadata.""" diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index 55e62ca..f3c780f 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -358,6 +358,27 @@ def test_get_connector_info_includes_known_missing_builtin(self, monkeypatch): assert info["install"] == "pip install extended-data[github]" assert info["class"] == "GitHubConnector" + def test_lazy_builtin_with_missing_requirements_is_unavailable(self): + """Lazy-loadable built-ins still report unavailable when extras are missing.""" + registry.clear_cache() + + if not _has_module("boto3"): + info = registry.get_connector_info("aws") + + assert info["available"] is False + assert info["missing"] == ["boto3"] + + with pytest.raises(ImportError, match=r"extended-data\[aws\]"): + registry.get_connector_class("aws") + + def test_available_only_catalog_filters_missing_lazy_builtins(self): + """Available-only metadata excludes lazy built-ins with missing extras.""" + registry.clear_cache() + + info = registry.list_connector_info(include_unavailable=False) + + assert all(connector["available"] for connector in info) + def test_register_builtins_tracks_missing_optional_dependency(self, monkeypatch): """Built-in discovery remembers optional dependency import failures.""" monkeypatch.setattr(registry, "_missing_builtin_connectors", {}) diff --git a/tests/connectors/test_github_tools.py b/tests/connectors/test_github_tools.py index a65b5ef..592befb 100644 --- a/tests/connectors/test_github_tools.py +++ b/tests/connectors/test_github_tools.py @@ -1,19 +1,29 @@ -# ruff: noqa: I001 """Tests for GitHub AI tools.""" from __future__ import annotations +import importlib.util + from unittest.mock import MagicMock, patch import pytest -pytest.importorskip("github") - # Patch target for 
GitHubConnector - patch at source since tools.py imports lazily inside functions GITHUB_CONNECTOR_PATCH = "extended_data.connectors.github.GitHubConnector" +def test_github_connector_requires_pygithub_when_constructed_without_extra() -> None: + """GitHub tool metadata imports without PyGithub, but the connector still requires the extra.""" + if importlib.util.find_spec("github") is not None: + pytest.skip("PyGithub is installed") + + from extended_data.connectors.github import GitHubConnector + + with pytest.raises(ImportError, match=r"extended-data\[github\]"): + GitHubConnector(github_owner="jbcom", github_token="token", from_environment=False) + + class TestGitHubToolDefinitions: """Test tool definitions and metadata.""" diff --git a/tests/connectors/test_github_workflow_builder.py b/tests/connectors/test_github_workflow_builder.py index 98426a7..91775a3 100644 --- a/tests/connectors/test_github_workflow_builder.py +++ b/tests/connectors/test_github_workflow_builder.py @@ -1,14 +1,9 @@ -# ruff: noqa: I001 """Tests for GitHub workflow builder utility.""" from __future__ import annotations -import pytest - from ruamel.yaml import YAML -pytest.importorskip("github") - from extended_data.connectors.github import build_github_actions_workflow diff --git a/tests/connectors/test_google_tools.py b/tests/connectors/test_google_tools.py index 5982703..ccf89e8 100644 --- a/tests/connectors/test_google_tools.py +++ b/tests/connectors/test_google_tools.py @@ -1,20 +1,29 @@ -# ruff: noqa: I001 """Tests for Google AI tools.""" from __future__ import annotations +import importlib.util + from unittest.mock import MagicMock, patch import pytest -pytest.importorskip("google.oauth2.service_account") -pytest.importorskip("googleapiclient") - # Patch target for GoogleConnectorFull - must patch where it's imported GOOGLE_CONNECTOR_PATCH = "extended_data.connectors.google.GoogleConnectorFull" +def test_google_connector_requires_google_sdk_when_constructed_without_extra() -> None: + 
"""Google tool metadata imports without Google SDKs, but the connector still requires the extra.""" + if importlib.util.find_spec("googleapiclient") is not None: + pytest.skip("google-api-python-client is installed") + + from extended_data.connectors.google import GoogleConnector + + with pytest.raises(ImportError, match=r"extended-data\[google\]"): + GoogleConnector(service_account_info={"type": "service_account"}, from_environment=False) + + class TestGoogleToolDefinitions: """Test tool definitions and metadata.""" diff --git a/tests/connectors/test_vault_tools.py b/tests/connectors/test_vault_tools.py index 01bc81e..34ddfbe 100644 --- a/tests/connectors/test_vault_tools.py +++ b/tests/connectors/test_vault_tools.py @@ -1,19 +1,29 @@ -# ruff: noqa: I001 """Tests for Vault AI tools.""" from __future__ import annotations +import importlib.util + from unittest.mock import MagicMock, patch import pytest -pytest.importorskip("hvac") - # Patch target for VaultConnector - must patch where it's used (in tools.py), not where it's defined VAULT_CONNECTOR_PATCH = "extended_data.connectors.vault.VaultConnector" +def test_vault_connector_requires_hvac_when_constructed_without_extra() -> None: + """Vault tool metadata imports without hvac, but the connector still requires the extra.""" + if importlib.util.find_spec("hvac") is not None: + pytest.skip("hvac is installed") + + from extended_data.connectors.vault import VaultConnector + + with pytest.raises(ImportError, match=r"extended-data\[vault\]"): + VaultConnector(from_environment=False) + + class TestVaultToolDefinitions: """Test tool definitions and metadata.""" From 2b18b7af4f5893d27ea1377995ae7b13d9e4bea6 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 00:41:51 -0500 Subject: [PATCH 006/287] fix: lazy load slack sdk --- .../connectors/slack/__init__.py | 30 ++++++++++++++++--- tests/connectors/test_slack_connector.py | 14 +++++++-- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git 
a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index 746d78a..50bd19f 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -6,7 +6,7 @@ from collections.abc import Iterator, Mapping, Sequence from time import sleep -from typing import Any +from typing import TYPE_CHECKING, Any # batched was added in Python 3.12 @@ -22,14 +22,35 @@ def batched(iterable, n: int) -> Iterator[tuple]: yield batch -from slack_sdk.errors import SlackApiError -from slack_sdk.web import WebClient - from extended_data import is_nothing, wrap_raw_data_for_export +from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase from extended_data.logging import Logging +if TYPE_CHECKING: + from slack_sdk.errors import SlackApiError + from slack_sdk.web import WebClient +else: + WebClient = None + + class SlackApiError(Exception): + """Fallback exception used until slack-sdk is imported.""" + + +def _load_slack_sdk() -> None: + """Load slack-sdk lazily so tool metadata can import without the slack extra.""" + global SlackApiError, WebClient + + if WebClient is None: + try: + SlackApiError = require_extra("slack_sdk.errors", "slack").SlackApiError + WebClient = require_extra("slack_sdk.web", "slack").WebClient + except ImportError as exc: + msg = "slack-sdk is required for SlackConnector. Install with: pip install extended-data[slack]" + raise ImportError(msg) from exc + + # Settings MAX_RETRY_TIMEOUT_SECONDS = 30 @@ -172,6 +193,7 @@ def __init__( **kwargs: Extra keyword arguments forwarded to VendorConnectorBase. 
""" super().__init__(logger=logger, **kwargs) + _load_slack_sdk() self.token = token or self.get_input("SLACK_TOKEN", required=True) self.bot_token = bot_token or self.get_input("SLACK_BOT_TOKEN", required=True) diff --git a/tests/connectors/test_slack_connector.py b/tests/connectors/test_slack_connector.py index d488719..50af92f 100644 --- a/tests/connectors/test_slack_connector.py +++ b/tests/connectors/test_slack_connector.py @@ -1,17 +1,25 @@ -# ruff: noqa: I001 """Tests for SlackConnector.""" from __future__ import annotations +import importlib.util + from unittest.mock import MagicMock, patch import pytest -pytest.importorskip("slack_sdk") - from extended_data.connectors.slack import SlackConnector +def test_slack_connector_requires_slack_sdk_when_constructed_without_extra(): + """Slack tool metadata imports without slack-sdk, but the connector still requires the extra.""" + if importlib.util.find_spec("slack_sdk") is not None: + pytest.skip("slack-sdk is installed") + + with pytest.raises(ImportError, match=r"extended-data\[slack\]"): + SlackConnector(token="xoxp-test", bot_token="xoxb-test", from_environment=False) + + class TestSlackConnector: """Test suite for SlackConnector.""" From 1fc307d1de3785b8439e9447e86f02a78e3ef776 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 00:49:18 -0500 Subject: [PATCH 007/287] ci: expand connector typecheck gate --- .github/workflows/ci.yml | 19 +++++++++- README.md | 18 +++++++++- pyproject.toml | 1 + .../connectors/anthropic/__init__.py | 8 +++-- src/extended_data/connectors/base.py | 36 ++++++++++++------- src/extended_data/connectors/connectors.py | 16 ++++----- .../connectors/cursor/__init__.py | 4 +-- src/extended_data/connectors/meshy/jobs.py | 4 +-- .../meshy/persistence/vector_store.py | 12 +++++-- .../connectors/secrets/__init__.py | 11 +++--- src/extended_data/connectors/zoom/__init__.py | 4 +-- uv.lock | 2 ++ 12 files changed, 97 insertions(+), 38 deletions(-) diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c5f85e0..d945c4b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,24 @@ jobs: enable-cache: true - run: uv sync --python 3.13 --extra tests --extra typing - run: uvx ruff check src tests - - run: uv run mypy src/extended_data/primitives src/extended_data/containers src/extended_data/io src/extended_data/inputs src/extended_data/logging + - run: > + uv run mypy + src/extended_data/primitives + src/extended_data/containers + src/extended_data/io + src/extended_data/inputs + src/extended_data/logging + src/extended_data/connectors/_optional.py + src/extended_data/connectors/base.py + src/extended_data/connectors/registry.py + src/extended_data/connectors/cli.py + src/extended_data/connectors/connectors.py + src/extended_data/connectors/secrets/__init__.py + src/extended_data/connectors/meshy/jobs.py + src/extended_data/connectors/meshy/persistence/vector_store.py + src/extended_data/connectors/zoom/__init__.py + src/extended_data/connectors/cursor/__init__.py + src/extended_data/connectors/anthropic/__init__.py - run: uv run pytest tests/core tests/logging tests/inputs - run: uv run pytest tests/connectors/test_cloud_params.py tests/connectors/test_connectors.py tests/connectors/test_secrets.py tests/connectors/test_cli.py tests/connectors/test_mcp.py tests/connectors/meshy/test_models.py - run: uv build diff --git a/README.md b/README.md index 6294275..3b721ed 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,23 @@ More detail lives in [`docs/package-surface.md`](docs/package-surface.md). 
uv sync --extra tests --extra typing uv run pytest uv run ruff check src tests -uv run mypy src/extended_data/primitives src/extended_data/containers src/extended_data/io src/extended_data/inputs src/extended_data/logging +uv run mypy \ + src/extended_data/primitives \ + src/extended_data/containers \ + src/extended_data/io \ + src/extended_data/inputs \ + src/extended_data/logging \ + src/extended_data/connectors/_optional.py \ + src/extended_data/connectors/base.py \ + src/extended_data/connectors/registry.py \ + src/extended_data/connectors/cli.py \ + src/extended_data/connectors/connectors.py \ + src/extended_data/connectors/secrets/__init__.py \ + src/extended_data/connectors/meshy/jobs.py \ + src/extended_data/connectors/meshy/persistence/vector_store.py \ + src/extended_data/connectors/zoom/__init__.py \ + src/extended_data/connectors/cursor/__init__.py \ + src/extended_data/connectors/anthropic/__init__.py uv build ``` diff --git a/pyproject.toml b/pyproject.toml index 0304bb1..85806cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ dependencies = [ "sortedcontainers>=2.4.0", "tenacity>=8.4.1,<9.0.0", "tomlkit>=0.13.2", + "typing-extensions>=4.12.2", "validators>=0.22.0", "wrapt>=1.16.0", ] diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index 2a5a8a3..4fbdfbf 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ b/src/extended_data/connectors/anthropic/__init__.py @@ -32,6 +32,8 @@ from enum import Enum from typing import TYPE_CHECKING, Any +import httpx + from pydantic import BaseModel, ConfigDict, Field from extended_data.connectors.base import VendorConnectorBase @@ -225,8 +227,8 @@ def __init__( api_version: str = DEFAULT_API_VERSION, timeout: float = DEFAULT_TIMEOUT, logger: Logging | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: super().__init__(api_key=api_key, logger=logger, timeout=timeout, **kwargs) # Validate API key @@ -266,7 
+268,7 @@ def get_available_models() -> dict[str, str]: """ return CLAUDE_MODELS.copy() - def _handle_error(self, response) -> None: + def _handle_error(self, response: httpx.Response) -> None: """Handle API error responses. Args: diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index c1349a0..eca3c40 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -28,6 +28,7 @@ def my_operation(self) -> dict: from __future__ import annotations import builtins +import sys import threading import time @@ -47,8 +48,14 @@ def my_operation(self) -> dict: from extended_data.logging import Logging +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + if TYPE_CHECKING: from collections.abc import Callable + from types import TracebackType from langchain_core.tools import StructuredTool from pydantic import BaseModel @@ -110,8 +117,8 @@ def __init__( base_url: str | None = None, timeout: float | None = None, logger: Logging | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: """Initialize the connector. 
Args: @@ -143,7 +150,7 @@ def __init__( # Tool registry for LangChain/MCP self._tools: list[StructuredTool] = [] - self._tool_functions: dict[str, Callable] = {} + self._tool_functions: dict[str, Callable[..., Any]] = {} self._tool_schemas: dict[str, builtins.type[BaseModel]] = {} @property @@ -167,11 +174,16 @@ def close(self) -> None: self._client.close() self._client = None - def __enter__(self): + def __enter__(self) -> Self: """Context manager entry.""" return self - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self, + exc_type: builtins.type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: """Context manager exit - close client.""" self.close() @@ -228,7 +240,7 @@ def request( endpoint: str, *, headers: dict[str, str] | None = None, - **kwargs, + **kwargs: Any, ) -> httpx.Response: """Make HTTP request with retries and rate limiting. @@ -276,23 +288,23 @@ def request( return response - def get(self, endpoint: str, **kwargs) -> httpx.Response: + def get(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP GET request.""" return self.request("GET", endpoint, **kwargs) - def post(self, endpoint: str, **kwargs) -> httpx.Response: + def post(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP POST request.""" return self.request("POST", endpoint, **kwargs) - def put(self, endpoint: str, **kwargs) -> httpx.Response: + def put(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP PUT request.""" return self.request("PUT", endpoint, **kwargs) - def delete(self, endpoint: str, **kwargs) -> httpx.Response: + def delete(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP DELETE request.""" return self.request("DELETE", endpoint, **kwargs) - def patch(self, endpoint: str, **kwargs) -> httpx.Response: + def patch(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP PATCH request.""" return self.request("PATCH", endpoint, **kwargs) @@ -331,7 +343,7 @@ 
def download(self, url: str, output_path: str) -> int: def register_tool( self, - func: Callable, + func: Callable[..., Any], name: str | None = None, description: str | None = None, schema: builtins.type[BaseModel] | None = None, diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index 5fb53b0..63f8d92 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -66,26 +66,26 @@ class ConnectorFabric(InputProvider): def __init__( self, logger: Logging | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: super().__init__(**kwargs) self.logging = logger or Logging(logger_name=get_unique_signature(self)) self.logger = self.logging.logger # Client cache - nested dict for different client types and their params - self._client_cache: dict[str, dict[Any, Any]] = get_default_dict(levels=2) + self._client_cache: dict[str, dict[frozenset[tuple[str, Any]], Any]] = get_default_dict(levels=2) - def _get_cache_key(self, **kwargs) -> frozenset: + def _get_cache_key(self, **kwargs: Any) -> frozenset[tuple[str, Any]]: """Generate a hashable cache key from kwargs.""" hashable_kwargs = {k: make_hashable(v) for k, v in kwargs.items()} return frozenset(hashable_kwargs.items()) - def _get_cached_client(self, client_type: str, **kwargs) -> Any | None: + def _get_cached_client(self, client_type: str, **kwargs: Any) -> Any | None: """Retrieve a client from cache.""" cache_key = self._get_cache_key(**kwargs) return self._client_cache[client_type].get(cache_key) - def _set_cached_client(self, client_type: str, client: Any, **kwargs) -> None: + def _set_cached_client(self, client_type: str, client: Any, **kwargs: Any) -> None: """Store a client in cache.""" cache_key = self._get_cache_key(**kwargs) self._client_cache[client_type][cache_key] = client @@ -162,7 +162,7 @@ def get_aws_client( execution_role_arn: str | None = None, role_session_name: str | None = None, config: Config | None = 
None, - **client_args, + **client_args: Any, ) -> boto3.client: """Get a cached boto3 client.""" execution_role_arn = execution_role_arn or self.get_input("EXECUTION_ROLE_ARN", required=False) @@ -200,7 +200,7 @@ def get_aws_resource( execution_role_arn: str | None = None, role_session_name: str | None = None, config: Config | None = None, - **resource_args, + **resource_args: Any, ) -> ServiceResource: """Get a cached boto3 resource.""" execution_role_arn = execution_role_arn or self.get_input("EXECUTION_ROLE_ARN", required=False) diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index e5f5e8b..3f6bae6 100644 --- a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -344,8 +344,8 @@ def __init__( base_url: str | None = None, timeout: float = DEFAULT_TIMEOUT, logger: Logging | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: super().__init__(api_key=api_key, base_url=base_url, logger=logger, timeout=timeout, **kwargs) # Validate API key diff --git a/src/extended_data/connectors/meshy/jobs.py b/src/extended_data/connectors/meshy/jobs.py index ea91963..c6db90b 100644 --- a/src/extended_data/connectors/meshy/jobs.py +++ b/src/extended_data/connectors/meshy/jobs.py @@ -31,9 +31,9 @@ class AssetManifest: task_id: str = "" polycount_target: int | None = None polycount_estimate: int | None = None - metadata: dict[str, Any] = None + metadata: dict[str, Any] | None = None - def __post_init__(self): + def __post_init__(self) -> None: if self.metadata is None: self.metadata = {} diff --git a/src/extended_data/connectors/meshy/persistence/vector_store.py b/src/extended_data/connectors/meshy/persistence/vector_store.py index 3410be6..d018833 100644 --- a/src/extended_data/connectors/meshy/persistence/vector_store.py +++ b/src/extended_data/connectors/meshy/persistence/vector_store.py @@ -46,9 +46,12 @@ from pathlib import Path from typing import 
TYPE_CHECKING, Any +from typing_extensions import Self + if TYPE_CHECKING: from collections.abc import Iterator + from types import TracebackType # Vector extension is optional _HAS_VECTOR = False @@ -522,10 +525,15 @@ def close(self) -> None: self._conn.close() self._conn = None - def __enter__(self): + def __enter__(self) -> Self: return self - def __exit__(self, *args): + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: self.close() diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 38602f0..72470c8 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -37,6 +37,7 @@ from dataclasses import dataclass, field from enum import Enum from pathlib import Path +from typing import Any from extended_data.connectors.base import VendorConnectorBase from extended_data.logging import Logging @@ -100,7 +101,7 @@ class SyncResult: diff_output: str = "" @classmethod - def from_native(cls, native_result) -> SyncResult: + def from_native(cls, native_result: Any) -> SyncResult: """Create from native gopy result.""" return cls( success=native_result.Success, @@ -117,7 +118,7 @@ def from_native(cls, native_result) -> SyncResult: ) @classmethod - def from_cli_output(cls, output: dict) -> SyncResult: + def from_cli_output(cls, output: dict[str, Any]) -> SyncResult: """Create from CLI JSON output.""" return cls( success=output.get("success", False), @@ -149,7 +150,7 @@ class ConfigInfo: aws_region: str = "" @classmethod - def from_native(cls, native_info) -> ConfigInfo: + def from_native(cls, native_info: Any) -> ConfigInfo: """Create from native gopy result.""" return cls( valid=native_info.Valid, @@ -188,8 +189,8 @@ def __init__( cli_path: str | None = None, prefer_native: bool = True, logger: Logging | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: 
"""Initialize the secrets connector. Args: diff --git a/src/extended_data/connectors/zoom/__init__.py b/src/extended_data/connectors/zoom/__init__.py index ed0ed1e..be9430a 100644 --- a/src/extended_data/connectors/zoom/__init__.py +++ b/src/extended_data/connectors/zoom/__init__.py @@ -25,8 +25,8 @@ def __init__( client_secret: str | None = None, account_id: str | None = None, logger: Logging | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: super().__init__(logger=logger, **kwargs) self.errors: list[str] = [] # Track errors for programmatic access diff --git a/uv.lock b/uv.lock index 1d2047e..54af2ba 100644 --- a/uv.lock +++ b/uv.lock @@ -1200,6 +1200,7 @@ dependencies = [ { name = "sortedcontainers" }, { name = "tenacity" }, { name = "tomlkit" }, + { name = "typing-extensions" }, { name = "validators" }, { name = "wrapt" }, ] @@ -1404,6 +1405,7 @@ requires-dist = [ { name = "tomlkit", specifier = ">=0.13.2" }, { name = "types-pyyaml", marker = "extra == 'typing'", specifier = ">=6.0.12.20240724" }, { name = "types-requests", marker = "extra == 'typing'", specifier = ">=2.33.0.20260408" }, + { name = "typing-extensions", specifier = ">=4.12.2" }, { name = "uv", marker = "extra == 'ai'", specifier = ">=0.11.7" }, { name = "uv", marker = "extra == 'all'", specifier = ">=0.11.7" }, { name = "uv", marker = "extra == 'crewai'", specifier = ">=0.11.7" }, From df57bf007cb5519dbd5df4469292827b1896e5ab Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 00:55:14 -0500 Subject: [PATCH 008/287] ci: typecheck meshy slack and vault connectors --- .github/workflows/ci.yml | 8 +++ README.md | 10 +++- src/extended_data/connectors/mcp.py | 14 ++--- src/extended_data/connectors/meshy/base.py | 19 +++--- .../connectors/meshy/connector.py | 4 +- src/extended_data/connectors/meshy/mcp.py | 23 ++++---- src/extended_data/connectors/meshy/tools.py | 59 ++++++++++++++----- .../connectors/slack/__init__.py | 42 ++++++------- 
src/extended_data/connectors/slack/tools.py | 8 ++- .../connectors/vault/__init__.py | 24 ++++---- 10 files changed, 129 insertions(+), 82 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d945c4b..60dba63 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,9 +42,17 @@ jobs: src/extended_data/connectors/secrets/__init__.py src/extended_data/connectors/meshy/jobs.py src/extended_data/connectors/meshy/persistence/vector_store.py + src/extended_data/connectors/meshy/base.py + src/extended_data/connectors/meshy/tools.py + src/extended_data/connectors/meshy/mcp.py + src/extended_data/connectors/mcp.py + src/extended_data/connectors/meshy/connector.py src/extended_data/connectors/zoom/__init__.py src/extended_data/connectors/cursor/__init__.py src/extended_data/connectors/anthropic/__init__.py + src/extended_data/connectors/slack/__init__.py + src/extended_data/connectors/slack/tools.py + src/extended_data/connectors/vault/__init__.py - run: uv run pytest tests/core tests/logging tests/inputs - run: uv run pytest tests/connectors/test_cloud_params.py tests/connectors/test_connectors.py tests/connectors/test_secrets.py tests/connectors/test_cli.py tests/connectors/test_mcp.py tests/connectors/meshy/test_models.py - run: uv build diff --git a/README.md b/README.md index 3b721ed..a6d2ac5 100644 --- a/README.md +++ b/README.md @@ -120,9 +120,17 @@ uv run mypy \ src/extended_data/connectors/secrets/__init__.py \ src/extended_data/connectors/meshy/jobs.py \ src/extended_data/connectors/meshy/persistence/vector_store.py \ + src/extended_data/connectors/meshy/base.py \ + src/extended_data/connectors/meshy/tools.py \ + src/extended_data/connectors/meshy/mcp.py \ + src/extended_data/connectors/mcp.py \ + src/extended_data/connectors/meshy/connector.py \ src/extended_data/connectors/zoom/__init__.py \ src/extended_data/connectors/cursor/__init__.py \ - src/extended_data/connectors/anthropic/__init__.py + 
src/extended_data/connectors/anthropic/__init__.py \ + src/extended_data/connectors/slack/__init__.py \ + src/extended_data/connectors/slack/tools.py \ + src/extended_data/connectors/vault/__init__.py uv build ``` diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index 994c79b..c9e9b21 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -41,7 +41,7 @@ def _check_mcp_installed() -> bool: return False -def _get_method_schema(method: Callable) -> dict[str, Any]: +def _get_method_schema(method: Callable[..., Any]) -> dict[str, Any]: """Generate JSON schema from method signature.""" sig = inspect.signature(method) properties = {} @@ -90,7 +90,7 @@ def _get_method_schema(method: Callable) -> dict[str, Any]: } -def _get_public_methods(connector_class: builtins.type[Any]) -> list[tuple[str, Callable]]: +def _get_public_methods(connector_class: builtins.type[Any]) -> list[tuple[str, Callable[..., Any]]]: """Get public methods from a connector class (excluding dunder and private).""" methods = [] for name in dir(connector_class): @@ -102,7 +102,7 @@ def _get_public_methods(connector_class: builtins.type[Any]) -> list[tuple[str, return methods -def create_server(): +def create_server() -> Any: """Create the unified MCP server with all registered connectors.""" try: from mcp.server import Server @@ -148,7 +148,7 @@ def create_server(): "parameters": schema, } - @server.list_tools() + @server.list_tools() # type: ignore[untyped-decorator] async def list_tools() -> list[Tool]: """Return all available tools.""" return [ @@ -156,8 +156,8 @@ async def list_tools() -> list[Tool]: for name, tool in tools.items() ] - @server.call_tool() - async def call_tool(name: str, arguments: dict) -> list[TextContent]: + @server.call_tool() # type: ignore[untyped-decorator] + async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]: """Execute a tool and return results.""" if name not in tools: 
return [TextContent(type="text", text=f"Unknown tool: {name}")] @@ -203,7 +203,7 @@ def main() -> int: server = create_server() - async def run(): + async def run() -> None: async with stdio_server() as (read_stream, write_stream): await server.run(read_stream, write_stream, server.create_initialization_options()) diff --git a/src/extended_data/connectors/meshy/base.py b/src/extended_data/connectors/meshy/base.py index 893b0cb..fb8b104 100644 --- a/src/extended_data/connectors/meshy/base.py +++ b/src/extended_data/connectors/meshy/base.py @@ -11,8 +11,11 @@ from __future__ import annotations +import threading import time +from typing import Any + import httpx from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential @@ -36,6 +39,7 @@ def __init__(self, message: str, status_code: int | None = None): _client: httpx.Client | None = None _inputs: InputProvider | None = None _last_request_time: float = 0 +_rate_limit_lock = threading.Lock() _min_request_interval: float = 0.5 # 500ms between requests BASE_URL = "https://api.meshy.ai" @@ -49,7 +53,7 @@ def _get_inputs() -> InputProvider: return _inputs -def configure(api_key: str | None = None, **kwargs) -> None: +def configure(api_key: str | None = None, **kwargs: Any) -> None: """Configure Meshy API credentials. 
Args: @@ -79,7 +83,7 @@ def get_client() -> httpx.Client: return _client -def close(): +def close() -> None: """Close the HTTP client.""" global _client if _client: @@ -87,14 +91,9 @@ def close(): _client = None -def _rate_limit(): +def _rate_limit() -> None: """Simple rate limiting with thread safety.""" - import threading - - global _last_request_time, _rate_limit_lock - - if "_rate_limit_lock" not in globals(): - _rate_limit_lock = threading.Lock() + global _last_request_time with _rate_limit_lock: now = time.time() @@ -122,7 +121,7 @@ def request( endpoint: str, *, version: str = "v2", - **kwargs, + **kwargs: Any, ) -> httpx.Response: """Make HTTP request with retries and rate limiting. diff --git a/src/extended_data/connectors/meshy/connector.py b/src/extended_data/connectors/meshy/connector.py index 010db0c..5e3abd5 100644 --- a/src/extended_data/connectors/meshy/connector.py +++ b/src/extended_data/connectors/meshy/connector.py @@ -26,8 +26,8 @@ def __init__( api_key: str | None = None, base_url: str | None = None, timeout: float = 300.0, - **kwargs, - ): + **kwargs: Any, + ) -> None: super().__init__(api_key=api_key, base_url=base_url, timeout=timeout, **kwargs) def text3d_generate( diff --git a/src/extended_data/connectors/meshy/mcp.py b/src/extended_data/connectors/meshy/mcp.py index 2c8e5b2..e31ecc2 100644 --- a/src/extended_data/connectors/meshy/mcp.py +++ b/src/extended_data/connectors/meshy/mcp.py @@ -31,13 +31,14 @@ import json +from collections.abc import Callable from typing import Any MCP_INSTALL_MESSAGE = "MCP SDK not installed. Install with: pip install extended-data[meshy,mcp]" -def _create_mcp_tools() -> list[Any]: +def _create_mcp_tools() -> list[tuple[Any, Callable[..., Any]]]: """Create MCP tool definitions from Meshy functions. 
Returns: @@ -52,7 +53,7 @@ def _create_mcp_tools() -> list[Any]: from extended_data.connectors.meshy import tools # Define tool schemas manually for better control - tool_schemas = [ + tool_schemas: list[dict[str, Any]] = [ { "name": "text3d_generate", "description": ( @@ -219,8 +220,8 @@ def _create_mcp_tools() -> list[Any]: mcp_tools = [] for schema in tool_schemas: # Build JSON schema properties and required list - properties = {} - required = [] + properties: dict[str, Any] = {} + required: list[str] = [] for param_name, param_def in schema["parameters"].items(): prop = { @@ -253,7 +254,7 @@ def _create_mcp_tools() -> list[Any]: return mcp_tools -def create_server(): +def create_server() -> Any: """Create an MCP server with Meshy AI tools. Returns: @@ -275,12 +276,12 @@ def create_server(): tool_list = [tool for tool, _ in mcp_tools] # Register tools - @server.list_tools() - async def list_tools(): + @server.list_tools() # type: ignore[untyped-decorator] + async def list_tools() -> list[Any]: return tool_list # Handle tool calls - @server.call_tool() + @server.call_tool() # type: ignore[untyped-decorator] async def call_tool(name: str, arguments: dict[str, Any]) -> list[Any]: from mcp.types import TextContent @@ -307,7 +308,7 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[Any]: return server -def run_server(server=None): +def run_server(server: Any | None = None) -> None: """Run the MCP server. 
Args: @@ -323,7 +324,7 @@ def run_server(server=None): if server is None: server = create_server() - async def main(): + async def main() -> None: async with stdio_server() as (read_stream, write_stream): await server.run( read_stream, @@ -334,7 +335,7 @@ async def main(): asyncio.run(main()) -def main(): +def main() -> None: """Entry point for the MCP server.""" run_server() diff --git a/src/extended_data/connectors/meshy/tools.py b/src/extended_data/connectors/meshy/tools.py index b68c3b1..59acb2f 100644 --- a/src/extended_data/connectors/meshy/tools.py +++ b/src/extended_data/connectors/meshy/tools.py @@ -7,6 +7,7 @@ from __future__ import annotations +from collections.abc import Callable from typing import Any from pydantic import BaseModel, Field @@ -156,6 +157,13 @@ def text3d_generate( wait=True, ) + if isinstance(result, str): + return { + "task_id": result, + "status": "pending", + "message": "Text-to-3D task submitted", + } + fields = _extract_result_fields(result) return { "task_id": result.id, @@ -190,6 +198,13 @@ def image3d_generate( wait=True, ) + if isinstance(result, str): + return { + "task_id": result, + "status": "pending", + "message": "Image-to-3D task submitted", + } + fields = _extract_result_fields(result) return { "task_id": result.id, @@ -211,6 +226,13 @@ def rig_model(model_id: str, wait: bool = True) -> dict[str, Any]: result = rigging.rig(model_id, wait=wait) + if isinstance(result, str): + return { + "task_id": result, + "status": "pending", + "message": "Rigging task submitted", + } + if wait: return { "task_id": result.id, @@ -218,11 +240,8 @@ def rig_model(model_id: str, wait: bool = True) -> dict[str, Any]: "message": "Rigging completed", } - return { - "task_id": result, # task_id string when wait=False - "status": "pending", - "message": "Rigging task submitted", - } + msg = "Expected rigging task id when wait=False" + raise TypeError(msg) def apply_animation(model_id: str, animation_id: int, wait: bool = True) -> dict[str, 
Any]: @@ -240,6 +259,13 @@ def apply_animation(model_id: str, animation_id: int, wait: bool = True) -> dict result = animate.apply(model_id, int(animation_id), wait=wait) + if isinstance(result, str): + return { + "task_id": result, + "status": "pending", + "message": "Animation task submitted", + } + if wait: return { "task_id": result.id, @@ -248,11 +274,8 @@ def apply_animation(model_id: str, animation_id: int, wait: bool = True) -> dict "glb_url": result.animation_glb_url, } - return { - "task_id": result, # task_id string when wait=False - "status": "pending", - "message": "Animation task submitted", - } + msg = "Expected animation task id when wait=False" + raise TypeError(msg) def retexture_model( @@ -281,6 +304,13 @@ def retexture_model( wait=wait, ) + if isinstance(result, str): + return { + "task_id": result, + "status": "pending", + "message": "Retexture task submitted", + } + if wait: return { "task_id": result.id, @@ -289,11 +319,8 @@ def retexture_model( "model_url": getattr(result, "model_url", None), } - return { - "task_id": result, # task_id string when wait=False - "status": "pending", - "message": "Retexture task submitted", - } + msg = "Expected retexture task id when wait=False" + raise TypeError(msg) def list_animations(category: str = "", limit: int = 50) -> dict[str, Any]: @@ -344,7 +371,7 @@ def check_task_status(task_id: str, task_type: str = "text-to-3d") -> dict[str, from extended_data.connectors.meshy import animate, image3d, retexture, rigging, text3d # Call the appropriate get function based on task type - get_funcs = { + get_funcs: dict[str, Callable[[str], Any]] = { "text-to-3d": text3d.get, "image-to-3d": image3d.get, "rigging": rigging.get, diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index 50bd19f..206e837 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -4,7 +4,7 @@ import sys -from collections.abc 
import Iterator, Mapping, Sequence +from collections.abc import Iterable, Iterator, Mapping, Sequence from time import sleep from typing import TYPE_CHECKING, Any @@ -15,7 +15,7 @@ else: from itertools import islice - def batched(iterable, n: int) -> Iterator[tuple]: + def batched(iterable: Iterable[Any], n: int) -> Iterator[tuple[Any, ...]]: """Batch an iterable into chunks of size n for Python < 3.12.""" it = iter(iterable) while batch := tuple(islice(it, n)): @@ -58,7 +58,7 @@ def _load_slack_sdk() -> None: class SlackAPIError(RuntimeError): """Slack API error wrapper.""" - def __init__(self, response): + def __init__(self, response: Any) -> None: self.response = response self.status_code = response.status_code if hasattr(response, "status_code") else None super().__init__(f"Slack API error: {response}") @@ -88,7 +88,7 @@ def get_header_block(field_title: str) -> list[dict[str, Any]]: ] -def get_field_context_message_blocks(field_name: str, context_data: Mapping) -> list[dict[str, Any]]: +def get_field_context_message_blocks(field_name: str, context_data: Mapping[str, Any]) -> list[dict[str, Any]]: """Build header and context blocks for detailed field data. Args: @@ -99,13 +99,13 @@ def get_field_context_message_blocks(field_name: str, context_data: Mapping) -> list[dict[str, Any]]: Blocks describing the field data. """ field_title = field_name.title() - blocks = [ + blocks: list[dict[str, Any]] = [ {"type": "header", "text": {"type": "plain_text", "text": field_title}}, get_divider(), ] for field_keys in batched(context_data.keys(), 10): - context_elements = [] + context_elements: list[dict[str, str]] = [] for field_key in field_keys: field_value = context_data.get(field_key) if is_nothing(field_value): @@ -156,7 +156,7 @@ def get_rich_text_blocks( Returns: list[dict[str, Any]]: Rich-text block followed by a divider. 
""" - style = {} + style: dict[str, bool] = {} if bold: style["bold"] = True if italic: @@ -164,9 +164,9 @@ def get_rich_text_blocks( if strike: style["strike"] = True - elements = [] + elements: list[dict[str, Any]] = [] for line in lines: - element = {"type": "text", "text": line} + element: dict[str, Any] = {"type": "text", "text": line} if not is_nothing(style): element["style"] = style elements.append(element) @@ -182,8 +182,8 @@ def __init__( token: str | None = None, bot_token: str | None = None, logger: Logging | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: """Initialize the Slack connector. Args: @@ -213,7 +213,7 @@ def _normalize_identifier_filter( Returns: Optional[set[str]]: Unique identifier set, or None when not provided. """ - if is_nothing(identifiers): + if identifiers is None or is_nothing(identifiers): return None if isinstance(identifiers, str): @@ -228,14 +228,14 @@ def send_message( self, channel_name: str, text: str, - blocks: list | None = None, + blocks: list[dict[str, Any]] | None = None, lines: list[str] | None = None, bold: bool = False, italic: bool = False, strike: bool = False, thread_id: str | None = None, raise_on_api_error: bool = True, - ): + ) -> Any: """Send a message to a Slack channel using the bot token. Args: @@ -283,7 +283,7 @@ def send_message( raise SlackAPIError(exc.response) from exc return exc.response - def get_bot_channels(self) -> dict[str, dict]: + def get_bot_channels(self) -> dict[str, dict[str, Any]]: """Return channels the bot account is a member of. Returns: @@ -305,7 +305,7 @@ def list_users( include_deleted: bool | None = None, include_bots: bool | None = None, include_app_users: bool | None = None, - **kwargs, + **kwargs: Any, ) -> dict[str, dict[str, Any]]: """List Slack users with optional filtering flags. 
@@ -365,7 +365,7 @@ def list_usergroups( include_users: bool | None = None, team_id: str | None = None, usergroup_ids: str | Sequence[str] | None = None, - **kwargs, + **kwargs: Any, ) -> dict[str, dict[str, Any]]: """List Slack user groups with optional filtering. @@ -417,7 +417,7 @@ def list_conversations( types: str | Sequence[str] | None = None, get_members: bool | None = None, channels_only: bool | None = None, - **kwargs, + **kwargs: Any, ) -> dict[str, dict[str, Any]]: """List Slack conversations with optional filtering. @@ -473,7 +473,7 @@ def _call_api( method: str, group_by: str | None = None, id_field_name: str = "id", - **kwargs, + **kwargs: Any, ) -> Any: """Call a Slack WebClient method with retry and grouping support. @@ -495,7 +495,7 @@ def _call_api( if call is None: raise AttributeError(f"{method} is not supported by the Slack WebClient") - response = None + response: Any | None = None attempt = 1 total_delay = 0 @@ -518,7 +518,7 @@ def _call_api( if is_nothing(response) or is_nothing(group_by): return response - grouped = {} + grouped: dict[str, dict[str, Any]] = {} for datum in response.get(group_by, {}): datum_id = datum.get(id_field_name) if is_nothing(datum_id): diff --git a/src/extended_data/connectors/slack/tools.py b/src/extended_data/connectors/slack/tools.py index 7ecaf5c..9588cbf 100644 --- a/src/extended_data/connectors/slack/tools.py +++ b/src/extended_data/connectors/slack/tools.py @@ -25,11 +25,15 @@ import os -from typing import Any +from typing import TYPE_CHECKING, Any from pydantic import BaseModel, Field +if TYPE_CHECKING: + from extended_data.connectors.slack import SlackConnector + + # ============================================================================= # Input Schemas # ============================================================================= @@ -71,7 +75,7 @@ class GetChannelHistorySchema(BaseModel): # ============================================================================= -def _get_connector(): +def 
_get_connector() -> SlackConnector: """Create a SlackConnector with tokens from environment variables. The slack_sdk WebClient only falls back to environment variables when diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index c474c03..137e0b1 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -50,8 +50,8 @@ def __init__( vault_namespace: str | None = None, vault_token: str | None = None, logger: Logging | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: super().__init__(logger=logger, **kwargs) self._hvac = _load_hvac() @@ -75,7 +75,7 @@ def vault_client(self) -> hvac.Client: vault_namespace = self.vault_namespace or self.get_input(VAULT_NAMESPACE_ENV_VAR, required=False) vault_token = self.vault_token or self.get_input("VAULT_TOKEN", required=False) - vault_opts: dict = {"url": vault_url} + vault_opts: dict[str, Any] = {"url": vault_url} if vault_namespace: vault_opts["namespace"] = vault_namespace if vault_token: @@ -125,7 +125,7 @@ def vault_client(self) -> hvac.Client: msg = "Vault authentication failed: no valid token or AppRole credentials provided" raise RuntimeError(msg) - def _set_token_expiration(self): + def _set_token_expiration(self) -> None: """Set the token expiration time.""" if self._vault_client is None: return @@ -162,7 +162,7 @@ def get_vault_client( vault_url: str | None = None, vault_namespace: str | None = None, vault_token: str | None = None, - **kwargs, + **kwargs: Any, ) -> hvac.Client: """Get an instance of the Vault client.""" instance = cls(vault_url, vault_namespace, vault_token, **kwargs) @@ -173,7 +173,7 @@ def list_secrets( root_path: str = "/", mount_point: str = "secret", max_depth: int | None = None, - ) -> dict[str, dict]: + ) -> dict[str, dict[str, Any]]: """List secrets recursively from Vault KV v2 engine. 
Args: @@ -195,7 +195,7 @@ def list_secrets( display_root = root_path if root_path not in (None, "", "/") else "/" self.logger.info(f"Listing Vault secrets from {mount_point}{display_root}") - secrets: dict[str, dict] = {} + secrets: dict[str, dict[str, Any]] = {} client = self.vault_client normalized_root = (root_path or "").strip("/") @@ -253,7 +253,7 @@ def read_secret( self, path: str, mount_point: str = "secret", - ) -> dict | None: + ) -> dict[str, Any] | None: """Read a single secret from Vault. Args: @@ -279,7 +279,7 @@ def get_secret( secret_name: str | None = None, matchers: dict[str, str] | None = None, mount_point: str = "secret", - ) -> dict | None: + ) -> dict[str, Any] | None: """Get Vault secret by path, name, or by searching with matchers. This method supports three modes: @@ -347,7 +347,7 @@ def get_secret( continue # If no matchers, take the first non-empty secret - if is_nothing(matchers): + if matchers is None or is_nothing(matchers): self.logger.warning("No matchers provided, taking the first non-empty secret found") secret_data = matching_secret_data continue @@ -369,7 +369,7 @@ def get_secret( def write_secret( self, path: str, - data: dict, + data: dict[str, Any], mount_point: str = "secret", ) -> bool: """Write a secret to Vault. @@ -434,7 +434,7 @@ def get_aws_iam_role( self, role_name: str, mount_point: str = "aws", - ) -> dict | None: + ) -> dict[str, Any] | None: """Retrieve details about a specific AWS IAM role configured in Vault. 
Args: From cf3880679cc988ca8926fbfb1ef5d41ddcd82c6e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 00:57:46 -0500 Subject: [PATCH 009/287] ci: typecheck github connector --- .github/workflows/ci.yml | 2 + README.md | 4 +- .../connectors/github/__init__.py | 77 ++++++++++--------- src/extended_data/connectors/github/tools.py | 12 +-- 4 files changed, 51 insertions(+), 44 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 60dba63..e75e473 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,6 +53,8 @@ jobs: src/extended_data/connectors/slack/__init__.py src/extended_data/connectors/slack/tools.py src/extended_data/connectors/vault/__init__.py + src/extended_data/connectors/github/__init__.py + src/extended_data/connectors/github/tools.py - run: uv run pytest tests/core tests/logging tests/inputs - run: uv run pytest tests/connectors/test_cloud_params.py tests/connectors/test_connectors.py tests/connectors/test_secrets.py tests/connectors/test_cli.py tests/connectors/test_mcp.py tests/connectors/meshy/test_models.py - run: uv build diff --git a/README.md b/README.md index a6d2ac5..bf4b2bd 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,9 @@ uv run mypy \ src/extended_data/connectors/anthropic/__init__.py \ src/extended_data/connectors/slack/__init__.py \ src/extended_data/connectors/slack/tools.py \ - src/extended_data/connectors/vault/__init__.py + src/extended_data/connectors/vault/__init__.py \ + src/extended_data/connectors/github/__init__.py \ + src/extended_data/connectors/github/tools.py uv build ``` diff --git a/src/extended_data/connectors/github/__init__.py b/src/extended_data/connectors/github/__init__.py index 55a6f2d..c83b238 100644 --- a/src/extended_data/connectors/github/__init__.py +++ b/src/extended_data/connectors/github/__init__.py @@ -39,7 +39,7 @@ class GitHubFallbackError(Exception): UnknownObjectException = GitHubFallbackError -FilePath = str | bytes | 
os.PathLike[Any] +FilePath = str | os.PathLike[str] def _load_github_sdk() -> None: @@ -82,8 +82,8 @@ def __init__( github_token: str | None = None, per_page: int = DEFAULT_PER_PAGE, logger: Logging | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: super().__init__(logger=logger, **kwargs) _load_github_sdk() @@ -110,7 +110,7 @@ def __init__( self.graphql_client = GraphqlClient(endpoint="https://api.github.com/graphql") - def get_repository_branch(self, branch_name: str): + def get_repository_branch(self, branch_name: str) -> Any | None: """Get a repository branch by name.""" if self.repo is None: self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot get branch {branch_name}") @@ -122,14 +122,14 @@ def get_repository_branch(self, branch_name: str): self.logger.warning(f"{branch_name} does not yet exist") return None - def create_repository_branch(self, branch_name: str, parent_branch: str | None = None): + def create_repository_branch(self, branch_name: str, parent_branch: str | None = None) -> Any | None: """Create a new repository branch.""" if self.repo is None: self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot create branch {branch_name}") return None parent_branch_ref = self.get_repository_branch(parent_branch or self.repo.default_branch) - if is_nothing(parent_branch_ref): + if parent_branch_ref is None or is_nothing(parent_branch_ref): raise RuntimeError( f"Cannot create Git branch {branch_name}, parent branch {parent_branch} does not yet exist" ) @@ -155,19 +155,20 @@ def get_repository_file( charset: str | None = "utf-8", errors: str | None = "strict", raise_on_not_found: bool = False, - ): + ) -> Any: """Get a file from the repository.""" + file_path_text = os.fspath(file_path) if self.repo is None: - self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot get file {file_path}") + self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot get file {file_path_text}") return 
None - def state_negative_result(result: str): + def state_negative_result(result: str) -> None: self.logger.warning(result) if raise_on_not_found: raise FileNotFoundError(result) - def get_retval(d: str | None, s: str | None, p: FilePath): - retval = [d] + def get_retval(d: Any, s: str | None, p: str) -> Any: + retval: list[Any] = [d] if return_sha: retval.append(s) if return_path: @@ -176,29 +177,29 @@ def get_retval(d: str | None, s: str | None, p: FilePath): return retval[0] return tuple(retval) - file_data = {} if decode else "" + file_data: Any = {} if decode else "" file_sha = None - self.logger.debug(f"Getting repository file: {file_path}") + self.logger.debug(f"Getting repository file: {file_path_text}") try: - raw_file_data = self.repo.get_contents(str(file_path), ref=self.GITHUB_BRANCH) + raw_file_data = self.repo.get_contents(file_path_text, ref=self.GITHUB_BRANCH) file_sha = raw_file_data.sha if is_nothing(raw_file_data.content): - self.logger.warning(f"{file_path} is empty of content: {self.GITHUB_BRANCH}") + self.logger.warning(f"{file_path_text} is empty of content: {self.GITHUB_BRANCH}") else: file_data = raw_file_data.decoded_content.decode(charset, errors) except (UnknownObjectException, AttributeError): - state_negative_result(f"{file_path} does not exist") + state_negative_result(f"{file_path_text} does not exist") except ValueError as exc: - self.logger.warning(f"Reading {file_path} not supported: {exc}") + self.logger.warning(f"Reading {file_path_text} not supported: {exc}") decode = False if not decode or is_nothing(file_data): - return get_retval(file_data, file_sha, file_path) + return get_retval(file_data, file_sha, file_path_text) # Decode file content based on file type - encoding = get_encoding_for_file_path(file_path) + encoding = get_encoding_for_file_path(file_path_text) try: if encoding == "json": decoded_data = decode_json(file_data) @@ -208,10 +209,10 @@ def get_retval(d: str | None, s: str | None, p: FilePath): # For raw or 
unknown types, return the string as-is decoded_data = file_data except Exception as exc: - self.logger.warning(f"Failed to decode {file_path} as {encoding}: {exc}") + self.logger.warning(f"Failed to decode {file_path_text} as {encoding}: {exc}") decoded_data = file_data - return get_retval(decoded_data, file_sha, file_path) + return get_retval(decoded_data, file_sha, file_path_text) def update_repository_file( self, @@ -222,63 +223,65 @@ def update_repository_file( allow_encoding: bool | str | None = None, allow_empty: bool = False, **format_opts: Any, - ): + ) -> Any | None: """Update a file in the repository.""" + file_path_text = os.fspath(file_path) if self.repo is None: - self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot update file {file_path}") + self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot update file {file_path_text}") return None if is_nothing(file_data) and not allow_empty: - self.logger.warning(f"Empty file data for {file_path} not allowed") + self.logger.warning(f"Empty file data for {file_path_text} not allowed") return None if msg: self.logger.info(msg) if allow_encoding is None: - allow_encoding = get_encoding_for_file_path(file_path) + allow_encoding = get_encoding_for_file_path(file_path_text) file_data = wrap_raw_data_for_export(file_data, allow_encoding=allow_encoding, **format_opts) if not isinstance(file_data, str): file_data = str(file_data) - self.logger.info(f"Updating repository file: {file_path}") + self.logger.info(f"Updating repository file: {file_path_text}") if file_sha is None: - result = self.get_repository_file(file_path, return_sha=True) + result = self.get_repository_file(file_path_text, return_sha=True) if isinstance(result, tuple): _, file_sha = result if file_sha is None: if msg is None: - msg = f"Creating {file_path}" + msg = f"Creating {file_path_text}" return self.repo.create_file( - path=str(file_path), + path=file_path_text, message=msg, branch=self.GITHUB_BRANCH, 
content=file_data, ) else: if msg is None: - msg = f"Updating {file_path}" + msg = f"Updating {file_path_text}" return self.repo.update_file( - path=str(file_path), + path=file_path_text, message=msg, content=file_data, sha=file_sha, branch=self.GITHUB_BRANCH, ) - def delete_repository_file(self, file_path: FilePath, msg: str | None = None): + def delete_repository_file(self, file_path: FilePath, msg: str | None = None) -> Any | None: """Delete a file from the repository.""" + file_path_text = os.fspath(file_path) if self.repo is None: - self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot delete file {file_path}") + self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot delete file {file_path_text}") return None - self.logger.info(f"Deleting repository file: {file_path}") + self.logger.info(f"Deleting repository file: {file_path_text}") - result = self.get_repository_file(file_path=file_path, return_sha=True) + result = self.get_repository_file(file_path=file_path_text, return_sha=True) sha = None if isinstance(result, tuple): _, sha = result @@ -287,10 +290,10 @@ def delete_repository_file(self, file_path: FilePath, msg: str | None = None): return None if msg is None: - msg = f"Deleting {file_path}" + msg = f"Deleting {file_path_text}" return self.repo.delete_file( - path=str(file_path), + path=file_path_text, message=msg, branch=self.GITHUB_BRANCH, sha=sha, diff --git a/src/extended_data/connectors/github/tools.py b/src/extended_data/connectors/github/tools.py index 527c720..553252e 100644 --- a/src/extended_data/connectors/github/tools.py +++ b/src/extended_data/connectors/github/tools.py @@ -79,7 +79,7 @@ def list_repositories( type_filter: str = "all", include_branches: bool = False, github_token: str | None = None, - **kwargs, + **kwargs: Any, ) -> list[dict[str, Any]]: """List repositories in a GitHub organization. 
@@ -109,7 +109,7 @@ def get_repository( github_owner: str, repo_name: str, github_token: str | None = None, - **kwargs, + **kwargs: Any, ) -> dict[str, Any]: """Get details of a specific GitHub repository. @@ -137,7 +137,7 @@ def list_teams( include_members: bool = False, include_repos: bool = False, github_token: str | None = None, - **kwargs, + **kwargs: Any, ) -> list[dict[str, Any]]: """List teams in a GitHub organization. @@ -161,7 +161,7 @@ def get_team( github_owner: str, team_slug: str, github_token: str | None = None, - **kwargs, + **kwargs: Any, ) -> dict[str, Any]: """Get details of a specific GitHub team. @@ -189,7 +189,7 @@ def list_org_members( role: str = "member", include_pending: bool = False, github_token: str | None = None, - **kwargs, + **kwargs: Any, ) -> list[dict[str, Any]]: """List members of a GitHub organization. @@ -215,7 +215,7 @@ def get_repository_file( file_path: str, github_branch: str | None = None, github_token: str | None = None, - **kwargs, + **kwargs: Any, ) -> dict[str, Any]: """Get a file from a GitHub repository. 
From 09b2487a8c36e5414382dd3e02aaf67ca6e534b1 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:02:54 -0500 Subject: [PATCH 010/287] ci: typecheck aws connectors --- .github/workflows/ci.yml | 5 +++ README.md | 7 +++- src/extended_data/connectors/aws/__init__.py | 37 ++++++++++++------- .../connectors/aws/organizations.py | 24 ++++++++++-- src/extended_data/connectors/aws/s3.py | 26 ++++++++++++- src/extended_data/connectors/aws/sso.py | 13 +++++++ src/extended_data/connectors/aws/tools.py | 33 +++++++++++------ 7 files changed, 114 insertions(+), 31 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e75e473..db6f333 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,6 +55,11 @@ jobs: src/extended_data/connectors/vault/__init__.py src/extended_data/connectors/github/__init__.py src/extended_data/connectors/github/tools.py + src/extended_data/connectors/aws/__init__.py + src/extended_data/connectors/aws/tools.py + src/extended_data/connectors/aws/s3.py + src/extended_data/connectors/aws/sso.py + src/extended_data/connectors/aws/organizations.py - run: uv run pytest tests/core tests/logging tests/inputs - run: uv run pytest tests/connectors/test_cloud_params.py tests/connectors/test_connectors.py tests/connectors/test_secrets.py tests/connectors/test_cli.py tests/connectors/test_mcp.py tests/connectors/meshy/test_models.py - run: uv build diff --git a/README.md b/README.md index bf4b2bd..2ab456b 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,12 @@ uv run mypy \ src/extended_data/connectors/slack/tools.py \ src/extended_data/connectors/vault/__init__.py \ src/extended_data/connectors/github/__init__.py \ - src/extended_data/connectors/github/tools.py + src/extended_data/connectors/github/tools.py \ + src/extended_data/connectors/aws/__init__.py \ + src/extended_data/connectors/aws/tools.py \ + src/extended_data/connectors/aws/s3.py \ + src/extended_data/connectors/aws/sso.py \ + 
src/extended_data/connectors/aws/organizations.py uv build ``` diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index 614b346..e4a0681 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -24,6 +24,9 @@ from extended_data.logging import Logging +AWSSecretValue = str | dict[str, Any] | None + + if TYPE_CHECKING: import boto3 @@ -66,8 +69,8 @@ def __init__( self, execution_role_arn: str | None = None, logger: Logging | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: super().__init__(logger=logger, **kwargs) self._boto3 = _load_aws_sdk() self.execution_role_arn = execution_role_arn @@ -160,7 +163,7 @@ def get_aws_client( execution_role_arn: str | None = None, role_session_name: str | None = None, config: Config | None = None, - **client_args, + **client_args: Any, ) -> boto3.client: """Get a boto3 client for the specified service. @@ -185,7 +188,7 @@ def get_aws_resource( execution_role_arn: str | None = None, role_session_name: str | None = None, config: Config | None = None, - **resource_args, + **resource_args: Any, ) -> ServiceResource: """Get a boto3 resource for the specified service. @@ -275,14 +278,14 @@ def get_secret( def list_secrets( self, - filters: list[dict] | None = None, + filters: list[dict[str, Any]] | None = None, prefix: str | None = None, get_secret_values: bool = False, skip_empty_secrets: bool = False, execution_role_arn: str | None = None, role_session_name: str | None = None, - **kwargs, - ) -> dict[str, str | dict]: + **kwargs: Any, + ) -> dict[str, AWSSecretValue]: """List secrets from AWS Secrets Manager. 
Args: @@ -318,16 +321,16 @@ def list_secrets( role_session_name=role_session_name, ) - secrets: dict[str, str | dict] = {} + secrets: dict[str, AWSSecretValue] = {} paginator = secretsmanager.get_paginator("list_secrets") - effective_filters: list[dict] = [] + effective_filters: list[dict[str, Any]] = [] if filters: effective_filters.extend(filters) if prefix: effective_filters.append({"Key": "name", "Values": [prefix]}) - paginate_kwargs: dict = {"IncludePlannedDeletion": False} + paginate_kwargs: dict[str, Any] = {"IncludePlannedDeletion": False} if effective_filters: paginate_kwargs["Filters"] = effective_filters @@ -465,7 +468,7 @@ def delete_secrets_matching( force_delete: bool = False, dry_run: bool = True, execution_role_arn: str | None = None, - **kwargs, + **kwargs: Any, ) -> list[str]: """Delete all secrets that match the provided name prefix.""" prefix = prefix or kwargs.get("name_prefix") @@ -514,7 +517,7 @@ def delete_secrets_matching( def copy_secrets_to_s3( self, - secrets: dict[str, str | dict], + secrets: dict[str, AWSSecretValue], bucket: str, key: str, execution_role_arn: str | None = None, @@ -599,7 +602,15 @@ def load_vendors_from_asm(prefix: str = "/vendors/") -> dict[str, str]: return vendors -if is_connector_available("aws"): +if TYPE_CHECKING: + from extended_data.connectors.aws.codedeploy import ( + create_codedeploy_deployment, + get_aws_codedeploy_deployments, + ) + from extended_data.connectors.aws.organizations import AWSOrganizationsMixin + from extended_data.connectors.aws.s3 import AWSS3Mixin + from extended_data.connectors.aws.sso import AWSSSOmixin +elif is_connector_available("aws"): # Import submodule operations to make them available when the AWS SDK is present. 
from extended_data.connectors.aws.codedeploy import create_codedeploy_deployment, get_aws_codedeploy_deployments from extended_data.connectors.aws.organizations import AWSOrganizationsMixin diff --git a/src/extended_data/connectors/aws/organizations.py b/src/extended_data/connectors/aws/organizations.py index 8e41c3c..e61e828 100644 --- a/src/extended_data/connectors/aws/organizations.py +++ b/src/extended_data/connectors/aws/organizations.py @@ -9,6 +9,7 @@ import re from collections import defaultdict +from collections.abc import Iterator from copy import deepcopy from typing import TYPE_CHECKING, Any @@ -30,6 +31,21 @@ class AWSOrganizationsMixin: - execution_role_arn """ + if TYPE_CHECKING: + logger: Any + execution_role_arn: str | None + + def get_aws_client( + self, + client_name: str, + execution_role_arn: str | None = None, + role_session_name: str | None = None, + config: Any | None = None, + **client_args: Any, + ) -> Any: ... + + def get_caller_account_id(self) -> str: ... + def get_organization_accounts( self, unhump_accounts: bool = True, @@ -80,7 +96,7 @@ def get_organization_accounts( ou_paginator = orgs.get_paginator("list_organizational_units_for_parent") tags_paginator = orgs.get_paginator("list_tags_for_resource") - def yield_tag_keypairs(tags: list[dict[str, str]]): + def yield_tag_keypairs(tags: list[dict[str, str]]) -> Iterator[tuple[str, str]]: for tag in tags: yield tag["Key"], tag["Value"] @@ -102,7 +118,7 @@ def get_accounts_recursive(parent_id: str) -> dict[str, dict[str, Any]]: for ou in page["OrganizationalUnits"]: ou_id = ou["Id"] ou_data = org_units.get(ou_id) - if is_nothing(ou_data): + if ou_data is None or is_nothing(ou_data): ou_data = {} for k, v in deepcopy(ou).items(): ou_data[f"Ou{k.title()}"] = v @@ -275,7 +291,7 @@ def get_organization_units( ou_paginator = orgs.get_paginator("list_organizational_units_for_parent") org_units: dict[str, dict[str, Any]] = {} - def get_ous_recursive(parent_id: str, parent_path: str = ""): + def 
get_ous_recursive(parent_id: str, parent_path: str = "") -> None: for page in ou_paginator.paginate(ParentId=parent_id): for ou in page["OrganizationalUnits"]: ou_id = ou["Id"] @@ -319,7 +335,7 @@ def get_tags(resource_id: str) -> dict[str, str]: tag_map[tag["Key"]] = tag["Value"] return tag_map - def walk(parent_id: str): + def walk(parent_id: str) -> None: for page in ou_paginator.paginate(ParentId=parent_id): for ou in page["OrganizationalUnits"]: ou_id = ou["Id"] diff --git a/src/extended_data/connectors/aws/s3.py b/src/extended_data/connectors/aws/s3.py index 910e483..59f7c7a 100644 --- a/src/extended_data/connectors/aws/s3.py +++ b/src/extended_data/connectors/aws/s3.py @@ -28,6 +28,28 @@ class AWSS3Mixin: - execution_role_arn """ + if TYPE_CHECKING: + logger: Any + execution_role_arn: str | None + + def get_aws_client( + self, + client_name: str, + execution_role_arn: str | None = None, + role_session_name: str | None = None, + config: Any | None = None, + **client_args: Any, + ) -> Any: ... + + def get_aws_resource( + self, + service_name: str, + execution_role_arn: str | None = None, + role_session_name: str | None = None, + config: Any | None = None, + **resource_args: Any, + ) -> ServiceResource: ... + def list_s3_buckets( self, unhump_buckets: bool = True, @@ -132,7 +154,7 @@ def get_json_object( bucket: str, key: str, execution_role_arn: str | None = None, - ) -> dict[str, Any] | list | None: + ) -> dict[str, Any] | list[Any] | None: """Get a JSON object from S3. 
Args: @@ -212,7 +234,7 @@ def put_json_object( self, bucket: str, key: str, - data: dict[str, Any] | list, + data: dict[str, Any] | list[Any], indent: int = 2, metadata: dict[str, str] | None = None, execution_role_arn: str | None = None, diff --git a/src/extended_data/connectors/aws/sso.py b/src/extended_data/connectors/aws/sso.py index 04277b1..881cc71 100644 --- a/src/extended_data/connectors/aws/sso.py +++ b/src/extended_data/connectors/aws/sso.py @@ -27,6 +27,19 @@ class AWSSSOmixin: - execution_role_arn """ + if TYPE_CHECKING: + logger: Any + execution_role_arn: str | None + + def get_aws_client( + self, + client_name: str, + execution_role_arn: str | None = None, + role_session_name: str | None = None, + config: Any | None = None, + **client_args: Any, + ) -> Any: ... + def get_identity_store_id( self, execution_role_arn: str | None = None, diff --git a/src/extended_data/connectors/aws/tools.py b/src/extended_data/connectors/aws/tools.py index 7eca792..841a6c7 100644 --- a/src/extended_data/connectors/aws/tools.py +++ b/src/extended_data/connectors/aws/tools.py @@ -126,15 +126,24 @@ def list_s3_objects(bucket: str) -> list[dict[str, Any]]: from extended_data.connectors.aws import AWSConnectorFull connector = AWSConnectorFull() - objects = connector.list_objects(bucket) - return [ - { - "key": key, - "size": data.get("Size", 0), - "last_modified": str(data.get("LastModified", "")), - } - for key, data in objects.items() - ] + objects_raw: Any = connector.list_objects(bucket) + if isinstance(objects_raw, dict): + objects = [{"key": key, **data} for key, data in objects_raw.items()] + else: + objects = objects_raw + + result: list[dict[str, Any]] = [] + for data in objects: + if not isinstance(data, dict): + continue + result.append( + { + "key": data.get("key", data.get("Key", "")), + "size": data.get("size", data.get("Size", 0)), + "last_modified": str(data.get("last_modified", data.get("LastModified", ""))), + } + ) + return result def list_accounts() -> 
list[dict[str, Any]]: @@ -216,7 +225,7 @@ def list_secrets( connector = AWSConnectorFull() # Align with tests: only pass arguments that match test expectations - kwargs = {} + kwargs: dict[str, Any] = {} if prefix: kwargs["prefix"] = prefix if get_values: @@ -224,10 +233,12 @@ def list_secrets( secrets = connector.list_secrets(**kwargs) - result = [] + result: list[dict[str, Any]] = [] for name, data in secrets.items(): if isinstance(data, str): result.append({"name": name, "arn": data}) + elif data is None: + result.append({"name": name, "arn": None, "value": None}) else: result.append({"name": name, "arn": data.get("ARN"), "value": data}) return result From 46900b8d01f29ced7fd7dee2da37acdc04ffecc5 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:07:48 -0500 Subject: [PATCH 011/287] ci: typecheck google connectors --- .github/workflows/ci.yml | 7 ++++ README.md | 9 ++++- .../connectors/google/__init__.py | 14 +++++--- .../connectors/google/billing.py | 8 ++++- src/extended_data/connectors/google/cloud.py | 9 ++++- src/extended_data/connectors/google/jules.py | 20 ++++++----- .../connectors/google/services.py | 33 +++++++++++++++---- src/extended_data/connectors/google/tools.py | 14 +++++--- .../connectors/google/workspace.py | 20 +++++++---- 9 files changed, 99 insertions(+), 35 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index db6f333..7fe73b9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,6 +60,13 @@ jobs: src/extended_data/connectors/aws/s3.py src/extended_data/connectors/aws/sso.py src/extended_data/connectors/aws/organizations.py + src/extended_data/connectors/google/__init__.py + src/extended_data/connectors/google/services.py + src/extended_data/connectors/google/workspace.py + src/extended_data/connectors/google/cloud.py + src/extended_data/connectors/google/billing.py + src/extended_data/connectors/google/tools.py + src/extended_data/connectors/google/jules.py - run: uv run 
pytest tests/core tests/logging tests/inputs - run: uv run pytest tests/connectors/test_cloud_params.py tests/connectors/test_connectors.py tests/connectors/test_secrets.py tests/connectors/test_cli.py tests/connectors/test_mcp.py tests/connectors/meshy/test_models.py - run: uv build diff --git a/README.md b/README.md index 2ab456b..91bee76 100644 --- a/README.md +++ b/README.md @@ -137,7 +137,14 @@ uv run mypy \ src/extended_data/connectors/aws/tools.py \ src/extended_data/connectors/aws/s3.py \ src/extended_data/connectors/aws/sso.py \ - src/extended_data/connectors/aws/organizations.py + src/extended_data/connectors/aws/organizations.py \ + src/extended_data/connectors/google/__init__.py \ + src/extended_data/connectors/google/services.py \ + src/extended_data/connectors/google/workspace.py \ + src/extended_data/connectors/google/cloud.py \ + src/extended_data/connectors/google/billing.py \ + src/extended_data/connectors/google/tools.py \ + src/extended_data/connectors/google/jules.py uv build ``` diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index de4c836..d603df8 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -63,8 +63,8 @@ def __init__( scopes: list[str] | None = None, subject: str | None = None, logger: Logging | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> None: """Initialize the Google connector. 
Args: @@ -93,7 +93,11 @@ def __init__( self.logger.exception(f"Failed to parse GOOGLE_SERVICE_ACCOUNT JSON: {e}") raise - self.service_account_info = service_account_info + if not isinstance(service_account_info, dict): + msg = "Google service account info must be a JSON object" + raise TypeError(msg) + + self.service_account_info: dict[str, Any] = service_account_info self._credentials: service_account.Credentials | None = None self._services: dict[str, Any] = {} @@ -274,8 +278,8 @@ def _normalize_str_sequence(value: Sequence[Any] | str | None) -> list[str] | No return None if isinstance(value, str): - normalized = [item.strip() for item in value.split(",") if item.strip()] - return normalized or None + parts = [item.strip() for item in value.split(",") if item.strip()] + return parts or None normalized: list[str] = [] for item in value: diff --git a/src/extended_data/connectors/google/billing.py b/src/extended_data/connectors/google/billing.py index 3425960..821e3bf 100644 --- a/src/extended_data/connectors/google/billing.py +++ b/src/extended_data/connectors/google/billing.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING, Any from extended_data import unhump_map @@ -19,6 +19,12 @@ class GoogleBillingMixin: - logger """ + if TYPE_CHECKING: + logger: Any + service_account_info: dict[str, Any] + + def get_billing_service(self) -> Any: ... 
+ def list_billing_accounts( self, filter_query: str | None = None, diff --git a/src/extended_data/connectors/google/cloud.py b/src/extended_data/connectors/google/cloud.py index 0ff8763..0c948c7 100644 --- a/src/extended_data/connectors/google/cloud.py +++ b/src/extended_data/connectors/google/cloud.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING, Any from extended_data import unhump_map @@ -20,6 +20,13 @@ class GoogleCloudMixin: - logger """ + if TYPE_CHECKING: + logger: Any + + def get_cloud_resource_manager_service(self) -> Any: ... + + def get_iam_service(self) -> Any: ... + def get_organization_id(self) -> str: """Get the Google Cloud organization ID. diff --git a/src/extended_data/connectors/google/jules.py b/src/extended_data/connectors/google/jules.py index 287fd25..4a6d33b 100644 --- a/src/extended_data/connectors/google/jules.py +++ b/src/extended_data/connectors/google/jules.py @@ -29,6 +29,8 @@ from enum import Enum from typing import Any +import httpx + from pydantic import BaseModel, Field from extended_data.connectors.base import VendorConnectorBase @@ -74,14 +76,14 @@ class Source(BaseModel): name: str = Field(..., description="Resource name (e.g., sources/github/org/repo)") id: str = Field(..., description="Source ID") - github_repo: dict | None = Field(None, alias="githubRepo") + github_repo: dict[str, Any] | None = Field(None, alias="githubRepo") class SourceContext(BaseModel): """Context for a session's source.""" source: str = Field(..., description="Source resource name") - github_repo_context: dict | None = Field(None, alias="githubRepoContext") + github_repo_context: dict[str, Any] | None = Field(None, alias="githubRepoContext") class PullRequestOutput(BaseModel): @@ -103,7 +105,7 @@ class Session(BaseModel): prompt: str = Field("", description="Original prompt") state: str | None = Field(None, description="Current state") source_context: SourceContext | None = Field(None, 
alias="sourceContext") - outputs: list[dict] = Field(default_factory=list, description="Session outputs") + outputs: list[dict[str, Any]] = Field(default_factory=list, description="Session outputs") @property def pull_request(self) -> PullRequestOutput | None: @@ -137,8 +139,8 @@ def __init__( api_key: str | None = None, base_url: str | None = None, timeout: float = 60.0, - **kwargs, - ): + **kwargs: Any, + ) -> None: """Initialize the Jules connector. Args: @@ -156,7 +158,7 @@ def _build_headers(self) -> dict[str, str]: "Content-Type": "application/json", } - def _handle_response(self, response) -> dict: + def _handle_response(self, response: httpx.Response) -> dict[str, Any]: """Handle API response, raising on errors.""" if not response.is_success: try: @@ -184,7 +186,7 @@ def list_sources(self, page_size: int = 100, page_token: str = "") -> list[Sourc Returns: List of Source objects. """ - params = {"pageSize": page_size} + params: dict[str, Any] = {"pageSize": page_size} if page_token: params["pageToken"] = page_token @@ -219,7 +221,7 @@ def create_session( Returns: Created Session object. """ - body = { + body: dict[str, Any] = { "prompt": prompt, "sourceContext": { "source": source, @@ -268,7 +270,7 @@ def list_sessions(self, page_size: int = 20, page_token: str = "") -> list[Sessi Returns: List of Session objects. 
""" - params = {"pageSize": page_size} + params: dict[str, Any] = {"pageSize": page_size} if page_token: params["pageToken"] = page_token diff --git a/src/extended_data/connectors/google/services.py b/src/extended_data/connectors/google/services.py index 806a7b8..23a92a6 100644 --- a/src/extended_data/connectors/google/services.py +++ b/src/extended_data/connectors/google/services.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING, Any from extended_data import unhump_map @@ -25,6 +25,25 @@ class GoogleServicesMixin: - logger """ + if TYPE_CHECKING: + logger: Any + + def get_compute_service(self) -> Any: ... + + def get_container_service(self) -> Any: ... + + def get_storage_service(self) -> Any: ... + + def get_sqladmin_service(self) -> Any: ... + + def get_pubsub_service(self) -> Any: ... + + def get_serviceusage_service(self) -> Any: ... + + def get_cloudkms_service(self) -> Any: ... + + def get_cloud_resource_manager_service(self) -> Any: ... 
+ # ========================================================================= # Compute Engine # ========================================================================= @@ -54,11 +73,11 @@ def list_compute_instances( # List instances in specific zone page_token = None while True: - params: dict[str, Any] = {"project": project_id, "zone": zone} + zone_params: dict[str, Any] = {"project": project_id, "zone": zone} if page_token: - params["pageToken"] = page_token + zone_params["pageToken"] = page_token - response = service.instances().list(**params).execute() + response = service.instances().list(**zone_params).execute() instances.extend(response.get("items", [])) page_token = response.get("nextPageToken") @@ -68,11 +87,11 @@ def list_compute_instances( # Aggregate list across all zones page_token = None while True: - params: dict[str, Any] = {"project": project_id} + aggregate_params: dict[str, Any] = {"project": project_id} if page_token: - params["pageToken"] = page_token + aggregate_params["pageToken"] = page_token - response = service.instances().aggregatedList(**params).execute() + response = service.instances().aggregatedList(**aggregate_params).execute() for zone_data in response.get("items", {}).values(): instances.extend(zone_data.get("instances", [])) diff --git a/src/extended_data/connectors/google/tools.py b/src/extended_data/connectors/google/tools.py index a1bbcc8..3c93bea 100644 --- a/src/extended_data/connectors/google/tools.py +++ b/src/extended_data/connectors/google/tools.py @@ -229,15 +229,18 @@ def list_workspace_users( from extended_data.connectors.google import GoogleConnectorFull connector = GoogleConnectorFull() - users = connector.list_users( + users_raw: Any = connector.list_users( domain=domain or None, flatten_names=True, key_by_email=False, ) + users = list(users_raw.values()) if isinstance(users_raw, dict) else users_raw # Limit results and extract key fields - result = [] + result: list[dict[str, Any]] = [] for user in 
users[:max_results]: + if not isinstance(user, dict): + continue result.append( { "email": user.get("primaryEmail", ""), @@ -267,14 +270,17 @@ def list_workspace_groups( from extended_data.connectors.google import GoogleConnectorFull connector = GoogleConnectorFull() - groups = connector.list_groups( + groups_raw: Any = connector.list_groups( domain=domain or None, key_by_email=False, ) + groups = list(groups_raw.values()) if isinstance(groups_raw, dict) else groups_raw # Limit results and extract key fields - result = [] + result: list[dict[str, Any]] = [] for group in groups[:max_results]: + if not isinstance(group, dict): + continue result.append( { "email": group.get("email", ""), diff --git a/src/extended_data/connectors/google/workspace.py b/src/extended_data/connectors/google/workspace.py index b27bb04..3401892 100644 --- a/src/extended_data/connectors/google/workspace.py +++ b/src/extended_data/connectors/google/workspace.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING, Any from extended_data import unhump_map @@ -20,7 +20,13 @@ class GoogleWorkspaceMixin: - logger """ - def list_users( + if TYPE_CHECKING: + logger: Any + service_account_info: dict[str, Any] + + def get_admin_directory_service(self, subject: str | None = None) -> Any: ... + + def list_workspace_users( self, domain: str | None = None, max_results: int = 500, @@ -98,7 +104,7 @@ def create_user( change_password_at_next_login: bool = True, org_unit_path: str = "/", subject: str | None = None, - **additional_fields, + **additional_fields: Any, ) -> dict[str, Any]: """Create a user in Google Workspace. @@ -142,7 +148,7 @@ def update_user( self, user_key: str, subject: str | None = None, - **fields, + **fields: Any, ) -> dict[str, Any]: """Update a user in Google Workspace. 
@@ -174,7 +180,7 @@ def delete_user( service.users().delete(userKey=user_key).execute() self.logger.info(f"Deleted user: {user_key}") - def list_groups( + def list_workspace_groups( self, domain: str | None = None, max_results: int = 200, @@ -419,7 +425,7 @@ def create_or_update_user( change_password_at_next_login: bool = True, org_unit_path: str = "/", subject: str | None = None, - **additional_fields, + **additional_fields: Any, ) -> dict[str, Any]: """Create or update a user in Google Workspace. @@ -487,7 +493,7 @@ def create_or_update_group( description: str = "", update_if_exists: bool = False, subject: str | None = None, - **additional_fields, + **additional_fields: Any, ) -> dict[str, Any]: """Create or update a group in Google Workspace. From c8046decefef7055b2065bfc948d5c64929fa0d1 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:08:58 -0500 Subject: [PATCH 012/287] ci: typecheck full connector package --- .github/workflows/ci.yml | 34 +--------------------------------- README.md | 34 +--------------------------------- 2 files changed, 2 insertions(+), 66 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7fe73b9..7625854 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,39 +34,7 @@ jobs: src/extended_data/io src/extended_data/inputs src/extended_data/logging - src/extended_data/connectors/_optional.py - src/extended_data/connectors/base.py - src/extended_data/connectors/registry.py - src/extended_data/connectors/cli.py - src/extended_data/connectors/connectors.py - src/extended_data/connectors/secrets/__init__.py - src/extended_data/connectors/meshy/jobs.py - src/extended_data/connectors/meshy/persistence/vector_store.py - src/extended_data/connectors/meshy/base.py - src/extended_data/connectors/meshy/tools.py - src/extended_data/connectors/meshy/mcp.py - src/extended_data/connectors/mcp.py - src/extended_data/connectors/meshy/connector.py - 
src/extended_data/connectors/zoom/__init__.py - src/extended_data/connectors/cursor/__init__.py - src/extended_data/connectors/anthropic/__init__.py - src/extended_data/connectors/slack/__init__.py - src/extended_data/connectors/slack/tools.py - src/extended_data/connectors/vault/__init__.py - src/extended_data/connectors/github/__init__.py - src/extended_data/connectors/github/tools.py - src/extended_data/connectors/aws/__init__.py - src/extended_data/connectors/aws/tools.py - src/extended_data/connectors/aws/s3.py - src/extended_data/connectors/aws/sso.py - src/extended_data/connectors/aws/organizations.py - src/extended_data/connectors/google/__init__.py - src/extended_data/connectors/google/services.py - src/extended_data/connectors/google/workspace.py - src/extended_data/connectors/google/cloud.py - src/extended_data/connectors/google/billing.py - src/extended_data/connectors/google/tools.py - src/extended_data/connectors/google/jules.py + src/extended_data/connectors - run: uv run pytest tests/core tests/logging tests/inputs - run: uv run pytest tests/connectors/test_cloud_params.py tests/connectors/test_connectors.py tests/connectors/test_secrets.py tests/connectors/test_cli.py tests/connectors/test_mcp.py tests/connectors/meshy/test_models.py - run: uv build diff --git a/README.md b/README.md index 91bee76..20aaa59 100644 --- a/README.md +++ b/README.md @@ -112,39 +112,7 @@ uv run mypy \ src/extended_data/io \ src/extended_data/inputs \ src/extended_data/logging \ - src/extended_data/connectors/_optional.py \ - src/extended_data/connectors/base.py \ - src/extended_data/connectors/registry.py \ - src/extended_data/connectors/cli.py \ - src/extended_data/connectors/connectors.py \ - src/extended_data/connectors/secrets/__init__.py \ - src/extended_data/connectors/meshy/jobs.py \ - src/extended_data/connectors/meshy/persistence/vector_store.py \ - src/extended_data/connectors/meshy/base.py \ - src/extended_data/connectors/meshy/tools.py \ - 
src/extended_data/connectors/meshy/mcp.py \ - src/extended_data/connectors/mcp.py \ - src/extended_data/connectors/meshy/connector.py \ - src/extended_data/connectors/zoom/__init__.py \ - src/extended_data/connectors/cursor/__init__.py \ - src/extended_data/connectors/anthropic/__init__.py \ - src/extended_data/connectors/slack/__init__.py \ - src/extended_data/connectors/slack/tools.py \ - src/extended_data/connectors/vault/__init__.py \ - src/extended_data/connectors/github/__init__.py \ - src/extended_data/connectors/github/tools.py \ - src/extended_data/connectors/aws/__init__.py \ - src/extended_data/connectors/aws/tools.py \ - src/extended_data/connectors/aws/s3.py \ - src/extended_data/connectors/aws/sso.py \ - src/extended_data/connectors/aws/organizations.py \ - src/extended_data/connectors/google/__init__.py \ - src/extended_data/connectors/google/services.py \ - src/extended_data/connectors/google/workspace.py \ - src/extended_data/connectors/google/cloud.py \ - src/extended_data/connectors/google/billing.py \ - src/extended_data/connectors/google/tools.py \ - src/extended_data/connectors/google/jules.py + src/extended_data/connectors uv build ``` From 684678ea77c4970d3730738ef46e56f885ccee1b Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:10:12 -0500 Subject: [PATCH 013/287] ci: typecheck full package --- .github/workflows/ci.yml | 9 +-------- README.md | 8 +------- src/extended_data/__init__.py | 4 ++-- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7625854..07c38f6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,14 +27,7 @@ jobs: enable-cache: true - run: uv sync --python 3.13 --extra tests --extra typing - run: uvx ruff check src tests - - run: > - uv run mypy - src/extended_data/primitives - src/extended_data/containers - src/extended_data/io - src/extended_data/inputs - src/extended_data/logging - src/extended_data/connectors + 
- run: uv run mypy src/extended_data - run: uv run pytest tests/core tests/logging tests/inputs - run: uv run pytest tests/connectors/test_cloud_params.py tests/connectors/test_connectors.py tests/connectors/test_secrets.py tests/connectors/test_cli.py tests/connectors/test_mcp.py tests/connectors/meshy/test_models.py - run: uv build diff --git a/README.md b/README.md index 20aaa59..c89fd9c 100644 --- a/README.md +++ b/README.md @@ -106,13 +106,7 @@ More detail lives in [`docs/package-surface.md`](docs/package-surface.md). uv sync --extra tests --extra typing uv run pytest uv run ruff check src tests -uv run mypy \ - src/extended_data/primitives \ - src/extended_data/containers \ - src/extended_data/io \ - src/extended_data/inputs \ - src/extended_data/logging \ - src/extended_data/connectors +uv run mypy src/extended_data uv build ``` diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index b596b00..a587e20 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -8,7 +8,7 @@ import importlib -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from extended_data._version import __version__ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, extend_data, to_builtin @@ -146,7 +146,7 @@ } -def __getattr__(name: str): +def __getattr__(name: str) -> Any: """Lazily expose integrated subpackage primitives at the package root.""" if name not in _LAZY_EXPORTS: raise AttributeError(f"module {__name__!r} has no attribute {name!r}") From 6caedbac06db3d846e77c374636147911c617c60 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:16:21 -0500 Subject: [PATCH 014/287] feat: recursively promote extended containers --- docs/package-surface.md | 9 ++++ examples/connectors/README.md | 19 ++++---- examples/core/README.md | 25 ++++++----- examples/core/file_operations.py | 2 +- examples/inputs/README.md | 15 ++++--- examples/logging/README.md | 19 ++++---- 
src/extended_data/containers/mappings.py | 9 +++- src/extended_data/containers/sequences.py | 54 ++++++++++++++++++++--- src/extended_data/logging/utils.py | 2 +- tests/core/test_containers.py | 47 ++++++++++++++++++++ tests/core/test_string_data_type.py | 2 +- 11 files changed, 158 insertions(+), 45 deletions(-) diff --git a/docs/package-surface.md b/docs/package-surface.md index e54f916..eaac464 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -38,6 +38,15 @@ items = ExtendedList([1, [2, [3]]]).flatten() tags = ExtendedSet({"prod", "prod", ""}).compact() ``` +`ExtendedDict`, `ExtendedList`, and `ExtendedSet` recursively promote nested +plain values on construction and mutation, so method chains can continue through +data loaded from normal Python literals: + +```python +payload = ExtendedDict({"service": {"name": "api"}}) +payload["service"]["name"].upper_first() +``` + Tier 3 decode surfaces can promote plain decoded values into Tier 2 containers: ```python diff --git a/examples/connectors/README.md b/examples/connectors/README.md index e55af93..351b95f 100644 --- a/examples/connectors/README.md +++ b/examples/connectors/README.md @@ -1,6 +1,7 @@ -# Examples +# Connector Examples -This directory contains working examples demonstrating how to use extended-data. +This directory contains working examples for `extended_data.connectors` and the +vendor adapters that hang off `ConnectorFabric`. 
## Quick Start @@ -8,17 +9,17 @@ Install extended-data with the extras you need: ```bash # Install with all connectors -pip install extended-data[all] +pip install "extended-data[all]" # Or install specific connectors -pip install extended-data[aws,google,meshy] +pip install "extended-data[aws,google,meshy]" # For AI framework integration -pip install extended-data[langchain] -pip install extended-data[crewai] +pip install "extended-data[langchain]" +pip install "extended-data[crewai]" # For the Meshy MCP server -pip install extended-data[meshy,mcp] +pip install "extended-data[meshy,mcp]" ``` ## Examples @@ -57,8 +58,8 @@ export ANTHROPIC_API_KEY="sk-ant-..." ```bash # Run any example -python examples/basic_meshy.py +uv run python examples/connectors/basic_meshy.py # Run with debug logging -LOGLEVEL=DEBUG python examples/basic_meshy.py +LOGLEVEL=DEBUG uv run python examples/connectors/basic_meshy.py ``` diff --git a/examples/core/README.md b/examples/core/README.md index 1fbb5fa..bebd7b1 100644 --- a/examples/core/README.md +++ b/examples/core/README.md @@ -1,8 +1,9 @@ -# Extended Data Types Examples +# Core Examples -This directory contains working code samples demonstrating the capabilities of -the `Extended Data core` library. The examples intentionally mirror the public -guides and are part of the documented contract, not throwaway snippets. +This directory contains working code samples for the core `extended-data` +package surface: Tier 1 primitives, Tier 2 containers, and Tier 3 file/data +processors. The examples intentionally mirror the public README and package +surface docs, so treat them as part of the documented contract. ## Examples @@ -16,21 +17,23 @@ guides and are part of the documented contract, not throwaway snippets. 
## Related Documentation -- [Package docs](https://extended-data.dev/core/data-types/) -- [Getting started](https://extended-data.dev/getting-started/) -- [Packages overview](https://extended-data.dev/packages/) +- [Package surface](../../docs/package-surface.md) +- [Repository README](../../README.md) ## Running Examples ```bash -# From the repository root, run the full example suite -tox -e edt-examples +# From the repository root, install the local package +uv sync --extra tests --extra typing -# Or run a single example with the prepared tox environment +# Run a single example uv run python examples/core/basic_usage.py + +# Run the core test suite +uv run pytest tests/core ``` ## Requirements - Python 3.10-3.14 -- Extended Data core package +- `extended-data` diff --git a/examples/core/file_operations.py b/examples/core/file_operations.py index 72b205b..aefe79e 100755 --- a/examples/core/file_operations.py +++ b/examples/core/file_operations.py @@ -69,7 +69,7 @@ def demonstrate_file_operations() -> None: with tempfile.TemporaryDirectory() as tmpdir: # Write a file test_file = Path(tmpdir) / "test.txt" - content = "Hello, Extended Data Types!\nThis is a test file." + content = "Hello, extended-data!\nThis is a test file." write_file(test_file, content) print(f"Wrote file: {test_file}") diff --git a/examples/inputs/README.md b/examples/inputs/README.md index 3acafce..7556b32 100644 --- a/examples/inputs/README.md +++ b/examples/inputs/README.md @@ -1,19 +1,20 @@ -# Examples +# Input Examples -This directory contains working examples demonstrating the features of `Extended Data inputs`. +This directory contains working examples for `InputProvider` and the decorator +helpers in `extended_data.inputs`. 
## Running Examples All examples can be run as Python modules from the project root: ```bash -# Install the package first -uv sync +# Install the local package first +uv sync --extra tests # Run examples -uv run python -m examples.basic_usage -uv run python -m examples.decorator_api -uv run python -m examples.encoding_decoding +uv run python examples/inputs/basic_usage.py +uv run python examples/inputs/decorator_api.py +uv run python examples/inputs/encoding_decoding.py ``` ## Available Examples diff --git a/examples/logging/README.md b/examples/logging/README.md index 5317e58..5421a15 100644 --- a/examples/logging/README.md +++ b/examples/logging/README.md @@ -1,6 +1,7 @@ -# LifecycleLogging Examples +# Logging Examples -This directory contains working examples demonstrating the features of the `extended_data.logging` package. +This directory contains working examples for structured lifecycle logging in +`extended_data.logging`. ## Examples @@ -13,7 +14,7 @@ Demonstrates fundamental logging capabilities: - Adding identifiers to messages ```bash -python examples/basic_logging.py +uv run python examples/logging/basic_logging.py ``` ### markers_and_storage.py @@ -24,7 +25,7 @@ Shows how to use markers for message organization: - Combining both marker types ```bash -python examples/markers_and_storage.py +uv run python examples/logging/markers_and_storage.py ``` ### verbosity_control.py @@ -35,7 +36,7 @@ Demonstrates verbosity settings: - Registering bypass markers that ignore verbosity settings ```bash -python examples/verbosity_control.py +uv run python examples/logging/verbosity_control.py ``` ### exit_run_formatting.py @@ -47,22 +48,22 @@ Shows result formatting and transformation: - Custom transform functions ```bash -python examples/exit_run_formatting.py +uv run python examples/logging/exit_run_formatting.py ``` ## Running the Examples 1. Install the package: ```bash - pip install extended_data.logging + pip install extended-data ``` 2. 
Run any example: ```bash - python examples/.py + python examples/logging/.py ``` Or from the repository root: ```bash -uv run python examples/.py +uv run python examples/logging/.py ``` diff --git a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py index d139a9c..3683898 100644 --- a/src/extended_data/containers/mappings.py +++ b/src/extended_data/containers/mappings.py @@ -23,7 +23,14 @@ class ExtendedDict(UserDict[str, Any]): def __init__(self, initialdata: Mapping[str, Any] | None = None, **kwargs: Any) -> None: """Initialize the extended dictionary.""" - super().__init__(dict(initialdata or {}, **kwargs)) + super().__init__() + self.update(dict(initialdata or {}, **kwargs)) + + def __setitem__(self, key: str, item: Any) -> None: + """Set a value while preserving extended nested containers.""" + from extended_data.containers.factory import extend_data + + self.data[key] = extend_data(item) def deep_merge(self, *mappings: Mapping[str, Any]) -> ExtendedDict: """Return a deeply merged copy.""" diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index 14857c3..b864e3a 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -4,7 +4,7 @@ from collections import UserList from collections.abc import Callable, Iterable, Iterator, MutableSet -from typing import Any, TypeVar +from typing import Any, SupportsIndex, TypeVar, cast, overload from extended_data.primitives.sequences import flatten_list from extended_data.primitives.state import is_nothing @@ -20,11 +20,46 @@ class ExtendedList(UserList[T]): def __init__(self, initlist: Iterable[T] | None = None) -> None: """Initialize the extended list.""" - super().__init__(list(initlist or [])) + super().__init__() + self.extend(initlist or []) + + @staticmethod + def _wrap_item(item: T) -> T: + """Promote nested built-in containers to extended containers.""" + from extended_data.containers.factory 
import extend_data + + return cast(T, extend_data(item)) + + @overload + def __setitem__(self, i: SupportsIndex, item: T) -> None: ... + + @overload + def __setitem__(self, i: slice, item: Iterable[T]) -> None: ... + + def __setitem__(self, i: SupportsIndex | slice, item: T | Iterable[T]) -> None: + """Set values while preserving extended nested containers.""" + if isinstance(i, slice): + self.data[i] = [self._wrap_item(value) for value in cast(Iterable[T], item)] + return + self.data[i] = self._wrap_item(cast(T, item)) + + def append(self, item: T) -> None: + """Append a value while preserving extended nested containers.""" + self.data.append(self._wrap_item(item)) + + def extend(self, other: Iterable[T]) -> None: + """Extend values while preserving extended nested containers.""" + self.data.extend(self._wrap_item(item) for item in other) + + def insert(self, i: int, item: T) -> None: + """Insert a value while preserving extended nested containers.""" + self.data.insert(i, self._wrap_item(item)) def flatten(self) -> ExtendedList[Any]: """Return a recursively flattened copy.""" - return ExtendedList(flatten_list(list(self.data))) + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(flatten_list(to_builtin(self.data))) def compact(self) -> ExtendedList[T]: """Return a copy without values considered empty.""" @@ -56,7 +91,16 @@ class ExtendedSet(MutableSet[T]): def __init__(self, values: Iterable[T] | None = None) -> None: """Initialize the extended set.""" - self._data: set[T] = set(values or []) + self._data: set[T] = set() + for value in values or []: + self.add(value) + + @staticmethod + def _wrap_item(item: T) -> T: + """Promote nested built-in containers to extended containers.""" + from extended_data.containers.factory import extend_data + + return cast(T, extend_data(item)) def __contains__(self, value: object) -> bool: """Return whether the set contains a value.""" @@ -72,7 +116,7 @@ def __len__(self) -> int: def 
add(self, value: T) -> None: """Add a value to the set.""" - self._data.add(value) + self._data.add(self._wrap_item(value)) def discard(self, value: T) -> None: """Remove a value from the set if present.""" diff --git a/src/extended_data/logging/utils.py b/src/extended_data/logging/utils.py index 3843e8c..a044805 100644 --- a/src/extended_data/logging/utils.py +++ b/src/extended_data/logging/utils.py @@ -1,4 +1,4 @@ -"""Utility helpers for LifecycleLogging internals.""" +"""Utility helpers for structured logging internals.""" from __future__ import annotations diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 99d7ea7..9bc5805 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import Any + import extended_data from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, extend_data, to_builtin @@ -33,6 +35,21 @@ def test_extended_dict_composes_mapping_primitives() -> None: assert "items" in rejected +def test_extended_dict_promotes_nested_values_on_mutation() -> None: + """ExtendedDict keeps nested values in the Tier 2 surface.""" + value = ExtendedDict({"service": {"name": "api"}}) + + value["owner"] = "platform" + value.update({"ports": [8080, "9090"]}) + + assert isinstance(value["service"], ExtendedDict) + assert isinstance(value["service"]["name"], ExtendedString) + assert isinstance(value["owner"], ExtendedString) + assert isinstance(value["ports"], ExtendedList) + assert isinstance(value["ports"][1], ExtendedString) + assert value["service"]["name"].upper_first() == "Api" + + def test_extended_list_composes_sequence_primitives() -> None: """ExtendedList composes Tier 1 sequence primitives.""" value = ExtendedList([1, [2, [3]], "", 2]) @@ -44,6 +61,25 @@ def test_extended_list_composes_sequence_primitives() -> None: assert ExtendedList([1, 2]).map(lambda item: item * 2) == [2, 4] +def 
test_extended_list_promotes_nested_values_on_mutation() -> None: + """ExtendedList keeps nested values in the Tier 2 surface.""" + value: ExtendedList[Any] = ExtendedList([{"name": "api"}]) + + value.append("worker") + value.extend([{"name": "scheduler"}]) + value.insert(0, ["frontdoor"]) + value[1] = {"name": "gateway"} + value[2:3] = ["jobs"] + + assert isinstance(value[0], ExtendedList) + assert isinstance(value[0][0], ExtendedString) + assert isinstance(value[1], ExtendedDict) + assert isinstance(value[1]["name"], ExtendedString) + assert isinstance(value[2], ExtendedString) + assert isinstance(value[3], ExtendedDict) + assert value[1]["name"].upper_first() == "Gateway" + + def test_extended_set_composes_set_operations() -> None: """ExtendedSet provides chainable set operations.""" value = ExtendedSet({1, 2, 3, None}) @@ -54,6 +90,17 @@ def test_extended_set_composes_set_operations() -> None: assert value.difference({1, None}).to_set() == {2, 3} +def test_extended_set_promotes_string_values() -> None: + """ExtendedSet keeps hashable nested values in the Tier 2 surface.""" + value = ExtendedSet({"api"}) + + value.add("worker") + + assert all(isinstance(item, ExtendedString) for item in value) + assert value.to_set() == {"api", "worker"} + assert to_builtin(value) == {"api", "worker"} + + def test_extend_data_recursively_wraps_builtin_containers() -> None: """The container factory promotes plain values into the Tier 2 surface.""" wrapped = extend_data( diff --git a/tests/core/test_string_data_type.py b/tests/core/test_string_data_type.py index 1cebad8..aa3a152 100644 --- a/tests/core/test_string_data_type.py +++ b/tests/core/test_string_data_type.py @@ -1,4 +1,4 @@ -"""Test Suite for Extended Data Types - String Operations +"""Test suite for extended-data string operations. This module contains test functions and fixtures for verifying the functionality of various string operations provided by the `extended_data` package. 
The module covers a wide range of string manipulation and validation From b239d0c53f6f7d562ffbc31a1afc61fd1449f096 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:18:56 -0500 Subject: [PATCH 015/287] feat: decode connector response data --- docs/package-surface.md | 5 ++ src/extended_data/connectors/base.py | 79 +++++++++++++++++++++ tests/connectors/test_base.py | 102 +++++++++++++++++++++++++++ 3 files changed, 186 insertions(+) create mode 100644 tests/connectors/test_base.py diff --git a/docs/package-surface.md b/docs/package-surface.md index eaac464..7062597 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -92,6 +92,11 @@ Both paths share the same input provider and lifecycle logger, and both cache instances by connector type and constructor inputs. Generic connector names are stripped and lowercased before lookup. +Connectors that inherit `VendorConnectorBase` can keep raw transport access with +`request()` or use `request_data()`, `get_data()`, `post_data()`, and the other +verb-specific helpers to decode HTTP JSON, YAML, TOML, HCL, or text responses +through the same Tier 2 container bridge used by file and input decoding. 
+ Use the catalog helpers when a workflow needs to inspect which integrations can run in the current environment: diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index eca3c40..6cecb08 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -288,26 +288,105 @@ def request( return response + @staticmethod + def _suffix_from_content_type(content_type: str | None) -> str | None: + """Infer a data suffix from an HTTP Content-Type header.""" + if not content_type: + return None + + media_type = content_type.split(";", maxsplit=1)[0].strip().lower() + if media_type == "application/json" or media_type.endswith("+json"): + return "json" + if media_type in {"application/yaml", "application/x-yaml", "text/yaml", "text/x-yaml"} or media_type.endswith( + "+yaml" + ): + return "yaml" + if media_type in {"application/toml", "text/toml"}: + return "toml" + if media_type in {"application/hcl", "text/hcl"}: + return "hcl" + if media_type.startswith("text/"): + return "raw" + return None + + def decode_response( + self, + response: httpx.Response, + *, + suffix: str | None = None, + as_extended: bool = True, + ) -> Any: + """Decode an HTTP response body through the extended-data IO layer. + + Structured response bodies are decoded from JSON, YAML, TOML, or HCL and + promoted to Tier 2 containers by default. Text responses become raw + strings, and unknown binary responses remain bytes. 
+ """ + if not response.content: + return None + + resolved_suffix = suffix or self._suffix_from_content_type(response.headers.get("content-type")) + if resolved_suffix is None: + return response.content + + from extended_data.io.files import decode_file + + return decode_file(response.content, suffix=resolved_suffix, as_extended=as_extended) + + def request_data( + self, + method: str, + endpoint: str, + *, + headers: dict[str, str] | None = None, + suffix: str | None = None, + as_extended: bool = True, + **kwargs: Any, + ) -> Any: + """Make an HTTP request and return decoded response data.""" + response = self.request(method, endpoint, headers=headers, **kwargs) + return self.decode_response(response, suffix=suffix, as_extended=as_extended) + def get(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP GET request.""" return self.request("GET", endpoint, **kwargs) + def get_data(self, endpoint: str, *, suffix: str | None = None, as_extended: bool = True, **kwargs: Any) -> Any: + """HTTP GET request returning decoded response data.""" + return self.request_data("GET", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def post(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP POST request.""" return self.request("POST", endpoint, **kwargs) + def post_data(self, endpoint: str, *, suffix: str | None = None, as_extended: bool = True, **kwargs: Any) -> Any: + """HTTP POST request returning decoded response data.""" + return self.request_data("POST", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def put(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP PUT request.""" return self.request("PUT", endpoint, **kwargs) + def put_data(self, endpoint: str, *, suffix: str | None = None, as_extended: bool = True, **kwargs: Any) -> Any: + """HTTP PUT request returning decoded response data.""" + return self.request_data("PUT", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def delete(self, endpoint: 
str, **kwargs: Any) -> httpx.Response: """HTTP DELETE request.""" return self.request("DELETE", endpoint, **kwargs) + def delete_data(self, endpoint: str, *, suffix: str | None = None, as_extended: bool = True, **kwargs: Any) -> Any: + """HTTP DELETE request returning decoded response data.""" + return self.request_data("DELETE", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def patch(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP PATCH request.""" return self.request("PATCH", endpoint, **kwargs) + def patch_data(self, endpoint: str, *, suffix: str | None = None, as_extended: bool = True, **kwargs: Any) -> Any: + """HTTP PATCH request returning decoded response data.""" + return self.request_data("PATCH", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + # ------------------------------------------------------------------------- # File Downloads # ------------------------------------------------------------------------- diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py new file mode 100644 index 0000000..d9c076c --- /dev/null +++ b/tests/connectors/test_base.py @@ -0,0 +1,102 @@ +"""Tests for base connector data helpers.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import httpx + +from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import ExtendedDict, ExtendedString +from extended_data.logging import Logging + + +class ExampleConnector(VendorConnectorBase): + """Small connector used to exercise the base class.""" + + BASE_URL = "https://api.example.com" + + +def _connector() -> ExampleConnector: + logger = MagicMock(spec=Logging) + logger.logger = MagicMock() + return ExampleConnector(from_environment=False, logger=logger) + + +def test_decode_response_promotes_json_to_extended_containers() -> None: + """JSON responses flow through the Tier 2 container bridge.""" + connector = _connector() + response = httpx.Response( 
+ 200, + content=b'{"service":{"name":"api"}}', + headers={"content-type": "application/json; charset=utf-8"}, + ) + + data = connector.decode_response(response) + + assert isinstance(data, ExtendedDict) + assert isinstance(data["service"], ExtendedDict) + assert isinstance(data["service"]["name"], ExtendedString) + assert data["service"]["name"].upper_first() == "Api" + + +def test_decode_response_can_return_plain_json() -> None: + """Response decoding can opt out of extended containers.""" + connector = _connector() + response = httpx.Response( + 200, + content=b'{"service":{"name":"api"}}', + headers={"content-type": "application/vnd.example+json"}, + ) + + data = connector.decode_response(response, as_extended=False) + + assert data == {"service": {"name": "api"}} + assert not isinstance(data["service"]["name"], ExtendedString) + + +def test_decode_response_promotes_text_to_extended_string() -> None: + """Text responses become ExtendedString values by default.""" + connector = _connector() + response = httpx.Response( + 200, + content=b"api response", + headers={"content-type": "text/plain"}, + ) + + data = connector.decode_response(response) + + assert isinstance(data, ExtendedString) + assert data.to_snake_case() == "api_response" + + +def test_decode_response_preserves_unknown_binary_data() -> None: + """Unknown binary responses are left as bytes.""" + connector = _connector() + response = httpx.Response( + 200, + content=b"\x00\x01\x02", + headers={"content-type": "application/octet-stream"}, + ) + + assert connector.decode_response(response) == b"\x00\x01\x02" + + +def test_request_data_decodes_response_body() -> None: + """request_data combines the raw request primitive with response decoding.""" + connector = _connector() + mock_client = MagicMock() + mock_client.request.return_value = httpx.Response( + 200, + content=b'{"ok":true}', + headers={"content-type": "application/json"}, + ) + connector._client = mock_client + + data = 
connector.request_data("GET", "/status") + + assert data == {"ok": True} + assert isinstance(data, ExtendedDict) + mock_client.request.assert_called_once() + assert mock_client.request.call_args.args[0] == "GET" + assert mock_client.request.call_args.args[1] == "https://api.example.com/status" From bb332126a6347634f8683a7d6bc0dec3b19d8360 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:20:29 -0500 Subject: [PATCH 016/287] ci: run full package gate --- .github/workflows/ci.yml | 5 ++--- .github/workflows/release.yml | 4 ++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 07c38f6..468b104 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,8 +26,7 @@ jobs: version: "0.11.19" enable-cache: true - run: uv sync --python 3.13 --extra tests --extra typing - - run: uvx ruff check src tests + - run: uvx ruff check src tests examples README.md docs/package-surface.md - run: uv run mypy src/extended_data - - run: uv run pytest tests/core tests/logging tests/inputs - - run: uv run pytest tests/connectors/test_cloud_params.py tests/connectors/test_connectors.py tests/connectors/test_secrets.py tests/connectors/test_cli.py tests/connectors/test_mcp.py tests/connectors/meshy/test_models.py + - run: uv run pytest tests - run: uv build diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a5312d8..878a815 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -44,5 +44,9 @@ jobs: with: version: "0.11.19" enable-cache: true + - run: uv sync --python 3.13 --extra tests --extra typing + - run: uvx ruff check src tests examples README.md docs/package-surface.md + - run: uv run mypy src/extended_data + - run: uv run pytest tests - run: uv build - run: uv publish --trusted-publishing always From ee44f31ccd38c6c6de2c8874695b188b075bf549 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:23:06 -0500 
Subject: [PATCH 017/287] fix: keep secrets cli output parseable --- .../connectors/secrets/__init__.py | 7 ++-- tests/connectors/test_secrets.py | 41 ++++++++++++++----- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 72470c8..1306ac4 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -81,7 +81,7 @@ class SyncOptions: continue_on_error: bool = False parallelism: int = 4 compute_diff: bool = False - output_format: OutputFormat = OutputFormat.HUMAN + output_format: OutputFormat = OutputFormat.JSON @dataclass @@ -364,6 +364,9 @@ def _cli_run_pipeline( error_message="CLI not available and native bindings not installed", ) + # CLI mode always requests JSON so this Python surface can reliably + # return a structured SyncResult. Native mode can pass through other + # output formats because it returns a typed result directly. 
cmd = [ self._cli_path, "pipeline", @@ -382,8 +385,6 @@ def _cli_run_pipeline( cmd.append("--dry-run") if options.compute_diff: cmd.append("--diff") - if options.output_format: - cmd.extend(["--output", options.output_format.value]) if options.targets: cmd.extend(["--targets", ",".join(options.targets)]) if options.continue_on_error: diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index b804c25..c443b74 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -1,5 +1,6 @@ import json +from pathlib import Path from unittest.mock import MagicMock, patch import pytest @@ -14,17 +15,17 @@ @pytest.fixture -def mock_logger(): +def mock_logger() -> MagicMock: return MagicMock() @pytest.fixture -def connector(mock_logger): +def connector(mock_logger: MagicMock) -> SecretsConnector: # Force CLI mode by setting prefer_native=False return SecretsConnector(cli_path="/usr/bin/secretsync", prefer_native=False, logger=mock_logger) -def test_cli_get_config_info_valid(connector, tmp_path): +def test_cli_get_config_info_valid(connector: SecretsConnector, tmp_path: Path) -> None: config_file = tmp_path / "config.yaml" config_data = { "sources": {"src1": {}, "src2": {}}, @@ -48,13 +49,13 @@ def test_cli_get_config_info_valid(connector, tmp_path): assert info.aws_region == "us-east-1" -def test_cli_get_config_info_not_found(connector): +def test_cli_get_config_info_not_found(connector: SecretsConnector) -> None: info = connector.get_config_info("/non/existent/path.yaml") assert info.valid is False assert "Configuration file not found" in info.error_message -def test_cli_get_config_info_invalid_yaml(connector, tmp_path): +def test_cli_get_config_info_invalid_yaml(connector: SecretsConnector, tmp_path: Path) -> None: config_file = tmp_path / "config.yaml" config_file.write_text("invalid: yaml: :") @@ -63,7 +64,7 @@ def test_cli_get_config_info_invalid_yaml(connector, tmp_path): assert "Error parsing YAML file" in 
info.error_message -def test_cli_get_config_info_empty_file(connector, tmp_path): +def test_cli_get_config_info_empty_file(connector: SecretsConnector, tmp_path: Path) -> None: config_file = tmp_path / "config.yaml" config_file.write_text("") @@ -73,7 +74,7 @@ def test_cli_get_config_info_empty_file(connector, tmp_path): @patch("subprocess.run") -def test_cli_run_pipeline_operation(mock_run, connector): +def test_cli_run_pipeline_operation(mock_run: MagicMock, connector: SecretsConnector) -> None: mock_run.return_value = MagicMock( returncode=0, stdout=json.dumps({"success": True, "secrets_processed": 5}), @@ -90,10 +91,12 @@ def test_cli_run_pipeline_operation(mock_run, connector): args = mock_run.call_args[0][0] assert args[1] == "pipeline" assert "--merge-only" in args + assert args.count("--output") == 1 + assert args[args.index("--output") + 1] == "json" @patch("subprocess.run") -def test_cli_run_pipeline_diff_and_format(mock_run, connector): +def test_cli_run_pipeline_diff_and_format(mock_run: MagicMock, connector: SecretsConnector) -> None: mock_run.return_value = MagicMock( returncode=0, stdout=json.dumps({"success": True, "diff_output": "some diff"}), @@ -110,12 +113,28 @@ def test_cli_run_pipeline_diff_and_format(mock_run, connector): args = mock_run.call_args[0][0] assert "--diff" in args - assert "--output" in args - assert "json" in args + assert args.count("--output") == 1 + assert args[args.index("--output") + 1] == "json" @patch("subprocess.run") -def test_cli_validate_config(mock_run, connector): +def test_cli_run_pipeline_default_output_is_json(mock_run: MagicMock, connector: SecretsConnector) -> None: + mock_run.return_value = MagicMock( + returncode=0, + stdout=json.dumps({"success": True}), + stderr="", + ) + + result = connector.run_pipeline("config.yaml") + + assert result.success is True + args = mock_run.call_args[0][0] + assert args.count("--output") == 1 + assert args[args.index("--output") + 1] == "json" + + +@patch("subprocess.run") 
+def test_cli_validate_config(mock_run: MagicMock, connector: SecretsConnector) -> None: mock_run.return_value = MagicMock( returncode=0, stdout="Valid", From 0667d587d2385ca2a8987140ba09e7d1d4ac9d9b Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:23:51 -0500 Subject: [PATCH 018/287] fix: avoid unsupported secrets cli flags --- .../connectors/secrets/__init__.py | 4 ---- tests/connectors/test_secrets.py | 24 +++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 1306ac4..1305355 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -387,10 +387,6 @@ def _cli_run_pipeline( cmd.append("--diff") if options.targets: cmd.extend(["--targets", ",".join(options.targets)]) - if options.continue_on_error: - cmd.append("--continue-on-error") - if options.parallelism: - cmd.extend(["--parallelism", str(options.parallelism)]) try: result = subprocess.run( diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index c443b74..c5d727f 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -131,6 +131,30 @@ def test_cli_run_pipeline_default_output_is_json(mock_run: MagicMock, connector: args = mock_run.call_args[0][0] assert args.count("--output") == 1 assert args[args.index("--output") + 1] == "json" + assert "--parallelism" not in args + assert "--continue-on-error" not in args + + +@patch("subprocess.run") +def test_cli_run_pipeline_only_emits_supported_cli_flags(mock_run: MagicMock, connector: SecretsConnector) -> None: + mock_run.return_value = MagicMock( + returncode=0, + stdout=json.dumps({"success": True}), + stderr="", + ) + + options = SyncOptions( + targets=["prod", "staging"], + continue_on_error=True, + parallelism=12, + ) + connector.run_pipeline("config.yaml", options) + + args = 
mock_run.call_args[0][0] + assert "--targets" in args + assert args[args.index("--targets") + 1] == "prod,staging" + assert "--parallelism" not in args + assert "--continue-on-error" not in args @patch("subprocess.run") From de45a5618c80c4c33a4889b2d9bb8f1b460de3ca Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:29:25 -0500 Subject: [PATCH 019/287] fix: align secrets connector cli options --- src/extended_data/connectors/secrets/__init__.py | 7 +++++-- tests/connectors/test_secrets.py | 9 +++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 1305355..887882e 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -78,8 +78,8 @@ class SyncOptions: dry_run: bool = False operation: SyncOperation = SyncOperation.PIPELINE targets: list[str] = field(default_factory=list) - continue_on_error: bool = False - parallelism: int = 4 + continue_on_error: bool = True + parallelism: int = 0 compute_diff: bool = False output_format: OutputFormat = OutputFormat.JSON @@ -387,6 +387,9 @@ def _cli_run_pipeline( cmd.append("--diff") if options.targets: cmd.extend(["--targets", ",".join(options.targets)]) + cmd.append(f"--continue-on-error={str(options.continue_on_error).lower()}") + if options.parallelism > 0: + cmd.extend(["--parallelism", str(options.parallelism)]) try: result = subprocess.run( diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index c5d727f..e647394 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -132,7 +132,7 @@ def test_cli_run_pipeline_default_output_is_json(mock_run: MagicMock, connector: assert args.count("--output") == 1 assert args[args.index("--output") + 1] == "json" assert "--parallelism" not in args - assert "--continue-on-error" not in args + assert "--continue-on-error=true" in 
args @patch("subprocess.run") @@ -145,7 +145,7 @@ def test_cli_run_pipeline_only_emits_supported_cli_flags(mock_run: MagicMock, co options = SyncOptions( targets=["prod", "staging"], - continue_on_error=True, + continue_on_error=False, parallelism=12, ) connector.run_pipeline("config.yaml", options) @@ -153,8 +153,9 @@ def test_cli_run_pipeline_only_emits_supported_cli_flags(mock_run: MagicMock, co args = mock_run.call_args[0][0] assert "--targets" in args assert args[args.index("--targets") + 1] == "prod,staging" - assert "--parallelism" not in args - assert "--continue-on-error" not in args + assert "--parallelism" in args + assert args[args.index("--parallelism") + 1] == "12" + assert "--continue-on-error=false" in args @patch("subprocess.run") From f2fc6acc4acba7491b8d7b704e8783caa00e104a Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:37:02 -0500 Subject: [PATCH 020/287] fix: remove unsafe crewai extra --- README.md | 6 +- docs/package-surface.md | 6 +- examples/connectors/README.md | 5 +- pyproject.toml | 8 - src/extended_data/connectors/_optional.py | 10 +- src/extended_data/connectors/aws/tools.py | 2 +- src/extended_data/connectors/cli.py | 5 +- src/extended_data/connectors/google/tools.py | 2 +- src/extended_data/connectors/meshy/tools.py | 2 +- src/extended_data/connectors/slack/tools.py | 2 +- tests/connectors/test_cli.py | 14 +- uv.lock | 1811 +----------------- 12 files changed, 43 insertions(+), 1830 deletions(-) diff --git a/README.md b/README.md index c89fd9c..25c5c5f 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,15 @@ Optional integrations are installed by feature: ```bash pip install "extended-data[aws,github,vault]" pip install "extended-data[google,slack,zoom]" -pip install "extended-data[ai]" +pip install "extended-data[ai]" # LangChain, MCP, and Strands pip install "extended-data[meshy,mcp]" pip install "extended-data[secrets]" ``` +CrewAI adapters remain available when `crewai` is installed independently, but 
+`extended-data` intentionally does not publish a CrewAI extra while current +CrewAI releases pull vulnerable `chromadb` versions transitively. + ## Usage ```python diff --git a/docs/package-surface.md b/docs/package-surface.md index 7062597..408aed1 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -115,10 +115,14 @@ Install only the vendor or AI layers you need: ```bash pip install "extended-data[aws,github,vault]" pip install "extended-data[google,slack,zoom]" -pip install "extended-data[ai]" +pip install "extended-data[ai]" # LangChain, MCP, and Strands pip install "extended-data[meshy,mcp]" ``` +CrewAI tool adapters are still importable when users install `crewai` directly, +but `extended-data` does not expose a CrewAI extra while current CrewAI +dependency trees pull vulnerable `chromadb` releases. + Optional dependency checks live in `extended_data.connectors._optional`; there are no old package compatibility shims in the public API. When a known built-in connector is requested without its optional extra installed, the registry raises diff --git a/examples/connectors/README.md b/examples/connectors/README.md index 351b95f..044ab42 100644 --- a/examples/connectors/README.md +++ b/examples/connectors/README.md @@ -16,7 +16,10 @@ pip install "extended-data[aws,google,meshy]" # For AI framework integration pip install "extended-data[langchain]" -pip install "extended-data[crewai]" + +# CrewAI adapters require a user-managed CrewAI install. extended-data does not +# currently publish a CrewAI extra because current CrewAI releases pull +# vulnerable chromadb versions transitively. 
# For the Meshy MCP server pip install "extended-data[meshy,mcp]" diff --git a/pyproject.toml b/pyproject.toml index 85806cd..55cbb41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,19 +90,13 @@ langchain = [ "langchain-core>=1.3.0", "langsmith>=0.7.33", ] -crewai = [ - "crewai[tools]>=1.14.2rc1", - "uv>=0.11.7", -] strands = ["strands-agents>=1.36.0"] mcp = ["mcp>=1.26.0,<1.27.dev0"] ai = [ - "crewai[tools]>=1.14.2rc1", "langchain-core>=1.3.0", "langsmith>=0.7.33", "mcp>=1.26.0,<1.27.dev0", "strands-agents>=1.36.0", - "uv>=0.11.7", ] webhooks = [ "fastapi>=0.136.0", @@ -136,7 +130,6 @@ dev = [ all = [ "anthropic>=0.96.0", "boto3>=1.42.92", - "crewai[tools]>=1.14.2rc1", "fastapi>=0.136.0", "filelock>=3.29.0", "google-api-python-client>=2.194.0", @@ -154,7 +147,6 @@ all = [ "slack-sdk>=3.41.0", "sqlite-vec>=0.1.9", "strands-agents>=1.36.0", - "uv>=0.11.7", "uvicorn>=0.45.0", ] diff --git a/src/extended_data/connectors/_optional.py b/src/extended_data/connectors/_optional.py index 0647f45..8104c0c 100644 --- a/src/extended_data/connectors/_optional.py +++ b/src/extended_data/connectors/_optional.py @@ -39,7 +39,6 @@ # AI frameworks "langchain_core": "langchain", "langchain": "langchain", - "crewai": "crewai", "strands": "strands", "mcp": "mcp", # Features @@ -52,6 +51,13 @@ # Cache for import checks _import_cache: dict[str, bool] = {} +PACKAGE_INSTALL_HINTS: dict[str, str] = { + "crewai": ( + "Install CrewAI separately after reviewing its dependency tree; extended-data does not publish a " + "CrewAI extra while current CrewAI releases pull vulnerable chromadb versions." + ), +} + def is_available(package: str) -> bool: """Check if a package is available for import. 
@@ -102,6 +108,8 @@ def require_extra(package: str, extra: str | None = None) -> Any: try: return importlib.import_module(package) except ImportError as e: + if package in PACKAGE_INSTALL_HINTS: + raise ImportError(f"Package '{package}' is required but not installed.\n{PACKAGE_INSTALL_HINTS[package]}") from e extra_name = extra or get_extra_for_package(package) or package raise ImportError( f"Package '{package}' is required but not installed.\n" diff --git a/src/extended_data/connectors/aws/tools.py b/src/extended_data/connectors/aws/tools.py index 841a6c7..234d2dd 100644 --- a/src/extended_data/connectors/aws/tools.py +++ b/src/extended_data/connectors/aws/tools.py @@ -349,7 +349,7 @@ def get_crewai_tools() -> list[Any]: try: from crewai.tools import tool as crewai_tool except ImportError as e: - msg = "crewai is required for CrewAI tools.\nInstall with: pip install extended-data[crewai]" + msg = "crewai is required for CrewAI tools. Install CrewAI separately; extended-data does not install it." 
raise ImportError(msg) from e tools = [] diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index f3f37d9..e39b0e7 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -167,9 +167,8 @@ def cmd_methods(args: argparse.Namespace) -> int: if not callable(attr) or isinstance(attr, type): continue - # Get first line of docstring - if attr.__doc__: - attr.__doc__.split("\n")[0].strip()[:50] + doc = attr.__doc__.split("\n")[0].strip()[:50] if attr.__doc__ else "No description" + _write_stdout(f" {name:<30} {doc}") return 0 diff --git a/src/extended_data/connectors/google/tools.py b/src/extended_data/connectors/google/tools.py index 3c93bea..a7f2ee7 100644 --- a/src/extended_data/connectors/google/tools.py +++ b/src/extended_data/connectors/google/tools.py @@ -380,7 +380,7 @@ def get_crewai_tools() -> list[Any]: try: from crewai.tools import tool as crewai_tool except ImportError as e: - msg = "crewai is required for CrewAI tools.\nInstall with: pip install extended-data[crewai]" + msg = "crewai is required for CrewAI tools. Install CrewAI separately; extended-data does not install it." raise ImportError(msg) from e tools = [] diff --git a/src/extended_data/connectors/meshy/tools.py b/src/extended_data/connectors/meshy/tools.py index 59acb2f..134727c 100644 --- a/src/extended_data/connectors/meshy/tools.py +++ b/src/extended_data/connectors/meshy/tools.py @@ -554,7 +554,7 @@ def get_crewai_tools() -> list[Any]: try: from crewai.tools import tool as crewai_tool except ImportError as e: - msg = "crewai is required for CrewAI tools.\nInstall with: pip install extended-data[crewai]" + msg = "crewai is required for CrewAI tools. Install CrewAI separately; extended-data does not install it." 
raise ImportError(msg) from e tools = [] diff --git a/src/extended_data/connectors/slack/tools.py b/src/extended_data/connectors/slack/tools.py index 9588cbf..222c0ad 100644 --- a/src/extended_data/connectors/slack/tools.py +++ b/src/extended_data/connectors/slack/tools.py @@ -330,7 +330,7 @@ def get_crewai_tools() -> list[Any]: try: from crewai.tools import tool as crewai_tool except ImportError as e: - msg = "crewai is required for CrewAI tools.\nInstall with: pip install extended-data[crewai]" + msg = "crewai is required for CrewAI tools. Install CrewAI separately; extended-data does not install it." raise ImportError(msg) from e tools = [] diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index e3d1622..811cea5 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -8,7 +8,7 @@ import pytest -from extended_data.connectors.cli import cmd_info, cmd_list, main +from extended_data.connectors.cli import cmd_info, cmd_list, cmd_methods, main def test_cli_list(): @@ -48,6 +48,18 @@ def test_cli_info(): assert "install: pip install extended-data[github]" in output +def test_cli_methods_lists_public_methods(): + """Methods command prints public callable methods with descriptions.""" + args = argparse.Namespace(connector="meshy") + with patch("sys.stdout.write") as mock_write: + exit_code = cmd_methods(args) + + assert exit_code == 0 + output = "".join(call.args[0] for call in mock_write.call_args_list if call.args) + assert "request_data" in output + assert "Decode an HTTP response body" in output + + def test_cli_main_help(): """Test main CLI entry point with help.""" with patch("sys.argv", ["extended-data", "--help"]): diff --git a/uv.lock b/uv.lock index 54af2ba..367b146 100644 --- a/uv.lock +++ b/uv.lock @@ -9,15 +9,6 @@ resolution-markers = [ "python_full_version < '3.11'", ] -[[package]] -name = "aiofiles" -version = "24.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/0b/03/a88171e277e8caa88a4c77808c20ebb04ba74cc4681bf1e9416c862de237/aiofiles-24.1.0.tar.gz", hash = "sha256:22a075c9e5a3810f0c2e48f3008c94d68c65d763b9b03857924c99e57355166c", size = 30247, upload-time = "2024-06-24T11:02:03.584Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/45/30bb92d442636f570cb5651bc661f52b610e2eec3f891a5dc3a4c3667db0/aiofiles-24.1.0-py3-none-any.whl", hash = "sha256:b4ec55f4195e3eb5d7abd1bf7e061763e864dd4954231fb8539a0ef8bb8260e5", size = 15896, upload-time = "2024-06-24T11:02:01.529Z" }, -] - [[package]] name = "aiohappyeyeballs" version = "2.6.2" @@ -177,18 +168,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, ] -[[package]] -name = "aiosqlite" -version = "0.21.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/13/7d/8bca2bf9a247c2c5dfeec1d7a5f40db6518f88d314b8bca9da29670d2671/aiosqlite-0.21.0.tar.gz", hash = "sha256:131bb8056daa3bc875608c631c678cda73922a2d4ba8aec373b19f18c17e7aa3", size = 13454, upload-time = "2025-02-03T07:30:16.235Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f5/10/6c25ed6de94c49f88a91fa5018cb4c0f3625f31d5be9f771ebe5cc7cd506/aiosqlite-0.21.0-py3-none-any.whl", hash = "sha256:2549cf4057f95f53dcba16f2b64e8e2791d7e1adedb13197dd8ed77bb226d7d0", size = 15792, upload-time = "2025-02-03T07:30:13.6Z" }, -] - [[package]] name = "annotated-doc" version = "0.0.4" @@ -240,15 +219,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = 
"sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, ] -[[package]] -name = "appdirs" -version = "1.4.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/d8/05696357e0311f5b5c316d7b95f46c669dd9c15aaeecbb48c7d0aeb88c40/appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", size = 13470, upload-time = "2020-05-11T07:59:51.037Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128", size = 9566, upload-time = "2020-05-11T07:59:49.499Z" }, -] - [[package]] name = "ast-serialize" version = "0.5.0" @@ -307,15 +277,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" }, ] -[[package]] -name = "backoff" -version = "2.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/47/d7/5bbeb12c44d7c4f2fb5b56abce497eb5ed9f34d85701de869acedd602619/backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba", size = 17001, upload-time = "2022-10-05T19:19:32.061Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" }, -] - [[package]] name = "backports-asyncio-runner" version = "1.2.0" @@ -325,89 +286,6 @@ wheels = 
[ { url = "https://files.pythonhosted.org/packages/a0/59/76ab57e3fe74484f48a53f8e337171b4a2349e506eabe136d7e01d059086/backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5", size = 12313, upload-time = "2025-07-02T02:27:14.263Z" }, ] -[[package]] -name = "bcrypt" -version = "5.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d4/36/3329e2518d70ad8e2e5817d5a4cac6bba05a47767ec416c7d020a965f408/bcrypt-5.0.0.tar.gz", hash = "sha256:f748f7c2d6fd375cc93d3fba7ef4a9e3a092421b8dbf34d8d4dc06be9492dfdd", size = 25386, upload-time = "2025-09-25T19:50:47.829Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/13/85/3e65e01985fddf25b64ca67275bb5bdb4040bd1a53b66d355c6c37c8a680/bcrypt-5.0.0-cp313-cp313t-macosx_10_12_universal2.whl", hash = "sha256:f3c08197f3039bec79cee59a606d62b96b16669cff3949f21e74796b6e3cd2be", size = 481806, upload-time = "2025-09-25T19:49:05.102Z" }, - { url = "https://files.pythonhosted.org/packages/44/dc/01eb79f12b177017a726cbf78330eb0eb442fae0e7b3dfd84ea2849552f3/bcrypt-5.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:200af71bc25f22006f4069060c88ed36f8aa4ff7f53e67ff04d2ab3f1e79a5b2", size = 268626, upload-time = "2025-09-25T19:49:06.723Z" }, - { url = "https://files.pythonhosted.org/packages/8c/cf/e82388ad5959c40d6afd94fb4743cc077129d45b952d46bdc3180310e2df/bcrypt-5.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:baade0a5657654c2984468efb7d6c110db87ea63ef5a4b54732e7e337253e44f", size = 271853, upload-time = "2025-09-25T19:49:08.028Z" }, - { url = "https://files.pythonhosted.org/packages/ec/86/7134b9dae7cf0efa85671651341f6afa695857fae172615e960fb6a466fa/bcrypt-5.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c58b56cdfb03202b3bcc9fd8daee8e8e9b6d7e3163aa97c631dfcfcc24d36c86", size = 269793, upload-time = 
"2025-09-25T19:49:09.727Z" }, - { url = "https://files.pythonhosted.org/packages/cc/82/6296688ac1b9e503d034e7d0614d56e80c5d1a08402ff856a4549cb59207/bcrypt-5.0.0-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4bfd2a34de661f34d0bda43c3e4e79df586e4716ef401fe31ea39d69d581ef23", size = 289930, upload-time = "2025-09-25T19:49:11.204Z" }, - { url = "https://files.pythonhosted.org/packages/d1/18/884a44aa47f2a3b88dd09bc05a1e40b57878ecd111d17e5bba6f09f8bb77/bcrypt-5.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ed2e1365e31fc73f1825fa830f1c8f8917ca1b3ca6185773b349c20fd606cec2", size = 272194, upload-time = "2025-09-25T19:49:12.524Z" }, - { url = "https://files.pythonhosted.org/packages/0e/8f/371a3ab33c6982070b674f1788e05b656cfbf5685894acbfef0c65483a59/bcrypt-5.0.0-cp313-cp313t-manylinux_2_34_aarch64.whl", hash = "sha256:83e787d7a84dbbfba6f250dd7a5efd689e935f03dd83b0f919d39349e1f23f83", size = 269381, upload-time = "2025-09-25T19:49:14.308Z" }, - { url = "https://files.pythonhosted.org/packages/b1/34/7e4e6abb7a8778db6422e88b1f06eb07c47682313997ee8a8f9352e5a6f1/bcrypt-5.0.0-cp313-cp313t-manylinux_2_34_x86_64.whl", hash = "sha256:137c5156524328a24b9fac1cb5db0ba618bc97d11970b39184c1d87dc4bf1746", size = 271750, upload-time = "2025-09-25T19:49:15.584Z" }, - { url = "https://files.pythonhosted.org/packages/c0/1b/54f416be2499bd72123c70d98d36c6cd61a4e33d9b89562c22481c81bb30/bcrypt-5.0.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:38cac74101777a6a7d3b3e3cfefa57089b5ada650dce2baf0cbdd9d65db22a9e", size = 303757, upload-time = "2025-09-25T19:49:17.244Z" }, - { url = "https://files.pythonhosted.org/packages/13/62/062c24c7bcf9d2826a1a843d0d605c65a755bc98002923d01fd61270705a/bcrypt-5.0.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:d8d65b564ec849643d9f7ea05c6d9f0cd7ca23bdd4ac0c2dbef1104ab504543d", size = 306740, upload-time = "2025-09-25T19:49:18.693Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/c8/1fdbfc8c0f20875b6b4020f3c7dc447b8de60aa0be5faaf009d24242aec9/bcrypt-5.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:741449132f64b3524e95cd30e5cd3343006ce146088f074f31ab26b94e6c75ba", size = 334197, upload-time = "2025-09-25T19:49:20.523Z" }, - { url = "https://files.pythonhosted.org/packages/a6/c1/8b84545382d75bef226fbc6588af0f7b7d095f7cd6a670b42a86243183cd/bcrypt-5.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:212139484ab3207b1f0c00633d3be92fef3c5f0af17cad155679d03ff2ee1e41", size = 352974, upload-time = "2025-09-25T19:49:22.254Z" }, - { url = "https://files.pythonhosted.org/packages/10/a6/ffb49d4254ed085e62e3e5dd05982b4393e32fe1e49bb1130186617c29cd/bcrypt-5.0.0-cp313-cp313t-win32.whl", hash = "sha256:9d52ed507c2488eddd6a95bccee4e808d3234fa78dd370e24bac65a21212b861", size = 148498, upload-time = "2025-09-25T19:49:24.134Z" }, - { url = "https://files.pythonhosted.org/packages/48/a9/259559edc85258b6d5fc5471a62a3299a6aa37a6611a169756bf4689323c/bcrypt-5.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f6984a24db30548fd39a44360532898c33528b74aedf81c26cf29c51ee47057e", size = 145853, upload-time = "2025-09-25T19:49:25.702Z" }, - { url = "https://files.pythonhosted.org/packages/2d/df/9714173403c7e8b245acf8e4be8876aac64a209d1b392af457c79e60492e/bcrypt-5.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:9fffdb387abe6aa775af36ef16f55e318dcda4194ddbf82007a6f21da29de8f5", size = 139626, upload-time = "2025-09-25T19:49:26.928Z" }, - { url = "https://files.pythonhosted.org/packages/f8/14/c18006f91816606a4abe294ccc5d1e6f0e42304df5a33710e9e8e95416e1/bcrypt-5.0.0-cp314-cp314t-macosx_10_12_universal2.whl", hash = "sha256:4870a52610537037adb382444fefd3706d96d663ac44cbb2f37e3919dca3d7ef", size = 481862, upload-time = "2025-09-25T19:49:28.365Z" }, - { url = 
"https://files.pythonhosted.org/packages/67/49/dd074d831f00e589537e07a0725cf0e220d1f0d5d8e85ad5bbff251c45aa/bcrypt-5.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48f753100931605686f74e27a7b49238122aa761a9aefe9373265b8b7aa43ea4", size = 268544, upload-time = "2025-09-25T19:49:30.39Z" }, - { url = "https://files.pythonhosted.org/packages/f5/91/50ccba088b8c474545b034a1424d05195d9fcbaaf802ab8bfe2be5a4e0d7/bcrypt-5.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f70aadb7a809305226daedf75d90379c397b094755a710d7014b8b117df1ebbf", size = 271787, upload-time = "2025-09-25T19:49:32.144Z" }, - { url = "https://files.pythonhosted.org/packages/aa/e7/d7dba133e02abcda3b52087a7eea8c0d4f64d3e593b4fffc10c31b7061f3/bcrypt-5.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:744d3c6b164caa658adcb72cb8cc9ad9b4b75c7db507ab4bc2480474a51989da", size = 269753, upload-time = "2025-09-25T19:49:33.885Z" }, - { url = "https://files.pythonhosted.org/packages/33/fc/5b145673c4b8d01018307b5c2c1fc87a6f5a436f0ad56607aee389de8ee3/bcrypt-5.0.0-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a28bc05039bdf3289d757f49d616ab3efe8cf40d8e8001ccdd621cd4f98f4fc9", size = 289587, upload-time = "2025-09-25T19:49:35.144Z" }, - { url = "https://files.pythonhosted.org/packages/27/d7/1ff22703ec6d4f90e62f1a5654b8867ef96bafb8e8102c2288333e1a6ca6/bcrypt-5.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7f277a4b3390ab4bebe597800a90da0edae882c6196d3038a73adf446c4f969f", size = 272178, upload-time = "2025-09-25T19:49:36.793Z" }, - { url = "https://files.pythonhosted.org/packages/c8/88/815b6d558a1e4d40ece04a2f84865b0fef233513bd85fd0e40c294272d62/bcrypt-5.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:79cfa161eda8d2ddf29acad370356b47f02387153b11d46042e93a0a95127493", size = 269295, upload-time = "2025-09-25T19:49:38.164Z" }, - { url = 
"https://files.pythonhosted.org/packages/51/8c/e0db387c79ab4931fc89827d37608c31cc57b6edc08ccd2386139028dc0d/bcrypt-5.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a5393eae5722bcef046a990b84dff02b954904c36a194f6cfc817d7dca6c6f0b", size = 271700, upload-time = "2025-09-25T19:49:39.917Z" }, - { url = "https://files.pythonhosted.org/packages/06/83/1570edddd150f572dbe9fc00f6203a89fc7d4226821f67328a85c330f239/bcrypt-5.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7f4c94dec1b5ab5d522750cb059bb9409ea8872d4494fd152b53cca99f1ddd8c", size = 334034, upload-time = "2025-09-25T19:49:41.227Z" }, - { url = "https://files.pythonhosted.org/packages/c9/f2/ea64e51a65e56ae7a8a4ec236c2bfbdd4b23008abd50ac33fbb2d1d15424/bcrypt-5.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0cae4cb350934dfd74c020525eeae0a5f79257e8a201c0c176f4b84fdbf2a4b4", size = 352766, upload-time = "2025-09-25T19:49:43.08Z" }, - { url = "https://files.pythonhosted.org/packages/d7/d4/1a388d21ee66876f27d1a1f41287897d0c0f1712ef97d395d708ba93004c/bcrypt-5.0.0-cp314-cp314t-win32.whl", hash = "sha256:b17366316c654e1ad0306a6858e189fc835eca39f7eb2cafd6aaca8ce0c40a2e", size = 152449, upload-time = "2025-09-25T19:49:44.971Z" }, - { url = "https://files.pythonhosted.org/packages/3f/61/3291c2243ae0229e5bca5d19f4032cecad5dfb05a2557169d3a69dc0ba91/bcrypt-5.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:92864f54fb48b4c718fc92a32825d0e42265a627f956bc0361fe869f1adc3e7d", size = 149310, upload-time = "2025-09-25T19:49:46.162Z" }, - { url = "https://files.pythonhosted.org/packages/3e/89/4b01c52ae0c1a681d4021e5dd3e45b111a8fb47254a274fa9a378d8d834b/bcrypt-5.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dd19cf5184a90c873009244586396a6a884d591a5323f0e8a5922560718d4993", size = 143761, upload-time = "2025-09-25T19:49:47.345Z" }, - { url = "https://files.pythonhosted.org/packages/84/29/6237f151fbfe295fe3e074ecc6d44228faa1e842a81f6d34a02937ee1736/bcrypt-5.0.0-cp38-abi3-macosx_10_12_universal2.whl", 
hash = "sha256:fc746432b951e92b58317af8e0ca746efe93e66555f1b40888865ef5bf56446b", size = 494553, upload-time = "2025-09-25T19:49:49.006Z" }, - { url = "https://files.pythonhosted.org/packages/45/b6/4c1205dde5e464ea3bd88e8742e19f899c16fa8916fb8510a851fae985b5/bcrypt-5.0.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c2388ca94ffee269b6038d48747f4ce8df0ffbea43f31abfa18ac72f0218effb", size = 275009, upload-time = "2025-09-25T19:49:50.581Z" }, - { url = "https://files.pythonhosted.org/packages/3b/71/427945e6ead72ccffe77894b2655b695ccf14ae1866cd977e185d606dd2f/bcrypt-5.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:560ddb6ec730386e7b3b26b8b4c88197aaed924430e7b74666a586ac997249ef", size = 278029, upload-time = "2025-09-25T19:49:52.533Z" }, - { url = "https://files.pythonhosted.org/packages/17/72/c344825e3b83c5389a369c8a8e58ffe1480b8a699f46c127c34580c4666b/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d79e5c65dcc9af213594d6f7f1fa2c98ad3fc10431e7aa53c176b441943efbdd", size = 275907, upload-time = "2025-09-25T19:49:54.709Z" }, - { url = "https://files.pythonhosted.org/packages/0b/7e/d4e47d2df1641a36d1212e5c0514f5291e1a956a7749f1e595c07a972038/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2b732e7d388fa22d48920baa267ba5d97cca38070b69c0e2d37087b381c681fd", size = 296500, upload-time = "2025-09-25T19:49:56.013Z" }, - { url = "https://files.pythonhosted.org/packages/0f/c3/0ae57a68be2039287ec28bc463b82e4b8dc23f9d12c0be331f4782e19108/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0c8e093ea2532601a6f686edbc2c6b2ec24131ff5c52f7610dd64fa4553b5464", size = 278412, upload-time = "2025-09-25T19:49:57.356Z" }, - { url = "https://files.pythonhosted.org/packages/45/2b/77424511adb11e6a99e3a00dcc7745034bee89036ad7d7e255a7e47be7d8/bcrypt-5.0.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = 
"sha256:5b1589f4839a0899c146e8892efe320c0fa096568abd9b95593efac50a87cb75", size = 275486, upload-time = "2025-09-25T19:49:59.116Z" }, - { url = "https://files.pythonhosted.org/packages/43/0a/405c753f6158e0f3f14b00b462d8bca31296f7ecfc8fc8bc7919c0c7d73a/bcrypt-5.0.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:89042e61b5e808b67daf24a434d89bab164d4de1746b37a8d173b6b14f3db9ff", size = 277940, upload-time = "2025-09-25T19:50:00.869Z" }, - { url = "https://files.pythonhosted.org/packages/62/83/b3efc285d4aadc1fa83db385ec64dcfa1707e890eb42f03b127d66ac1b7b/bcrypt-5.0.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:e3cf5b2560c7b5a142286f69bde914494b6d8f901aaa71e453078388a50881c4", size = 310776, upload-time = "2025-09-25T19:50:02.393Z" }, - { url = "https://files.pythonhosted.org/packages/95/7d/47ee337dacecde6d234890fe929936cb03ebc4c3a7460854bbd9c97780b8/bcrypt-5.0.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f632fd56fc4e61564f78b46a2269153122db34988e78b6be8b32d28507b7eaeb", size = 312922, upload-time = "2025-09-25T19:50:04.232Z" }, - { url = "https://files.pythonhosted.org/packages/d6/3a/43d494dfb728f55f4e1cf8fd435d50c16a2d75493225b54c8d06122523c6/bcrypt-5.0.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:801cad5ccb6b87d1b430f183269b94c24f248dddbbc5c1f78b6ed231743e001c", size = 341367, upload-time = "2025-09-25T19:50:05.559Z" }, - { url = "https://files.pythonhosted.org/packages/55/ab/a0727a4547e383e2e22a630e0f908113db37904f58719dc48d4622139b5c/bcrypt-5.0.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3cf67a804fc66fc217e6914a5635000259fbbbb12e78a99488e4d5ba445a71eb", size = 359187, upload-time = "2025-09-25T19:50:06.916Z" }, - { url = "https://files.pythonhosted.org/packages/1b/bb/461f352fdca663524b4643d8b09e8435b4990f17fbf4fea6bc2a90aa0cc7/bcrypt-5.0.0-cp38-abi3-win32.whl", hash = "sha256:3abeb543874b2c0524ff40c57a4e14e5d3a66ff33fb423529c88f180fd756538", size = 153752, upload-time = "2025-09-25T19:50:08.515Z" }, - { url = 
"https://files.pythonhosted.org/packages/41/aa/4190e60921927b7056820291f56fc57d00d04757c8b316b2d3c0d1d6da2c/bcrypt-5.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:35a77ec55b541e5e583eb3436ffbbf53b0ffa1fa16ca6782279daf95d146dcd9", size = 150881, upload-time = "2025-09-25T19:50:09.742Z" }, - { url = "https://files.pythonhosted.org/packages/54/12/cd77221719d0b39ac0b55dbd39358db1cd1246e0282e104366ebbfb8266a/bcrypt-5.0.0-cp38-abi3-win_arm64.whl", hash = "sha256:cde08734f12c6a4e28dc6755cd11d3bdfea608d93d958fffbe95a7026ebe4980", size = 144931, upload-time = "2025-09-25T19:50:11.016Z" }, - { url = "https://files.pythonhosted.org/packages/5d/ba/2af136406e1c3839aea9ecadc2f6be2bcd1eff255bd451dd39bcf302c47a/bcrypt-5.0.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:0c418ca99fd47e9c59a301744d63328f17798b5947b0f791e9af3c1c499c2d0a", size = 495313, upload-time = "2025-09-25T19:50:12.309Z" }, - { url = "https://files.pythonhosted.org/packages/ac/ee/2f4985dbad090ace5ad1f7dd8ff94477fe089b5fab2040bd784a3d5f187b/bcrypt-5.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddb4e1500f6efdd402218ffe34d040a1196c072e07929b9820f363a1fd1f4191", size = 275290, upload-time = "2025-09-25T19:50:13.673Z" }, - { url = "https://files.pythonhosted.org/packages/e4/6e/b77ade812672d15cf50842e167eead80ac3514f3beacac8902915417f8b7/bcrypt-5.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7aeef54b60ceddb6f30ee3db090351ecf0d40ec6e2abf41430997407a46d2254", size = 278253, upload-time = "2025-09-25T19:50:15.089Z" }, - { url = "https://files.pythonhosted.org/packages/36/c4/ed00ed32f1040f7990dac7115f82273e3c03da1e1a1587a778d8cea496d8/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f0ce778135f60799d89c9693b9b398819d15f1921ba15fe719acb3178215a7db", size = 276084, upload-time = "2025-09-25T19:50:16.699Z" }, - { url = 
"https://files.pythonhosted.org/packages/e7/c4/fa6e16145e145e87f1fa351bbd54b429354fd72145cd3d4e0c5157cf4c70/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a71f70ee269671460b37a449f5ff26982a6f2ba493b3eabdd687b4bf35f875ac", size = 297185, upload-time = "2025-09-25T19:50:18.525Z" }, - { url = "https://files.pythonhosted.org/packages/24/b4/11f8a31d8b67cca3371e046db49baa7c0594d71eb40ac8121e2fc0888db0/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f8429e1c410b4073944f03bd778a9e066e7fad723564a52ff91841d278dfc822", size = 278656, upload-time = "2025-09-25T19:50:19.809Z" }, - { url = "https://files.pythonhosted.org/packages/ac/31/79f11865f8078e192847d2cb526e3fa27c200933c982c5b2869720fa5fce/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:edfcdcedd0d0f05850c52ba3127b1fce70b9f89e0fe5ff16517df7e81fa3cbb8", size = 275662, upload-time = "2025-09-25T19:50:21.567Z" }, - { url = "https://files.pythonhosted.org/packages/d4/8d/5e43d9584b3b3591a6f9b68f755a4da879a59712981ef5ad2a0ac1379f7a/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:611f0a17aa4a25a69362dcc299fda5c8a3d4f160e2abb3831041feb77393a14a", size = 278240, upload-time = "2025-09-25T19:50:23.305Z" }, - { url = "https://files.pythonhosted.org/packages/89/48/44590e3fc158620f680a978aafe8f87a4c4320da81ed11552f0323aa9a57/bcrypt-5.0.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:db99dca3b1fdc3db87d7c57eac0c82281242d1eabf19dcb8a6b10eb29a2e72d1", size = 311152, upload-time = "2025-09-25T19:50:24.597Z" }, - { url = "https://files.pythonhosted.org/packages/5f/85/e4fbfc46f14f47b0d20493669a625da5827d07e8a88ee460af6cd9768b44/bcrypt-5.0.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:5feebf85a9cefda32966d8171f5db7e3ba964b77fdfe31919622256f80f9cf42", size = 313284, upload-time = "2025-09-25T19:50:26.268Z" }, - { url = 
"https://files.pythonhosted.org/packages/25/ae/479f81d3f4594456a01ea2f05b132a519eff9ab5768a70430fa1132384b1/bcrypt-5.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3ca8a166b1140436e058298a34d88032ab62f15aae1c598580333dc21d27ef10", size = 341643, upload-time = "2025-09-25T19:50:28.02Z" }, - { url = "https://files.pythonhosted.org/packages/df/d2/36a086dee1473b14276cd6ea7f61aef3b2648710b5d7f1c9e032c29b859f/bcrypt-5.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:61afc381250c3182d9078551e3ac3a41da14154fbff647ddf52a769f588c4172", size = 359698, upload-time = "2025-09-25T19:50:31.347Z" }, - { url = "https://files.pythonhosted.org/packages/c0/f6/688d2cd64bfd0b14d805ddb8a565e11ca1fb0fd6817175d58b10052b6d88/bcrypt-5.0.0-cp39-abi3-win32.whl", hash = "sha256:64d7ce196203e468c457c37ec22390f1a61c85c6f0b8160fd752940ccfb3a683", size = 153725, upload-time = "2025-09-25T19:50:34.384Z" }, - { url = "https://files.pythonhosted.org/packages/9f/b9/9d9a641194a730bda138b3dfe53f584d61c58cd5230e37566e83ec2ffa0d/bcrypt-5.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:64ee8434b0da054d830fa8e89e1c8bf30061d539044a39524ff7dec90481e5c2", size = 150912, upload-time = "2025-09-25T19:50:35.69Z" }, - { url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" }, - { url = "https://files.pythonhosted.org/packages/8a/75/4aa9f5a4d40d762892066ba1046000b329c7cd58e888a6db878019b282dc/bcrypt-5.0.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7edda91d5ab52b15636d9c30da87d2cc84f426c72b9dba7a9b4fe142ba11f534", size = 271180, upload-time = "2025-09-25T19:50:38.575Z" }, - { url = "https://files.pythonhosted.org/packages/54/79/875f9558179573d40a9cc743038ac2bf67dfb79cecb1e8b5d70e88c94c3d/bcrypt-5.0.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash 
= "sha256:046ad6db88edb3c5ece4369af997938fb1c19d6a699b9c1b27b0db432faae4c4", size = 273791, upload-time = "2025-09-25T19:50:39.913Z" }, - { url = "https://files.pythonhosted.org/packages/bc/fe/975adb8c216174bf70fc17535f75e85ac06ed5252ea077be10d9cff5ce24/bcrypt-5.0.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:dcd58e2b3a908b5ecc9b9df2f0085592506ac2d5110786018ee5e160f28e0911", size = 270746, upload-time = "2025-09-25T19:50:43.306Z" }, - { url = "https://files.pythonhosted.org/packages/e4/f8/972c96f5a2b6c4b3deca57009d93e946bbdbe2241dca9806d502f29dd3ee/bcrypt-5.0.0-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:6b8f520b61e8781efee73cba14e3e8c9556ccfb375623f4f97429544734545b4", size = 273375, upload-time = "2025-09-25T19:50:45.43Z" }, -] - -[[package]] -name = "beautifulsoup4" -version = "4.13.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "soupsieve" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/85/2e/3e5079847e653b1f6dc647aa24549d68c6addb4c595cc0d902d1b19308ad/beautifulsoup4-4.13.5.tar.gz", hash = "sha256:5e70131382930e7c3de33450a2f54a63d5e4b19386eab43a5b34d594268f3695", size = 622954, upload-time = "2025-08-24T14:06:13.168Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/eb/f4151e0c7377a6e08a38108609ba5cede57986802757848688aeedd1b9e8/beautifulsoup4-4.13.5-py3-none-any.whl", hash = "sha256:642085eaa22233aceadff9c69651bc51e8bf3f874fb6d7104ece2beb24b47c4a", size = 105113, upload-time = "2025-08-24T14:06:14.884Z" }, -] - [[package]] name = "boto3" version = "1.43.26" @@ -436,22 +314,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/e6/5a5ec1033613e7812e5b19ec8c2a1889834fde336d8812d53019eac6e04a/botocore-1.43.26-py3-none-any.whl", hash = "sha256:eeb92265bae289555182a46341c998a656ab49c0dbdb762c65b30fe354fcc9e8", size = 15183593, upload-time = "2026-06-09T19:34:03.012Z" }, ] -[[package]] -name = "build" -version = 
"1.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "os_name == 'nt'" }, - { name = "importlib-metadata", marker = "python_full_version < '3.10.2'" }, - { name = "packaging" }, - { name = "pyproject-hooks" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/78/e0/df5e171f685f82f37b12e1f208064e24244911079d7b767447d1af7e0d70/build-1.5.0.tar.gz", hash = "sha256:302c22c3ba2a0fd5f3911918651341ebb3896176cbdec15bd421f80b1afc7647", size = 89796, upload-time = "2026-04-30T03:18:25.17Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/fe/6bea5c9162869c5beba5d9c8abbed835ec85bf1ec1fba05a3822325c45f3/build-1.5.0-py3-none-any.whl", hash = "sha256:13f3eecb844759ab66efec90ca17639bbf14dc06cb2fdf37a9010322d9c50a6f", size = 26018, upload-time = "2026-04-30T03:18:23.644Z" }, -] - [[package]] name = "case-insensitive-dictionary" version = "0.2.1" @@ -657,50 +519,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = "2026-04-02T09:28:37.794Z" }, ] -[[package]] -name = "chromadb" -version = "1.1.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "bcrypt" }, - { name = "build" }, - { name = "grpcio" }, - { name = "httpx" }, - { name = "importlib-resources" }, - { name = "jsonschema" }, - { name = "kubernetes" }, - { name = "mmh3" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "onnxruntime", version = "1.24.3", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "onnxruntime", version = "1.26.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-grpc" }, - { name = "opentelemetry-sdk" }, - { name = "orjson" }, - { name = "overrides" }, - { name = "posthog" }, - { name = "pybase64" }, - { name = "pydantic" }, - { name = "pypika" }, - { name = "pyyaml" }, - { name = "rich" }, - { name = "tenacity" }, - { name = "tokenizers" }, - { name = "tqdm" }, - { name = "typer" }, - { name = "typing-extensions" }, - { name = "uvicorn", extra = ["standard"] }, -] -sdist = { url = "https://files.pythonhosted.org/packages/7f/48/11851dddeadad6abe36ee071fedc99b5bdd2c324df3afa8cb952ae02798b/chromadb-1.1.1.tar.gz", hash = "sha256:ebfce0122753e306a76f1e291d4ddaebe5f01b5979b97ae0bc80b1d4024ff223", size = 1338109, upload-time = "2025-10-05T02:49:14.834Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/39/59/0d881a9b7eb63d8d2446cf67fcbb53fb8ae34991759d2b6024a067e90a9a/chromadb-1.1.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:27fe0e25ef0f83fb09c30355ab084fe6f246808a7ea29e8c19e85cf45785b90d", size = 19175479, upload-time = "2025-10-05T02:49:12.525Z" }, - { url = "https://files.pythonhosted.org/packages/94/4f/5a9fa317c84c98e70af48f74b00aa25589626c03a0428b4381b2095f3d73/chromadb-1.1.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:95aed58869683f12e7dcbf68b039fe5f576dbe9d1b86b8f4d014c9d077ccafd2", size = 18267188, upload-time = "2025-10-05T02:49:09.236Z" }, - { url = "https://files.pythonhosted.org/packages/45/1a/02defe2f1c8d1daedb084bbe85f5b6083510a3ba192ed57797a3649a4310/chromadb-1.1.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06776dad41389a00e7d63d936c3a15c179d502becaf99f75745ee11b062c9b6a", size = 18855754, upload-time = "2025-10-05T02:49:03.299Z" }, - { url = 
"https://files.pythonhosted.org/packages/5a/0d/80be82717e5dc19839af24558494811b6f2af2b261a8f21c51b872193b09/chromadb-1.1.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bba0096a7f5e975875ead23a91c0d41d977fbd3767f60d3305a011b0ace7afd3", size = 19893681, upload-time = "2025-10-05T02:49:06.481Z" }, - { url = "https://files.pythonhosted.org/packages/2d/6e/956e62975305a4e31daf6114a73b3b0683a8f36f8d70b20aabd466770edb/chromadb-1.1.1-cp39-abi3-win_amd64.whl", hash = "sha256:a77aa026a73a18181fd89bbbdb86191c9a82fd42aa0b549ff18d8cae56394c8b", size = 19844042, upload-time = "2025-10-05T02:49:16.925Z" }, -] - [[package]] name = "click" version = "8.1.8" @@ -840,121 +658,6 @@ toml = [ { name = "tomli", marker = "python_full_version <= '3.11'" }, ] -[[package]] -name = "crewai" -version = "1.14.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiofiles" }, - { name = "aiosqlite" }, - { name = "appdirs" }, - { name = "chromadb" }, - { name = "click" }, - { name = "crewai-cli" }, - { name = "crewai-core" }, - { name = "httpx" }, - { name = "instructor" }, - { name = "json-repair" }, - { name = "json5" }, - { name = "jsonref" }, - { name = "lancedb" }, - { name = "mcp" }, - { name = "openai" }, - { name = "openpyxl" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, - { name = "pdfplumber" }, - { name = "portalocker" }, - { name = "pydantic" }, - { name = "pydantic-settings" }, - { name = "pyjwt" }, - { name = "python-dotenv" }, - { name = "pyyaml" }, - { name = "regex" }, - { name = "tokenizers" }, - { name = "tomli" }, - { name = "tomli-w" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/12/5b/b90f9dcf649040ead9fe83f2b8c0b90c3894325079a9ea093f2f663c1b5a/crewai-1.14.6.tar.gz", hash = "sha256:e8f0cfbee70ded59ede89fce42741cb2608800132bdf9c6d8e753c90b93e246f", size = 7617349, upload-time = "2026-05-28T17:05:34.821Z" } -wheels = [ - { 
url = "https://files.pythonhosted.org/packages/a0/e4/5f0911d242f9a7e6863aa76d00e746ab9c574cc97292477fcd544dd5ed58/crewai-1.14.6-py3-none-any.whl", hash = "sha256:8c2fcb9b20a61d266803865510cd7f4c021a4059038cf5625a3284235b104993", size = 976412, upload-time = "2026-05-28T17:05:31.984Z" }, -] - -[package.optional-dependencies] -tools = [ - { name = "crewai-tools" }, -] - -[[package]] -name = "crewai-cli" -version = "1.14.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "appdirs" }, - { name = "certifi" }, - { name = "click" }, - { name = "crewai-core" }, - { name = "cryptography" }, - { name = "httpx" }, - { name = "packaging" }, - { name = "pydantic" }, - { name = "pydantic-settings" }, - { name = "pyjwt" }, - { name = "python-dotenv" }, - { name = "rich" }, - { name = "textual" }, - { name = "tomli" }, - { name = "tomli-w" }, - { name = "uv" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/99/c3/deff669aa492d50a3b6c8a39264260c82d06c055ffe12e065b9787244d9d/crewai_cli-1.14.6.tar.gz", hash = "sha256:f9d20bdd5aa48b41ff3a830794c3e5100b1bbf9555895b019d4b02984a986b91", size = 110300, upload-time = "2026-05-28T17:05:38.091Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f3/5f/2dbb2fe392356bd9ed2ec03026a8e84a486110f2cb5fd67e46bff847cbf7/crewai_cli-1.14.6-py3-none-any.whl", hash = "sha256:4d8324c86b5f2456b517b6e00bcf7c6b57d9385d2b067346a2ff5511c6a6cfc8", size = 111466, upload-time = "2026-05-28T17:05:36.659Z" }, -] - -[[package]] -name = "crewai-core" -version = "1.14.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "appdirs" }, - { name = "cryptography" }, - { name = "httpx" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, - { name = "packaging" }, - { name = "portalocker" }, - { name = "pydantic" }, - { name = "pyjwt" }, - { name = "rich" }, - { name = "tomli" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/51/29/ebd9cdf07ec790e333a5d4b269f7f325b6cf78bd8aabec52c94cba60dfd8/crewai_core-1.14.6.tar.gz", hash = "sha256:9eee3c82d29c9e812303659200ee6af9c6f43f5ff2c3cb6d7cc3b2ba371b44e1", size = 20141, upload-time = "2026-05-28T17:05:40.421Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/db/29/fbf5bb6c916592603c179d042d5704da1cdc3445edf4fb1c0546d35ccba6/crewai_core-1.14.6-py3-none-any.whl", hash = "sha256:a02a991900e648ea49fc81cf4db2ad5d776f39dc1191457423de1f19e24eea89", size = 28305, upload-time = "2026-05-28T17:05:39.147Z" }, -] - -[[package]] -name = "crewai-tools" -version = "1.14.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "beautifulsoup4" }, - { name = "crewai" }, - { name = "pymupdf" }, - { name = "python-docx" }, - { name = "pytube" }, - { name = "requests" }, - { name = "tiktoken" }, - { name = "youtube-transcript-api" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1c/4d/d3c4d6b4ea00ff66e4bb74165cb50ce02a4ced4e4f7fb2610c6d39f441d1/crewai_tools-1.14.6.tar.gz", hash = "sha256:a8ac2afe1b648ecdd58fa68cf9c77040ae011c1a9f9a848d512ca0d6c8f97fd7", size = 895013, upload-time = "2026-05-28T17:05:46.644Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/de/75/da5801576a5bbb81d0b59e4acb0fe5c509dcd0535249f77fda0d4758dd54/crewai_tools-1.14.6-py3-none-any.whl", hash = "sha256:411a09eab6b8fa713a49f2820314e74f6d693a6600aef785a498e8f42ec9eb71", size = 809650, upload-time = "2026-05-28T17:05:44.721Z" }, -] - [[package]] name = "cryptography" version = "48.0.1" @@ -1094,27 +797,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/82/e5d2c1c67d19841e9edc74954c827444ae826978499bde3dfc1d007c8c11/deepmerge-2.0-py3-none-any.whl", hash = "sha256:6de9ce507115cff0bed95ff0ce9ecc31088ef50cbdf09bc90a09349a318b3d00", size = 13475, upload-time = "2024-08-30T05:31:48.659Z" }, ] -[[package]] -name = "defusedxml" -version = "0.7.1" -source = { registry = 
"https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, -] - -[[package]] -name = "deprecation" -version = "2.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5a/d3/8ae2869247df154b64c1884d7346d412fed0c49df84db635aab2d1c40e62/deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff", size = 173788, upload-time = "2020-04-20T14:23:38.738Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a", size = 11178, upload-time = "2020-04-20T14:23:36.581Z" }, -] - [[package]] name = "distro" version = "1.9.0" @@ -1139,24 +821,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/5f/ed01f9a3cdffbd5a008556fc7b2a08ddb1cc6ace7effa7340604b1d16699/docstring_parser-0.18.0-py3-none-any.whl", hash = "sha256:b3fcbed555c47d8479be0796ef7e19c2670d428d72e96da63f3a40122860374b", size = 22484, upload-time = "2026-04-14T04:09:18.638Z" }, ] -[[package]] -name = "durationpy" -version = "0.10" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/9d/a4/e44218c2b394e31a6dd0d6b095c4e1f32d0be54c2a4b250032d717647bab/durationpy-0.10.tar.gz", hash = "sha256:1fa6893409a6e739c9c72334fc65cca1f355dbdd93405d30f726deb5bde42fba", size = 3335, upload-time = "2025-05-17T13:52:37.26Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286", size = 3922, upload-time = "2025-05-17T13:52:36.463Z" }, -] - -[[package]] -name = "et-xmlfile" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" }, -] - [[package]] name = "exceptiongroup" version = "1.3.1" @@ -1207,17 +871,14 @@ dependencies = [ [package.optional-dependencies] ai = [ - { name = "crewai", extra = ["tools"] }, { name = "langchain-core" }, { name = "langsmith" }, { name = "mcp" }, { name = "strands-agents" }, - { name = "uv" }, ] all = [ { name = "anthropic" }, { name = "boto3" }, - { name = "crewai", extra = ["tools"] }, { name = "fastapi" }, { name = "filelock" }, { name = "google-api-python-client" }, @@ -1236,7 +897,6 @@ all = [ { name = "slack-sdk" }, { name = "sqlite-vec" }, { name = "strands-agents" }, - { name = "uv" }, { name = "uvicorn" }, ] anthropic = [ @@ -1245,10 +905,6 @@ anthropic = [ aws = [ { name = "boto3" }, ] -crewai = [ 
- { name = "crewai", extra = ["tools"] }, - { name = "uv" }, -] dev = [ { name = "coverage", extra = ["toml"] }, { name = "hypothesis" }, @@ -1333,9 +989,6 @@ requires-dist = [ { name = "boto3", marker = "extra == 'aws'", specifier = ">=1.42.92" }, { name = "case-insensitive-dictionary", specifier = ">=0.2.1" }, { name = "coverage", extras = ["toml"], marker = "extra == 'tests'", specifier = ">=7.6.0" }, - { name = "crewai", extras = ["tools"], marker = "extra == 'ai'", specifier = ">=1.14.2rc1" }, - { name = "crewai", extras = ["tools"], marker = "extra == 'all'", specifier = ">=1.14.2rc1" }, - { name = "crewai", extras = ["tools"], marker = "extra == 'crewai'", specifier = ">=1.14.2rc1" }, { name = "deepmerge", specifier = ">=2.0" }, { name = "extended-data", extras = ["tests", "typing"], marker = "extra == 'dev'" }, { name = "fastapi", marker = "extra == 'all'", specifier = ">=0.136.0" }, @@ -1406,16 +1059,13 @@ requires-dist = [ { name = "types-pyyaml", marker = "extra == 'typing'", specifier = ">=6.0.12.20240724" }, { name = "types-requests", marker = "extra == 'typing'", specifier = ">=2.33.0.20260408" }, { name = "typing-extensions", specifier = ">=4.12.2" }, - { name = "uv", marker = "extra == 'ai'", specifier = ">=0.11.7" }, - { name = "uv", marker = "extra == 'all'", specifier = ">=0.11.7" }, - { name = "uv", marker = "extra == 'crewai'", specifier = ">=0.11.7" }, { name = "uvicorn", marker = "extra == 'all'", specifier = ">=0.45.0" }, { name = "uvicorn", marker = "extra == 'webhooks'", specifier = ">=0.45.0" }, { name = "validators", specifier = ">=0.22.0" }, { name = "validators", marker = "extra == 'meshy'", specifier = ">=0.35.0" }, { name = "wrapt", specifier = ">=1.16.0" }, ] -provides-extras = ["aws", "google", "github", "slack", "vault", "zoom", "anthropic", "cursor", "meshy", "secrets", "langchain", "crewai", "strands", "mcp", "ai", "webhooks", "vector", "tests", "typing", "dev", "all"] +provides-extras = ["aws", "google", "github", "slack", 
"vault", "zoom", "anthropic", "cursor", "meshy", "secrets", "langchain", "strands", "mcp", "ai", "webhooks", "vector", "tests", "typing", "dev", "all"] [[package]] name = "fastapi" @@ -1442,14 +1092,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4c/a0/614c5fe402fd88951df45f4dda2fa3b4e17a99ecd92340771929169b3b95/filelock-3.29.1-py3-none-any.whl", hash = "sha256:85199dfd706869641b72b2e8955d5416a4b2b7dc4b0e8e6d97b4cc1299a6983b", size = 40750, upload-time = "2026-06-03T15:19:02.959Z" }, ] -[[package]] -name = "flatbuffers" -version = "25.12.19" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e8/2d/d2a548598be01649e2d46231d151a6c56d10b964d94043a335ae56ea2d92/flatbuffers-25.12.19-py2.py3-none-any.whl", hash = "sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4", size = 26661, upload-time = "2025-12-19T23:16:13.622Z" }, -] - [[package]] name = "frozenlist" version = "1.8.0" @@ -1874,56 +1516,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2f/90/fd509079dfcab01102c0fdd87f3a9506894bc70afcf9e9785ef6b2b3aff6/httplib2-0.31.2-py3-none-any.whl", hash = "sha256:dbf0c2fa3862acf3c55c078ea9c0bc4481d7dc5117cae71be9514912cf9f8349", size = 91099, upload-time = "2026-01-23T11:04:42.78Z" }, ] -[[package]] -name = "httptools" -version = "0.8.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/e5/d471fcb0e14523fe1c3f4ba58ca52480e7bd70ad7109a3846bc75892f7fb/httptools-0.8.0.tar.gz", hash = "sha256:6b2a32f18d97e16e90827d7a819ffa8dbd8cc245fc4e1fa9d1095b54ef4bd999", size = 271342, upload-time = "2026-05-25T22:17:48.841Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/40/b9/be66eb0decd730d89b9c94f930e4b8d87787b05724bb84af98bfd825f72c/httptools-0.8.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bf3b6f807c8541503cecfbb8a8dffb385640d0d96102f3d112aa8740f9b7c826", size = 208805, upload-time 
= "2026-05-25T22:16:50.434Z" }, - { url = "https://files.pythonhosted.org/packages/9d/f7/b4d41eaae2869d31356bc4bbf546f44fae83ff298af0a043ca0625b06773/httptools-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:da684f2e1aa2ee9bdcb083f3f3a68c5956750b375bc5df864d3a5f0c42a40b77", size = 113527, upload-time = "2026-05-25T22:16:51.672Z" }, - { url = "https://files.pythonhosted.org/packages/e6/e4/77487e14fc7be47180fd0eb4267c7486d0cc59b74031839a3daf8650136b/httptools-0.8.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a6f21e2a3b0067bbe7f67e34cfd16276af556e5e52f4c7503be0cb5f90e905e4", size = 450035, upload-time = "2026-05-25T22:16:53.313Z" }, - { url = "https://files.pythonhosted.org/packages/da/72/5a8f787e323f56fbd86c32a4be92a86776e4cfe8b4317db999f452028362/httptools-0.8.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea897f0c729581ebf72131a438a7932d9b14efef72d75ada966700cac3caaeb", size = 451101, upload-time = "2026-05-25T22:16:54.696Z" }, - { url = "https://files.pythonhosted.org/packages/ed/41/b44a25560955197674b6744cb903664300e239235a5eaa69df0890d87054/httptools-0.8.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c0d726cc107fceb7d45f978483b4b70dd8caa836f5914d3434bb18628eb73813", size = 436140, upload-time = "2026-05-25T22:16:56.239Z" }, - { url = "https://files.pythonhosted.org/packages/74/b0/054aac84c03d7e097bf4c605fb7e74eec3d65c0276adf64ee97f3a103ff5/httptools-0.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9878eb2785ba5eb70631ad269b37976f73d647955e26c91d490eb8a4edfda4ba", size = 437041, upload-time = "2026-05-25T22:16:57.716Z" }, - { url = "https://files.pythonhosted.org/packages/bb/e8/86b85bbc0ac7892232f1a99ab96a9aa71936984fa06adfc0afc83ca7789e/httptools-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:b205e5f5523fa039679da0dfe5a10132b2a4abeae6a86fdd1ddc035f7f836557", size = 90454, upload-time = "2026-05-25T22:16:58.871Z" }, - { url = 
"https://files.pythonhosted.org/packages/f8/d2/c3eedaef57de65c3cc5f8dc244cf12d09c84ad258a479055aad6db23206c/httptools-0.8.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed377e64805bdba4943c82717333f8f8603a13b09aff9cead2717c6c817fb168", size = 208428, upload-time = "2026-05-25T22:16:59.717Z" }, - { url = "https://files.pythonhosted.org/packages/f1/94/dfe435d90d0ef61ec0f2cc3d480eef78c59727c6c2ce039f433882f6131a/httptools-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9518c406d7b310f05adb1a37f80acabac40504a575d7c0da6d3e365c695ac20d", size = 113366, upload-time = "2026-05-25T22:17:00.795Z" }, - { url = "https://files.pythonhosted.org/packages/cc/d4/13025f1a56e615dcb331e0bbe2d9a1143212b58c263385fc5d2e558f5bac/httptools-0.8.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:57278e6fa0424c42a8a3e454828ab4f0aff27b40cddf9679579b98c6dce6a376", size = 464676, upload-time = "2026-05-25T22:17:02.014Z" }, - { url = "https://files.pythonhosted.org/packages/bf/95/4c1c26c0b985f8a3331682d802598f14e32dc41bf7509266eb2c04ad4801/httptools-0.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bbb8caadb2b742d293169d2b458b5c001ef70e3158704aa3d3ef9597624c5d1d", size = 464235, upload-time = "2026-05-25T22:17:03.109Z" }, - { url = "https://files.pythonhosted.org/packages/a2/82/6735be2b0ca527718c431cdb8e5f70c3862c0844a687df0f572c51e11497/httptools-0.8.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:52dd695b865fe96d9d2b16b64a895f3f57bf3cb064e8383cd3b5713a069e8085", size = 449809, upload-time = "2026-05-25T22:17:04.443Z" }, - { url = "https://files.pythonhosted.org/packages/b5/f9/5811c74f37a758c8a4aa3dc430375119d335947e883efc4664d8f3559a41/httptools-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:20b4aac66ff65f7db06a375808b78f42a94970aa22e826b3cb2b43eb09174124", size = 452174, upload-time = "2026-05-25T22:17:05.476Z" }, - { url = 
"https://files.pythonhosted.org/packages/cc/94/97b75870dea07b71e3ec535cebe525b08d723152e4c7d13fa887e51f4de2/httptools-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:a1b4c8e7a489a0d750d91894e9a8cdc295838f1924c0ca903ae993456fddec07", size = 90991, upload-time = "2026-05-25T22:17:06.75Z" }, - { url = "https://files.pythonhosted.org/packages/14/88/1d21a36da8f5cb0fa49eafd4b169eba5608d57e75bbcf61845cbc6243216/httptools-0.8.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:880490234c10f70a9830743097e8958d6e4b9f5a0ffc24515023afeef984054d", size = 208247, upload-time = "2026-05-25T22:17:07.843Z" }, - { url = "https://files.pythonhosted.org/packages/a5/42/cc4feea2945cb3051038f090c9b36bd5b8a9d7f5a894a506a8983e33fd1c/httptools-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5931891fb7b441b8a3853cf1b85c82c903defce084dd5f6771ca46e31bf862c5", size = 113064, upload-time = "2026-05-25T22:17:09.136Z" }, - { url = "https://files.pythonhosted.org/packages/e3/a6/febbb8b8db0f58b38e44ad6cb946e6a255ae49b55f2e8543408fb7501ccd/httptools-0.8.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b15fc622b0f869d19207c4089a501d9bcc63ca5e071ffdd2f03f922df882dcb2", size = 523851, upload-time = "2026-05-25T22:17:10.106Z" }, - { url = "https://files.pythonhosted.org/packages/b7/e4/f90a0df0b83beff265b7e3b65f2a4cefd95792d4be0ac3e16049f2acd3c2/httptools-0.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:425f83884fd6343828d8c565f046cb72b6d19063f6924093e11bcd8e1548cd09", size = 518842, upload-time = "2026-05-25T22:17:11.218Z" }, - { url = "https://files.pythonhosted.org/packages/9e/2d/0c9ac76dd2c893841fbf6498d6acec4f2442e1b7067f6e3e316a80e494e8/httptools-0.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ef7c3c97f4311c7be57e2986629df89d49cb434dbff78eafcd48c2bff986b15a", size = 501238, upload-time = "2026-05-25T22:17:12.728Z" }, - { url = 
"https://files.pythonhosted.org/packages/ca/42/906adc91ae3a5fa9c59c0a2f21c139725bd7e5b41ae6acd485cd14123ebf/httptools-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a1afd7c9fbff0d9f5d489c4ce2768bd09c84a46ddefc7161e6aa82ae35c85745", size = 509567, upload-time = "2026-05-25T22:17:13.842Z" }, - { url = "https://files.pythonhosted.org/packages/05/0b/4240efeb672751ee5b9b380cb0e3fdc050bc05f68adc7a8aefc4fcd9a69a/httptools-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:cd96f29b4bab1d42fa6e3d008711c75e0f79e94e06827330160e3a304227f150", size = 90918, upload-time = "2026-05-25T22:17:15.155Z" }, - { url = "https://files.pythonhosted.org/packages/5e/e5/8cfcabc5546e8022f168be28bcdaa128a240a0befdd03b59d558b4f18bd6/httptools-0.8.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:614ceea8ea606848bece2338ac03b3ce5324bcb4be8dc7d377ed708012fa4db8", size = 205148, upload-time = "2026-05-25T22:17:16.333Z" }, - { url = "https://files.pythonhosted.org/packages/2a/0e/0fb14848c19a686c8062ff9067c1a48793e3224b47bc5b201535b6036fce/httptools-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2d689918c15a013c65ef52d9fd495d766893ab831a2c8d89f2ac5940a5df847c", size = 111368, upload-time = "2026-05-25T22:17:17.586Z" }, - { url = "https://files.pythonhosted.org/packages/2e/1b/46f1cecf06b9bbde8e4b8c88034ac7908989e5ff7a3a388ef38392949c1f/httptools-0.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:eb3028cca2fc0a6d720e52ef61d8ebb62fcbfeb1de56874546d858d3f25a26b7", size = 486447, upload-time = "2026-05-25T22:17:18.564Z" }, - { url = "https://files.pythonhosted.org/packages/77/00/258bfc0837221f81d9725c45f9b948a6a6b2994a147a4fb66e85100c668f/httptools-0.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:88bdd940f2b5d487b4d032c6afa5489a7dc4694410d43de3c38c4fb3af0dc45d", size = 482448, upload-time = "2026-05-25T22:17:19.912Z" }, - { url = 
"https://files.pythonhosted.org/packages/04/ab/d1cef3b5523f4d272a70f42a776c3169a2dddfe3a54de4b2ce4a36341528/httptools-0.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6a43c9dd399758ccc0531acb0a3c4a6c299ee893ee9400e9c893b7bdcfae0681", size = 464460, upload-time = "2026-05-25T22:17:20.882Z" }, - { url = "https://files.pythonhosted.org/packages/ce/48/5d1d072442277bb2b3434e0e60690b8e8c23840ef7de8b6ea54040a536d3/httptools-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0770728beb05094c809b98e814edff5fef69d26ad7d21185f2f6d5884a0ba683", size = 471312, upload-time = "2026-05-25T22:17:22.085Z" }, - { url = "https://files.pythonhosted.org/packages/0d/66/b96623b27e51a68199ef4efdda0613cced9233fe3062ac74e50749c5ad37/httptools-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:7685df791fad561384bfb139e77fde27a1ffd93134e016f95a0db424ffbf77b1", size = 90117, upload-time = "2026-05-25T22:17:23.074Z" }, - { url = "https://files.pythonhosted.org/packages/1a/12/fa3fbf5f9517b273edea2dc982aa82a8c634091e67c590792b729017bc6f/httptools-0.8.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:de242a49b5d18e0a8776e654e9f6bf6d89f3875a5c35b425a0e7ce940feb3fd6", size = 206183, upload-time = "2026-05-25T22:17:24.004Z" }, - { url = "https://files.pythonhosted.org/packages/30/fc/5e7c4cb443370f2090a3aba0453a07384d29ff66b7435bb90e77e1037599/httptools-0.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:159e9ab5f701ccd42e555a12f1ad8ff69702910fc1c996cf2bb66e5fcb7a231b", size = 112079, upload-time = "2026-05-25T22:17:25.216Z" }, - { url = "https://files.pythonhosted.org/packages/ba/53/771bd891eb0f236f32145d6a1775777ec85745f3cc983a1f23d1a3b8ddfe/httptools-0.8.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c4a9f1707e4823d54dfec6c33fa3697d302aed536ed352a7ebb5a061ddb869d0", size = 481596, upload-time = "2026-05-25T22:17:26.186Z" }, - { url = 
"https://files.pythonhosted.org/packages/62/42/94e15bc68ce3d423243c45d7f1b0c7561f13844f97dc52ae23182fb65628/httptools-0.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d76ad7b951387e3632c8716a9bb03ac5b45c5f16119aa409db0459520887944e", size = 480865, upload-time = "2026-05-25T22:17:27.542Z" }, - { url = "https://files.pythonhosted.org/packages/1c/7c/fe2980fc03723272e30f135b62360b075f513dfe7cc73aef36c7f04012bd/httptools-0.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a3b7387147361c3fd47a0bde763c5c91b5b4cd4dc9989b8ece84ff436c99843b", size = 463189, upload-time = "2026-05-25T22:17:28.546Z" }, - { url = "https://files.pythonhosted.org/packages/15/1b/47fc5fff68acd1bfa20b4734059c9a06cadb88119dcd5258b5b0d21d91c8/httptools-0.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f256d6ce930c52ca1cb2a960b7da03548c454e7d28b06059ad41bfe789036ce0", size = 466610, upload-time = "2026-05-25T22:17:29.816Z" }, - { url = "https://files.pythonhosted.org/packages/60/bd/07b13c93ffd9bec9546e0d43f8e19378dd696dbd278511406bc07371ef1f/httptools-0.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:19d1ee275bb59ba2643ba9a3a1e51cc0c788caf2b8df506368e03f56fdd08527", size = 92705, upload-time = "2026-05-25T22:17:31.133Z" }, - { url = "https://files.pythonhosted.org/packages/fd/c4/121648f68ce066d7bd762d6b6d97e620847642d38d54f3d90ff11d947629/httptools-0.8.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:de1ed58a974e75d56560acc7e7fed01a454994429456f65209789992e41f2568", size = 215023, upload-time = "2026-05-25T22:17:32.401Z" }, - { url = "https://files.pythonhosted.org/packages/b9/b0/312a062ae741ae3e8baa8c8bf20be81b2e67337b259ab4349bebc7b6142e/httptools-0.8.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e93c227b595c6926c1acee96891dd9da4be338cfbe82e5cd3bb9d8dd7dc4ac0b", size = 117405, upload-time = "2026-05-25T22:17:33.742Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/37/fccd705f795386bb05bf413012fecff2a33e5aa8c2f069096de3e9fd8702/httptools-0.8.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2a021c3a8e65cc125390d72f59b968afca3bdcaff25bd67965e0a055a14946ca", size = 558497, upload-time = "2026-05-25T22:17:34.732Z" }, - { url = "https://files.pythonhosted.org/packages/bd/39/f172e8003576de35f5ba77ff417cf0e34429d35dc014deef15afa337a72c/httptools-0.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48774d39cbb70e2b1f71f88852a3087ae1d3a1eb80482bb48c13067ab080c14f", size = 571585, upload-time = "2026-05-25T22:17:35.813Z" }, - { url = "https://files.pythonhosted.org/packages/3e/b9/f5564760af99f3dbbf3f9104dc00e5da27e96cf433c6bdcf77617f70bf3f/httptools-0.8.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:88eead8ec8680a9f146c655bc88445a325bd7921cfd8194c7337e9467282427d", size = 543297, upload-time = "2026-05-25T22:17:37.08Z" }, - { url = "https://files.pythonhosted.org/packages/99/67/8d9f2c313618e161b82f3873188e7196126da1d6e29688df40eb3997c77a/httptools-0.8.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2c032fa028f46871ec7e1fc59fc15e8023eab3e6bbe6ece786a1611719a5d081", size = 539535, upload-time = "2026-05-25T22:17:38.032Z" }, - { url = "https://files.pythonhosted.org/packages/48/63/b906c01e53f50d432c0defe43ce52764a111dc1bdd028bafbeb54dcfd008/httptools-0.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:384c17174464c8e873398b7af24f0b1f44d992c820328413951a625323155d77", size = 108209, upload-time = "2026-05-25T22:17:39.473Z" }, -] - [[package]] name = "httpx" version = "0.28.1" @@ -2014,15 +1606,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = 
"2025-12-21T10:00:18.329Z" }, ] -[[package]] -name = "importlib-resources" -version = "7.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e4/06/b56dfa750b44e86157093bc8fca0ab81dccbf5260510de4eaf1cb69b5b99/importlib_resources-7.1.0.tar.gz", hash = "sha256:0722d4c6212489c530f2a145a34c0a7a3b4721bc96a15fada5930e2a0b760708", size = 44985, upload-time = "2026-04-12T16:36:09.232Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/db/55a262f3606bebcae07cc14095338471ad7c0bbcaa37707e6f0ee49725b7/importlib_resources-7.1.0-py3-none-any.whl", hash = "sha256:1bd7b48b4088eddb2cd16382150bb515af0bd2c70128194392725f82ad2c96a1", size = 37232, upload-time = "2026-04-12T16:36:08.219Z" }, -] - [[package]] name = "inflection" version = "0.5.1" @@ -2041,28 +1624,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] -[[package]] -name = "instructor" -version = "1.15.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "docstring-parser" }, - { name = "jinja2" }, - { name = "jiter" }, - { name = "openai" }, - { name = "pydantic" }, - { name = "pydantic-core" }, - { name = "requests" }, - { name = "rich" }, - { name = "tenacity" }, - { name = "typer" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/dc/a4/832cfb15420360e26d2d85bd9d5fe1e4b839d52587574d389bc31284bf6f/instructor-1.15.1.tar.gz", hash = "sha256:c72406469d9025b742e83cf0c13e914b317db2089d08d889944e74fcd659ef94", size = 69948370, upload-time = "2026-04-03T01:51:30.107Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d8/c8/36c5d9b80aaf40ba9a7084a8fc18c967db6bf248a4cc8d0f0816b14284be/instructor-1.15.1-py3-none-any.whl", 
hash = "sha256:be81d17ba2b154a04ab4720808f24f9d6b598f80992f82eaf9cc79006099cf6c", size = 178156, upload-time = "2026-04-03T01:51:23.098Z" }, -] - [[package]] name = "jinja2" version = "3.1.6" @@ -2190,24 +1751,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, ] -[[package]] -name = "json-repair" -version = "0.25.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7c/60/484ee009c1867ddc5ffe0ff2131b82e80bbf13fdb59f3d93834f98e56a9f/json_repair-0.25.3.tar.gz", hash = "sha256:4ee970581a05b0b258b749eb8bcac21de380edda97c3717a4edfafc519ec21a4", size = 20619, upload-time = "2024-07-10T13:42:18.977Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f0/9e/2ab68cc0ff030e1ef78329d7b933473d3ad2c7d0e66aede6a7c87f74753c/json_repair-0.25.3-py3-none-any.whl", hash = "sha256:f00b510dd21b31ebe72581bdb07e66381df2883d6f640c89605e482882c12b17", size = 12812, upload-time = "2024-07-10T13:42:16.918Z" }, -] - -[[package]] -name = "json5" -version = "0.10.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/85/3d/bbe62f3d0c05a689c711cff57b2e3ac3d3e526380adb7c781989f075115c/json5-0.10.0.tar.gz", hash = "sha256:e66941c8f0a02026943c52c2eb34ebeb2a6f819a0be05920a6f5243cd30fd559", size = 48202, upload-time = "2024-11-26T19:56:37.823Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/aa/42/797895b952b682c3dafe23b1834507ee7f02f4d6299b65aaa61425763278/json5-0.10.0-py3-none-any.whl", hash = "sha256:19b23410220a7271e8377f81ba8aacba2fdd56947fbb137ee5977cbe1f5e8dfa", size = 34049, upload-time = "2024-11-26T19:56:36.649Z" }, -] - [[package]] name = "jsonpatch" version = "1.33" @@ -2229,15 +1772,6 @@ wheels 
= [ { url = "https://files.pythonhosted.org/packages/9e/6a/a83720e953b1682d2d109d3c2dbb0bc9bf28cc1cbc205be4ef4be5da709d/jsonpointer-3.1.1-py3-none-any.whl", hash = "sha256:8ff8b95779d071ba472cf5bc913028df06031797532f08a7d5b602d8b2a488ca", size = 7659, upload-time = "2026-03-23T22:32:31.568Z" }, ] -[[package]] -name = "jsonref" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/aa/0d/c1f3277e90ccdb50d33ed5ba1ec5b3f0a242ed8c1b1a85d3afeb68464dca/jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552", size = 8814, upload-time = "2023-01-16T16:10:04.455Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/ec/e1db9922bceb168197a558a2b8c03a7963f1afe93517ddd3cf99f202f996/jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9", size = 9425, upload-time = "2023-01-16T16:10:02.255Z" }, -] - [[package]] name = "jsonschema" version = "4.26.0" @@ -2266,78 +1800,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, ] -[[package]] -name = "kubernetes" -version = "36.0.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "certifi" }, - { name = "durationpy" }, - { name = "python-dateutil" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "requests-oauthlib" }, - { name = "six" }, - { name = "urllib3" }, - { name = "websocket-client" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/2f/57/8b538af5076bc3372949d76f70ba3449bdfe52f9e6488170fa5d4f7cbe70/kubernetes-36.0.2.tar.gz", hash = 
"sha256:03551fcb49cae1f708f63624041e37403545b7aaed10cbf54e2b01a37a5438e3", size = 2336738, upload-time = "2026-06-01T18:20:30.785Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/46/2c/5c160dbdef7123f8cc97fd8ece7e0198627a426a2a49614845e9086feb8d/kubernetes-36.0.2-py2.py3-none-any.whl", hash = "sha256:faf9b5241b58de0c4a5069f2a0ffc8ac06fece7215156cd3d3ba081a78a858b6", size = 4617568, upload-time = "2026-06-01T18:20:28.737Z" }, -] - -[[package]] -name = "lance-namespace" -version = "0.8.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "lance-namespace-urllib3-client" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/33/fd/3a8731b2ed83ba198b15b5963c6df4836736057f23206107b0ab4a5f57fd/lance_namespace-0.8.2.tar.gz", hash = "sha256:78cd6ad2f2764bccded1d8b64474419cc5571956b68a23ad2770977ddaeb03a1", size = 11281, upload-time = "2026-06-05T04:46:23.696Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/cb/7f3cc83b8b35a27a27539c3086562d11010f10ca113808ce1078308ca5c0/lance_namespace-0.8.2-py3-none-any.whl", hash = "sha256:6531a4d8b95f201835b954a949f890d03cbc3124aca5f1dd21d999157a08935f", size = 13113, upload-time = "2026-06-05T04:46:27.781Z" }, -] - -[[package]] -name = "lance-namespace-urllib3-client" -version = "0.8.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, - { name = "python-dateutil" }, - { name = "typing-extensions" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5d/98/a0bb656a4f2d5989e1267a62acbb5a9ed8eb15ac45fbfe380b5a59dba642/lance_namespace_urllib3_client-0.8.2.tar.gz", hash = "sha256:82f0a5c9b6b7fde67326d6038b89ed807e8d14692e461246f1a7df5c36b804d6", size = 222291, upload-time = "2026-06-05T04:46:24.958Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ff/58/6a993bf50375170547d0e0bfe9189cc9b378b89482dc2c7bb75ef170a49a/lance_namespace_urllib3_client-0.8.2-py3-none-any.whl", 
hash = "sha256:cb8dc098fcd42f848eb5206fb49ebc3b5f162ee32b5c4155a5048ffd30a7cd37", size = 364909, upload-time = "2026-06-05T04:46:26.504Z" }, -] - -[[package]] -name = "lancedb" -version = "0.30.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "deprecation" }, - { name = "lance-namespace" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "overrides", marker = "python_full_version < '3.12'" }, - { name = "packaging" }, - { name = "pyarrow" }, - { name = "pydantic" }, - { name = "tqdm" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/13/2f/1577778ad57dba0c55dc13d87230583e14541c82562483ecf8bb2f8e8a00/lancedb-0.30.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:be2a9a43a65c330ccfd08115afb26106cd8d16788522fe7693d3a1f4e01ad321", size = 41959907, upload-time = "2026-03-16T23:03:04.551Z" }, - { url = "https://files.pythonhosted.org/packages/f1/ca/8c2a04ce499a2a97d1a0de2b7e84fa8166f988a9a495e1ada860110489c2/lancedb-0.30.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be6a4ba2a1799a426cbf2ba5ea2559a7389a569e9a31f2409d531ceb59d42f35", size = 43873070, upload-time = "2026-03-16T23:11:01.352Z" }, - { url = "https://files.pythonhosted.org/packages/16/68/e01bf7837454a5ce9e2f6773905e07b09a949bc88136c0773c8166ed7729/lancedb-0.30.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a967ec05f9930770aeb077bc5579769b1bedf559fcd03a592d9644084625918", size = 46891197, upload-time = "2026-03-16T23:14:39.18Z" }, - { url = "https://files.pythonhosted.org/packages/43/d1/9085ad17abd98f3a180d7860df3190b2d76f99f533c76d7c7494cec4139d/lancedb-0.30.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = 
"sha256:05c66f40f7d4f6f24208e786c40f84b87b1b8e55505305849dd3fed3b78431a3", size = 43877660, upload-time = "2026-03-16T23:11:00.837Z" }, - { url = "https://files.pythonhosted.org/packages/ea/69/504ee25c57c3f23c80276b5b7b5e4c0f98a5197a7e9e51d3c50500d2b53a/lancedb-0.30.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:bdcd27d98554ed11b6f345b14d1307b0e2332d5654767e9ee2e23d9b2d6513d1", size = 46932144, upload-time = "2026-03-16T23:15:00.474Z" }, - { url = "https://files.pythonhosted.org/packages/2c/85/d5550f22023e672af1945394f7a06a578fcab2980ecc6666acef3428a771/lancedb-0.30.0-cp39-abi3-win_amd64.whl", hash = "sha256:4751ff0446b90be4d4dccfe05f6c105f403a05f3b8531ab99eedc1c656aca950", size = 51121310, upload-time = "2026-03-16T23:43:23.89Z" }, -] - [[package]] name = "langchain-core" version = "1.4.3" @@ -2485,136 +1947,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/62/b40b382fa0c66fee1478073eb8db352a4a6beda4a1adccf1df911d8c289c/librt-0.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dee008f20b542e3cd162ba338a7f9ec0f6d23d395f66fe8aeeec3c9d067ea253", size = 102572, upload-time = "2026-05-10T18:17:06.809Z" }, ] -[[package]] -name = "linkify-it-py" -version = "2.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "uc-micro-py" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/2e/c9/06ea13676ef354f0af6169587ae292d3e2406e212876a413bf9eece4eb23/linkify_it_py-2.1.0.tar.gz", hash = "sha256:43360231720999c10e9328dc3691160e27a718e280673d444c38d7d3aaa3b98b", size = 29158, upload-time = "2026-03-01T07:48:47.683Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/de/88b3be5c31b22333b3ca2f6ff1de4e863d8fe45aaea7485f591970ec1d3e/linkify_it_py-2.1.0-py3-none-any.whl", hash = "sha256:0d252c1594ecba2ecedc444053db5d3a9b7ec1b0dd929c8f1d74dce89f86c05e", size = 19878, upload-time = "2026-03-01T07:48:46.098Z" }, -] - -[[package]] -name = "lxml" -version = "6.1.1" -source = { registry = 
"https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/05/3b/aab6728cae887456f409b4d75e8a01856e4f04bd510de38052a47768b680/lxml-6.1.1.tar.gz", hash = "sha256:ba96ae44888e0185281e937633a743ea90d5a196c6000f82565ebb0580012d40", size = 4197430, upload-time = "2026-05-18T19:19:06.424Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/12/da/dbe4dfc01ac226fb0504fad035f4d69f3202f3502e20e68537631daddd96/lxml-6.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:09dd5b7075dc2f7709654a46543ba1ea3c2e217b2ed8fbd413a8a945a0f40f60", size = 8541124, upload-time = "2026-05-18T19:17:11.589Z" }, - { url = "https://files.pythonhosted.org/packages/78/20/f7095ed9fc2c025f9cfe71cc6ec9f1feb05624edc1812423b5f1aecf3d4b/lxml-6.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f6ac4ef4d82dff54670227a69c67782ae0b811b5cf6b17954f1e8f7502fc0d1d", size = 4602783, upload-time = "2026-05-18T19:17:20.888Z" }, - { url = "https://files.pythonhosted.org/packages/4a/a4/65c63ca98bd129f6cff7b8c2fa48953ab058cc6005b541354e7dd54d8000/lxml-6.1.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:556e94a63c9b04716f8e4de2abb65775061f846e89331b6c5be79183a24f98ea", size = 5002687, upload-time = "2026-05-18T19:17:01.738Z" }, - { url = "https://files.pythonhosted.org/packages/96/1d/ab7a5c4b5a394d98a94e2d0fc67bab8297597426770dd4978370fbdaf531/lxml-6.1.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c6bf403fbb3b3e348a561a5f4f0b9961835657981c802a1df03653eef8a9074", size = 5155099, upload-time = "2026-05-18T19:17:05.159Z" }, - { url = "https://files.pythonhosted.org/packages/d0/b1/07603bfeeb891a2596d5c2a68f7d2f70f7d11c841ebe391412c69c2857b0/lxml-6.1.1-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1dde6131244bba38a17c745836ba190bc753fd73c9291666287fd0a3fa3dcf30", size = 5057225, upload-time = "2026-05-18T19:17:08.117Z" }, - { url = 
"https://files.pythonhosted.org/packages/7a/16/cb391ee4b90186fa16d9ebcbe3ea96c71b8da3b0686386c8dcbcc3c67d44/lxml-6.1.1-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98fc784c2c1440667aeedf8465bdfe10208acf0ead656a2c68627299f546b315", size = 5287643, upload-time = "2026-05-18T19:17:11.507Z" }, - { url = "https://files.pythonhosted.org/packages/eb/d6/b619717f918fd76747448fdbaee0e769edbc70e659b5b5d0112b7020b7a3/lxml-6.1.1-cp310-cp310-manylinux_2_28_i686.whl", hash = "sha256:add8cf6ddf9a65116119a28ece0f7886e30af27ba724a7594305f1d1b58a92a1", size = 5412445, upload-time = "2026-05-18T19:17:22.182Z" }, - { url = "https://files.pythonhosted.org/packages/c6/80/12bc5390ac0a3edeb579d9535e5049a5dda663438728e179d52fb319c33a/lxml-6.1.1-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:cf9d57306d848218f3601fee7601fab1a327c942d56e2e97610583cb4dd74206", size = 4770864, upload-time = "2026-05-18T19:17:26.851Z" }, - { url = "https://files.pythonhosted.org/packages/0b/59/6500c09da3137f54f020e908d81cfc5ee3e8888e908fd380207afad7c2e6/lxml-6.1.1-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:88136950da4d13c318bde414ce10219931937851327f44328f2df4d2c4614067", size = 5359594, upload-time = "2026-05-18T19:17:32.527Z" }, - { url = "https://files.pythonhosted.org/packages/f2/9b/f64b4cc6b7ebcf75d95af3cde934d254b5f2f10d4163928d838d86b6eb48/lxml-6.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cecdd5dfdc87b1fd87dbf81d4b037a544f47f4c744200a67013771682d67686a", size = 5107713, upload-time = "2026-05-18T19:17:04.402Z" }, - { url = "https://files.pythonhosted.org/packages/16/19/c7388ad5d3a72315d2832dc1458cbf4f2af7f2b990b606ff4876efd04511/lxml-6.1.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:cd312b9692e831d2ffcad61eab31d91d4b4655a962e61de8fb410472cbcd37aa", size = 4803973, upload-time = "2026-05-18T19:17:06.545Z" }, - { url = 
"https://files.pythonhosted.org/packages/3f/22/76197f0bbf165f0b9e75be59be4997e5259cde973f12f098c1b54c7f5d60/lxml-6.1.1-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:5b7328b46d49fc9477d91ae8f6d55340347d827b7734ba3ea33faae0efef1383", size = 5349925, upload-time = "2026-05-18T19:17:09.743Z" }, - { url = "https://files.pythonhosted.org/packages/24/52/d2a0cfeccb9bcdc47c7ee05cdae5d69b48c9acf20997790a6338bb0d0b3b/lxml-6.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37a58976370f36d9329d118ad0b953c5aeb9119ac9c6a4e258942a225d0573a1", size = 5309825, upload-time = "2026-05-18T19:17:13.831Z" }, - { url = "https://files.pythonhosted.org/packages/19/4a/b30944266776c2f49749ef2445aa7e78898194134b80ad776386f61b56ae/lxml-6.1.1-cp310-cp310-win32.whl", hash = "sha256:cea3f4c1af79af13cdb2da0c028111d8f8522d4f22a000c82385535f24e5cf3a", size = 3598402, upload-time = "2026-05-18T19:17:08.21Z" }, - { url = "https://files.pythonhosted.org/packages/9e/97/33691c66a4d7ec1a5a98e7c909a5b83ee45c7f7ba4cf92b1c4cf26e98079/lxml-6.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:3abf332af33a74288675d936fe861fd4344da0dd6622193fbc4f2bfbb35536b5", size = 4021295, upload-time = "2026-05-18T19:17:28.638Z" }, - { url = "https://files.pythonhosted.org/packages/d0/5f/26a4dd0e12b9456ff7b12a21af5b491eb6629680d1edd73f4140fd386bcf/lxml-6.1.1-cp310-cp310-win_arm64.whl", hash = "sha256:8dadbe5b217ff35b6a8d16610dd710219b59b76d13f0e3f0d9f36786206e4485", size = 3667717, upload-time = "2026-05-19T19:22:44.474Z" }, - { url = "https://files.pythonhosted.org/packages/62/b0/83f481780d1548750b8ce2ec824073deef2f452d9cd1a6faff8507e3d16d/lxml-6.1.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:53b7d2b7a10b1c35c0a5e21e9224accf60c1bbfba523990732e521b2b73adef2", size = 8526461, upload-time = "2026-05-18T19:17:25.862Z" }, - { url = "https://files.pythonhosted.org/packages/b9/d5/30fa0f808002c7329397bfbb24e306789c0b29f04aa5842c07b174b4216f/lxml-6.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:ff3f333630ab480244a1bff72043e511a91eb22e7595dead8653ee5612dd8f3d", size = 4595375, upload-time = "2026-05-18T19:17:34.555Z" }, - { url = "https://files.pythonhosted.org/packages/4f/d2/edb71cf0e561581a7c5eb2626244320eb04e9f8ce6d563184fd668b45073/lxml-6.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a4bbea04c97f6d78a48e3fbc1cb9116d2780b1b39e03a23f6eb9b603fd61f510", size = 4923654, upload-time = "2026-05-18T19:17:42.917Z" }, - { url = "https://files.pythonhosted.org/packages/4c/77/1bc7eeb0de4577d783fb625aa092cc9357883bba35845a3666bf1259f3dc/lxml-6.1.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:db1d75f6617a49c1c01bc7023713e0ff59ab32c9579ae62a7674c0e34f3b0b0a", size = 5067921, upload-time = "2026-05-18T19:17:49.175Z" }, - { url = "https://files.pythonhosted.org/packages/1b/3c/c0690d74bd2bc17bc03b5b0d093569ead597dd0bfa088bf99eef8c24e19c/lxml-6.1.1-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a12689be69a28ddaa0ab99a5a1137da2afd5f8f16df7b5680b66f616d3eda1d", size = 5002456, upload-time = "2026-05-18T19:17:59.715Z" }, - { url = "https://files.pythonhosted.org/packages/66/8d/d1b3271af0c0f1e27e8472a849e4d2c65bc7766884b9ad2da9e76e145c88/lxml-6.1.1-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b73c339ae29b90fd2d06e58ebd555a751bde9cd6bbd36cc0281b9a2c94e9d8", size = 5202776, upload-time = "2026-05-18T19:18:08.924Z" }, - { url = "https://files.pythonhosted.org/packages/7a/45/689824ffb237fd10125ad273f32b28ff04dc6203c2822c85ff65a93df65e/lxml-6.1.1-cp311-cp311-manylinux_2_28_i686.whl", hash = "sha256:752d3bbfe874715ccd0aec7f88d7fc623c0f1fd7aa7b3238a084e017bad2a009", size = 5329945, upload-time = "2026-05-18T19:18:13.673Z" }, - { url = "https://files.pythonhosted.org/packages/5d/c0/ef73af53767e958fd87d437c170f272e2f6e6c0f854939f133a895f1e711/lxml-6.1.1-cp311-cp311-manylinux_2_31_armv7l.whl", hash = 
"sha256:6b1761fbf9ec984e2e9d9c589ef5f5fd684b7c19f92aadd567a26c5224958db6", size = 4659237, upload-time = "2026-05-18T19:18:18.657Z" }, - { url = "https://files.pythonhosted.org/packages/a0/5e/e1158e40397585e91cb0472374a1f63d0926a1ddeaa92f13d1a1ffe306d5/lxml-6.1.1-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d680fbcb768404c601ecb43519ecd8461f6954cb11c06a78962f666832ccfca8", size = 5265904, upload-time = "2026-05-18T19:18:24.883Z" }, - { url = "https://files.pythonhosted.org/packages/a0/16/8687e5d1400ed1c0bc41dace232ebb7553952b618ea1f2e5fb6e2cfbbe23/lxml-6.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:162af1091cd785f2f27e62d3547ae9bc58ec5c86dd314d67021fd02463708d83", size = 5045225, upload-time = "2026-05-18T19:17:20.073Z" }, - { url = "https://files.pythonhosted.org/packages/ca/18/d877bd1ae2e5ffdfd4836565aba350db31feb2f2656d6ce70316ed66a05e/lxml-6.1.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:e9308ff8241c532df3f3e570f9a5aeed6c853f888512ba4b75638d7c11c95ef6", size = 4712721, upload-time = "2026-05-18T19:17:40.512Z" }, - { url = "https://files.pythonhosted.org/packages/44/4d/1f44fd1d770b10dacbf6b5c6e520f4d6e0708744930f719dc04e67cab981/lxml-6.1.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:5f6994074ebae6ffb04447268e37dc16edc304f9859cf91acb86e0af6c1b395c", size = 5252549, upload-time = "2026-05-18T19:17:51.236Z" }, - { url = "https://files.pythonhosted.org/packages/64/5d/1d66b84f850089254c230ef6ea6b267a5a54e2e179a5d960036a05d501d7/lxml-6.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80c2dfadb855da477cf73373ad29a333535dedb9b12bad02c9814c8e2b43bf08", size = 5226877, upload-time = "2026-05-18T19:18:00.875Z" }, - { url = "https://files.pythonhosted.org/packages/ad/00/84c4b5302d42a2d0184f38d538c8a197f33b52a50bd4f7bcfe990bce3036/lxml-6.1.1-cp311-cp311-win32.whl", hash = "sha256:30a89d3ac8faec007453fb541f3f46807eeec88edd5826f6e3fe001752a2c621", size = 3594072, upload-time = 
"2026-05-18T19:17:12.714Z" }, - { url = "https://files.pythonhosted.org/packages/61/9d/2e2f7d876349f45e0f3e29f72da311668853d59b58d473a2dea4f0160135/lxml-6.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:abbefa31eee84842140f67acef1c828e28bba8bbf0c3bc6e5492a9af88152c28", size = 4025469, upload-time = "2026-05-18T19:17:50.566Z" }, - { url = "https://files.pythonhosted.org/packages/b0/d5/570e6390e4110331e6208b2ba83d1482cc9146808ee118b22824a34c1070/lxml-6.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:dcb292aa7fe485ceff7af4f92e46c5af397daec5dff64871a528f0fc47a3cc5b", size = 3667640, upload-time = "2026-05-19T19:22:48.293Z" }, - { url = "https://files.pythonhosted.org/packages/6a/6e/c4add832b6fc1e887125b96f880d7b9b70aae5248718e046b1704bcac4b9/lxml-6.1.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:104c09bda8d2a562824c0e319d0768ce26a779b7601e0931d33b09b53c392ef7", size = 8570821, upload-time = "2026-05-18T19:17:42.068Z" }, - { url = "https://files.pythonhosted.org/packages/22/00/ff3009c88e65de8011630acf8ab5a09cb2becd2aaf47fba2f3449f6224e9/lxml-6.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:25c6997a9a534e016695a0ba06b2f07945de682731ff01065b6d5a4474179da1", size = 4624252, upload-time = "2026-05-18T19:17:47.897Z" }, - { url = "https://files.pythonhosted.org/packages/42/95/bb63f0fd62e554fe078e1fb3c8fe9083c14ddc7ad7fa178d10e57e071ac7/lxml-6.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c921ba5c51e4e9f63b8b00267d06566e1f63407408a0496da2d1d0bfc819c7fc", size = 4930746, upload-time = "2026-05-18T19:18:29.637Z" }, - { url = "https://files.pythonhosted.org/packages/eb/99/0013e8d9b5960f4f041cf0b73e2f80c23eb5205b1f7bfb20203243651359/lxml-6.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:54a7f95e4de5fb94e2f9f4b9055c6ba33bf3d628fd77a1d647c5923caa2cdcdc", size = 5093723, upload-time = "2026-05-18T19:18:34.168Z" }, - { url = 
"https://files.pythonhosted.org/packages/29/91/317b332636bfc7bddcff828d41b3307f50043f4b237e40849c333d80fa1a/lxml-6.1.1-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96f2ec43df44b1f76249ee0a615334f9b5b060e1c8bd90e706dad2d14d02f383", size = 5005557, upload-time = "2026-05-18T19:18:39.798Z" }, - { url = "https://files.pythonhosted.org/packages/42/2f/cc9bf06afe70f9c9093ae60855d9759da9db601ec4080f7473319666ffd7/lxml-6.1.1-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:70ef8a7e102a1508f8121aae5b0867abd663f72c14f0a9c937e6554cb4587b7b", size = 5631036, upload-time = "2026-05-18T19:18:44.858Z" }, - { url = "https://files.pythonhosted.org/packages/08/f6/af32e23e563971ffb0fb86be52bc5be5c2c118858ffc119bf6a9039b173d/lxml-6.1.1-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ebe6af670449830d6d9b752c256a983291c766a1365ba5d5460048f9e33a7818", size = 5240367, upload-time = "2026-05-18T19:18:49.217Z" }, - { url = "https://files.pythonhosted.org/packages/78/83/8555d40948b09ce86f1bd0c68a7ac31d07b1929f92cc1b074006c97ef2d2/lxml-6.1.1-cp312-cp312-manylinux_2_28_i686.whl", hash = "sha256:27acc820660aaffa4f7c087f29120e12980f7779d56d8492d263170111284740", size = 5350171, upload-time = "2026-05-18T19:18:52.779Z" }, - { url = "https://files.pythonhosted.org/packages/63/75/5d92da93729b7bad783689e6496049fa40927b45bec7bf183c981de3ca70/lxml-6.1.1-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:1db753c9115ec7100d073b744d17e25e88a8f90f5c39b2f5dd878149af59671f", size = 4694874, upload-time = "2026-05-18T19:18:55.139Z" }, - { url = "https://files.pythonhosted.org/packages/c5/b5/3aad415a9a25b822e783f15deeb4dffccf5113030f1afa2222dd929313d9/lxml-6.1.1-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c4f469aebd783bb741c2ecb2a681008fd26bfe5c16a9a72ed5467f834e810df2", size = 5244492, upload-time = "2026-05-18T19:19:01.28Z" }, - { url = 
"https://files.pythonhosted.org/packages/f1/a1/5fcf7eb9904b80086aa47dcf0027de07b1bb990afad2e6823144c368ae04/lxml-6.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:766b010012d59470072c1816b5b6c69f1d243e5db36ea5968e94accf430a4635", size = 5048232, upload-time = "2026-05-18T19:18:12.67Z" }, - { url = "https://files.pythonhosted.org/packages/77/74/1f601b63c7a69fcdf10fa9b148c81da8442204194f6c55509cc485c786b9/lxml-6.1.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b8d812c6011c08b8111a15e54dd990b8923692d80adf35488bee34026c35accf", size = 4777023, upload-time = "2026-05-18T19:18:15.928Z" }, - { url = "https://files.pythonhosted.org/packages/a2/b9/7a78f51aec95b1bf780d78e12705a9f6533284f8693dc5c0e6724fa53d3f/lxml-6.1.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:fe0306bd29505a9177aac19f1877174b0e7422c222a59f70b2cd41633448c3dc", size = 5645773, upload-time = "2026-05-18T19:18:23.223Z" }, - { url = "https://files.pythonhosted.org/packages/a5/6e/98a7b7ad54e4e74fa1f20fff776913980619d0ebe5558232d7da6580bdd8/lxml-6.1.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5ba186ad207446c65d3bb3d3e0412b032b1d9f595e59861e2354798c5703d955", size = 5233088, upload-time = "2026-05-18T19:18:31.433Z" }, - { url = "https://files.pythonhosted.org/packages/65/d1/bc0ed2427bf609f2ee10da303a6a226f9c8bce94f945dc29a32ce55de6e4/lxml-6.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aa366a1e55b8ebfe8ca8ddc3cfe75c8ebade181aeb0f661d0cb05986b647f72a", size = 5260995, upload-time = "2026-05-18T19:18:37.091Z" }, - { url = "https://files.pythonhosted.org/packages/69/8b/6772e1a4b513fc50a8d931f19edde0e13ae6918510a1e13ff67864f3e5ed/lxml-6.1.1-cp312-cp312-win32.whl", hash = "sha256:126c93f7f56f0eda92f6d8c619edc463a4f23d9252f1c9d0405a76f25fa9f11a", size = 3596382, upload-time = "2026-05-18T19:17:18.37Z" }, - { url = "https://files.pythonhosted.org/packages/1b/89/45198e9624762af2dfd2cb8782598477ceb29f6e59caab560388ae1f4ec1/lxml-6.1.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:26e6eda8d38c1fcab1090dd196ee87cbd13788e531937610e2589085de074e77", size = 3997255, upload-time = "2026-05-18T19:17:56.781Z" }, - { url = "https://files.pythonhosted.org/packages/90/a9/7a54b6834088d9ae528a7b780584ba6a39a9457b0ac330479f20ffbc9449/lxml-6.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:6540377fbd53fe1b629172288c464fb18db11ce1fa7dc15891da10aa9dcc3e7f", size = 3659610, upload-time = "2026-05-19T19:22:50.843Z" }, - { url = "https://files.pythonhosted.org/packages/a5/eb/7e6f37c5584ccbb2ff267f56fd0339016938c1c8684cfefab9b33ffc2f36/lxml-6.1.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:68a9198d0fc122d14bb76837de9aa80cf84caed990b5b237f532ed87d3706736", size = 8559780, upload-time = "2026-05-18T19:17:57.661Z" }, - { url = "https://files.pythonhosted.org/packages/a1/36/587c2521cf23a2cd6c9c22108aa7528f683a1f195ed7ccd23a4b1786ad36/lxml-6.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7d47866cb32fb503450b6edc9df355d10dc49836af2e89901bd6ac6b0896d9d9", size = 4618006, upload-time = "2026-05-18T19:18:04.452Z" }, - { url = "https://files.pythonhosted.org/packages/6e/ca/ab7bfe2bf4c972af5e7878262845ead3a24a929a9b04bc11c7c1ece6c82a/lxml-6.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb7c9811bfaa8b1ed5ed319f5d370dfbcaa59d52ea64be2a5a85e18195930354", size = 4924139, upload-time = "2026-05-18T19:19:04.873Z" }, - { url = "https://files.pythonhosted.org/packages/6b/55/a0c72851dfee5ecc689f949723a73dea457758912542cb955b108eaf0d8f/lxml-6.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:762ff394d5bd56da0cf034a23dcce4e13923f15321a2adfa2ac00201dc6d3fca", size = 5082329, upload-time = "2026-05-18T19:19:09.728Z" }, - { url = "https://files.pythonhosted.org/packages/f0/b6/0608f7d61a3b96cc67e5648a3d906e31a5082093e10e7be65b3886289938/lxml-6.1.1-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a088f287f7d8275a33c07f2cac6c50b9319309a0200a39e7e75d80c707723099", 
size = 4993564, upload-time = "2026-05-18T19:19:13.608Z" }, - { url = "https://files.pythonhosted.org/packages/4c/66/ae227524b066d29d55bf0b453d93d2d793c40218657d643dcbbca13b8faf/lxml-6.1.1-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e902da4b04e6b52e5893900d4b8ab46068f75f3561f01bf1080957f9fd932ed6", size = 5613467, upload-time = "2026-05-18T19:19:16.228Z" }, - { url = "https://files.pythonhosted.org/packages/a6/76/dbe4a00b50385e40194231dcfe5a12c059de7cf90e89c83407d2b085b719/lxml-6.1.1-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1d4962d4c66bf830a7e59ed6cfc17d148149898a3aefa8ec6e59763e6e3ed085", size = 5228304, upload-time = "2026-05-18T19:19:19.354Z" }, - { url = "https://files.pythonhosted.org/packages/1c/01/00b1b8442ed2041793336868ba0b9ea4b13d7da7c085c6404c207a63bf79/lxml-6.1.1-cp313-cp313-manylinux_2_28_i686.whl", hash = "sha256:581d4c8ae690a6609e64862dd6b7c2489635c2d13907fc2b20f2bc200ff1d21e", size = 5341607, upload-time = "2026-05-18T19:19:22.297Z" }, - { url = "https://files.pythonhosted.org/packages/63/36/1ad29931e9a4638bb707869f01d423a6c815f82152138d1a40dfcfde2b95/lxml-6.1.1-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:876e1ff5930ed8bf295ec5ef9a8155e9b6b1876bbf1deed8b3a8069311875a8f", size = 4700168, upload-time = "2026-05-18T19:19:25.133Z" }, - { url = "https://files.pythonhosted.org/packages/3c/d1/a9536cecf9be18a0dc72d32bead283a2332d1ffebd2dd3ac70ce444686e5/lxml-6.1.1-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9eb9b5a968f6e0f6d640092a567e14529ff8cea2e29d00da6f78a79fa49f013c", size = 5232487, upload-time = "2026-05-18T19:19:28.603Z" }, - { url = "https://files.pythonhosted.org/packages/0e/77/b4fb1e03bf5d130e879214d3100092e386418807fb74dd0adc4b0a48f351/lxml-6.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:aa49e06d94aba782c6a02eecb7e507969e7e7a41b267f1b359bb35585f295d5b", size = 5044231, upload-time = "2026-05-18T19:18:42.246Z" }, - { 
url = "https://files.pythonhosted.org/packages/26/4c/d00daeeb0a5530c4028a9232aa1b93db3ef4ed2158c116ea73c79a9765b3/lxml-6.1.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:70cdfd80589d59e43e18005dd7244e8895e93db8ab6a620b7e23df5445a4e3d2", size = 4769450, upload-time = "2026-05-18T19:18:48.013Z" }, - { url = "https://files.pythonhosted.org/packages/ed/6a/715a3a8d156ce42f29cf014706f5410c2ff3b02267774110fc23266409fe/lxml-6.1.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:aad9aa39483ed8ec44d6d2e59e5b98a0d80676ef0d92f44bfc374836111f62f5", size = 5635874, upload-time = "2026-05-18T19:18:51.914Z" }, - { url = "https://files.pythonhosted.org/packages/45/37/0544bc21dde2a88f3a17b504e6fc79c0e01d25a33c2f6079724e9e72b9c7/lxml-6.1.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d49514be2f28d895c38cf9d2b72d7b9a07d00314519f456c0b50b53cfcf4c785", size = 5223987, upload-time = "2026-05-18T19:18:59.715Z" }, - { url = "https://files.pythonhosted.org/packages/4d/f8/f6a5e8185bcb28c2befae3d31f8e3df3b811cb0f47746517a81279fcafe1/lxml-6.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:47402e62c52ff5988c1e8c6c63177f5708bccf48e366dea4e3dcf1e645e04947", size = 5250276, upload-time = "2026-05-18T19:19:03.834Z" }, - { url = "https://files.pythonhosted.org/packages/c7/f2/1a2b9f1b7a49d45495369be7ef9ad05b262930f2eab3e3145706fca8083f/lxml-6.1.1-cp313-cp313-win32.whl", hash = "sha256:3483644525531e1d5762b0c44a8e18b6efba321b6dcf8a8952de10b037618bca", size = 3596903, upload-time = "2026-05-18T19:17:29.863Z" }, - { url = "https://files.pythonhosted.org/packages/e6/99/f4ffb024f238eec2131aaa09f3278fb6129cf892741bf68e1fc1afb8c100/lxml-6.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:a10bd2fd62e8ce916ececb342f348f190724a098c1faa056fdfb2a22ad5e8660", size = 3995869, upload-time = "2026-05-18T19:18:02.596Z" }, - { url = "https://files.pythonhosted.org/packages/d1/53/70eb8c5c6037f27448f1e3c54ebede9545a801ae63f0a7254afca4fe8e45/lxml-6.1.1-cp313-cp313-win_arm64.whl", hash = 
"sha256:424aa57aca0897eb922aef34395bd1289b3b6f04e6bae20ea123c0c7e333cffc", size = 3658490, upload-time = "2026-05-19T19:22:53.846Z" }, - { url = "https://files.pythonhosted.org/packages/13/e2/2e325795566de01d0d7c3bb57d3c370616b2d07b01214e84eec5d3b10963/lxml-6.1.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:19b7ab10b210b0b3ad7985d9ac4eb66ab09a90b20fe6e2f7ba55d01a234345d0", size = 8577146, upload-time = "2026-05-18T19:18:17.765Z" }, - { url = "https://files.pythonhosted.org/packages/93/cf/5630b5e4be7d2e6bee8efe83865c925221103cf0221303b104ce134b01e2/lxml-6.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c08e5c694306507275f2290073350c4f32e383db15213b2c69e7ff39c1193840", size = 4623866, upload-time = "2026-05-18T19:18:30.669Z" }, - { url = "https://files.pythonhosted.org/packages/d2/51/3904907c063451cf8d4a5c9fe0cad95fa1f4ec57f4e3884fa0731bd7a305/lxml-6.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:74a9717fd0d82effef5c2854f0d917231d5324b5a3eb7275c43ac9fa32f97a14", size = 4950022, upload-time = "2026-05-18T19:19:31.958Z" }, - { url = "https://files.pythonhosted.org/packages/94/cd/9c7611a51c37a2830928405817cc5d56a97f64fab83cc3f628748b135749/lxml-6.1.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:efe0374196335f93b53269acd811b944f2e6bdc88e8894f214bd636455484909", size = 5086695, upload-time = "2026-05-18T19:19:34.764Z" }, - { url = "https://files.pythonhosted.org/packages/da/d6/24e3b5906abb0b674ff2ae195bc3ce59708df2bcd17cf17703b2d7dd643a/lxml-6.1.1-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac931cdc9442c1763b8a8f6cd62c0c938737eafc5be75eff88df55fc73bc0d00", size = 5031642, upload-time = "2026-05-18T19:19:37.771Z" }, - { url = "https://files.pythonhosted.org/packages/2d/db/6ec54f99019838bff54785c51da07f189eb4676861c5f2730962b0d8d665/lxml-6.1.1-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:aee395f5d0927f947758b4ec119fd5fc8ec71f07a1c5c52077b30b04c0fa6955", size = 5647338, upload-time = "2026-05-18T19:19:40.553Z" }, - { url = "https://files.pythonhosted.org/packages/42/3d/ef4dcfffd22d27a61805d8ed9f7fb888495bc6aa88648fa07c1eaa5586b6/lxml-6.1.1-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9395002973c827b3ed67db77e6ec09f092919a587022174554096a269378fb13", size = 5239528, upload-time = "2026-05-18T19:19:43.657Z" }, - { url = "https://files.pythonhosted.org/packages/62/bb/37fb3f0dff146bdcfa78eec47879273820b2a0bf350ec236ce14bd0b1c26/lxml-6.1.1-cp314-cp314-manylinux_2_28_i686.whl", hash = "sha256:73bc2086f141224ebddb7fc5c6a36ca58b31b94b561e1dfe8e073e3270fad1e7", size = 5350730, upload-time = "2026-05-18T19:19:46.307Z" }, - { url = "https://files.pythonhosted.org/packages/90/42/43253f168388df4fae1f38c01df36ddb9bee39e2048167b54cdcbae85ea3/lxml-6.1.1-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:3779def59032b81e44a5f70096ef6bf2082f8d901937dca354474ba09782e245", size = 4697530, upload-time = "2026-05-18T19:19:49.889Z" }, - { url = "https://files.pythonhosted.org/packages/eb/a8/c5a8504f81bbdfc8e7094c2c850cdb4ed6777fc4d5ddd9e5ab819f3b0d54/lxml-6.1.1-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:86c89b9d55ebf820ad7c90bc533410f0d098054f293351f10603c0c46ff598f5", size = 5250670, upload-time = "2026-05-18T19:19:53.199Z" }, - { url = "https://files.pythonhosted.org/packages/77/b7/c7e76ab18744d75e21f320ebf9ff9d1ceae2b54dd431ea5a64caf26c9672/lxml-6.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19607c6bbff2a44cf3fe8250abccd20942d3462473e0a721d01d379ed017e462", size = 5084485, upload-time = "2026-05-18T19:19:08.422Z" }, - { url = "https://files.pythonhosted.org/packages/31/31/b35c53f8ef7b7c31cacd23d3638652fff7bcd1deb6eedb709ab43b685908/lxml-6.1.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:c6ed5141a5c7507cf3ee76bd363b0d6f801e3321adc35b5d825a23115faa5465", size = 4737635, 
upload-time = "2026-05-18T19:19:12.321Z" }, - { url = "https://files.pythonhosted.org/packages/d9/06/31f23c813a7fe8e0cb1b175e915b08c9bf4e86d225b210feadbdbe519667/lxml-6.1.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:62aeb7e85b5d60320b9d77eef2e773994e2c0ce10121b277e0a19804e1654a5a", size = 5670681, upload-time = "2026-05-18T19:19:15.001Z" }, - { url = "https://files.pythonhosted.org/packages/1a/bc/ce619bccc89b1fd9ad8a8e1330ee3f3beff9f2ff95b712d7bbcdd6e22fc3/lxml-6.1.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:b1b963fd8f5caa68e99dfae060d54de1fe9cba899b8718b44a00cdca53c3e590", size = 5238229, upload-time = "2026-05-18T19:19:18.131Z" }, - { url = "https://files.pythonhosted.org/packages/2f/5d/b329acbbedc0b619ebc2be6cf7ee9ed07e80892c88d4dfd612c33805789a/lxml-6.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:63876be28efefa04a1df615b46770e82042cce445cfdce55160522f57b231ccb", size = 5264191, upload-time = "2026-05-18T19:19:21.118Z" }, - { url = "https://files.pythonhosted.org/packages/d6/85/be36fb1425b30db3c3f9df75fe86343ebffb79e6320bd7f588e25bfeac39/lxml-6.1.1-cp314-cp314-win32.whl", hash = "sha256:7f7a92e8583f06b1fd49d01158143b8461cfcd135dcb10ec807270a3051bd603", size = 3657202, upload-time = "2026-05-18T19:17:39.509Z" }, - { url = "https://files.pythonhosted.org/packages/b8/ce/3cf9a827342269f54d405a6202397de63f07c69cbd6ce7d183a3f0cba1e9/lxml-6.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:b2d444f2e66624d68e9c6b211e28a76e22fff5fcabcfff4deac18b529b7d4137", size = 4064497, upload-time = "2026-05-18T19:18:14.662Z" }, - { url = "https://files.pythonhosted.org/packages/d9/3e/1a957bde8f0760039e627f94699f82caa782c9d838d86c3d28245ee67212/lxml-6.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:3fd9728a2735fda14f4e8235830c86b539e9661e849665bf926d3f867943b4bf", size = 3741991, upload-time = "2026-05-19T19:22:59.111Z" }, - { url = 
"https://files.pythonhosted.org/packages/78/b2/00ed55b3a2efa4658fb795c38d1090ec9b3e8a6c3683d4441fa517f09c3b/lxml-6.1.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:787b2496d0dbe8cd180984e8d29e3a6f76e7ea34db781cb3bd55e4ba1ef8b4ee", size = 8827545, upload-time = "2026-05-18T19:18:41.193Z" }, - { url = "https://files.pythonhosted.org/packages/c0/73/74573db19baa618d5f266f2407898b087ff6927115b00b71e5fc1b700847/lxml-6.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2c8daa471358dc2d6fcf02165e80ec68f77871a286df95bc5cc3816153b0fd2c", size = 4735736, upload-time = "2026-05-18T19:18:46.761Z" }, - { url = "https://files.pythonhosted.org/packages/16/02/6f7061f4f95f51e545d48e87647c54791d204a4e881be4156e7a26ba5338/lxml-6.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:acd7d70b64c0aae0c7922cca83d288a16f5f6da523637697872253415269baef", size = 4970291, upload-time = "2026-05-18T19:19:56.215Z" }, - { url = "https://files.pythonhosted.org/packages/b0/02/55fc057d8283427dea7d6edb102e7a840239c77a64a983d92f62a304c0e9/lxml-6.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4f0dd2f01f9f8a89f565d000e03abcf0a13d692a346c8d22f628d49af098777a", size = 5102822, upload-time = "2026-05-18T19:19:59.223Z" }, - { url = "https://files.pythonhosted.org/packages/e4/48/8e1cf78d89d66850121d9255a2a24414c98f775da93b90cf976956c24b14/lxml-6.1.1-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b7e8a14c8634bf6f7a568634cb395305a6d964aeb5b7ee32248094bed3a7e2c", size = 5027923, upload-time = "2026-05-18T19:20:01.549Z" }, - { url = "https://files.pythonhosted.org/packages/ed/00/0632a0647612c8af24d26997b3b961397daa9d5b2581444805933629a4cb/lxml-6.1.1-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:86281fbdd6a8162756f8d603f37e3435bfa38043adb79c6dc6a2dfee065e7525", size = 5595843, upload-time = "2026-05-18T19:20:03.93Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/86/ab008a7dc360711b66858d61c80a5979a70a09f2aa2b05d9698df80b803d/lxml-6.1.1-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5d7152ec39ca7c402d8fb9bad86140a15b9503bd0c54484e3f1bbe3dd37ceca", size = 5224515, upload-time = "2026-05-18T19:20:06.381Z" }, - { url = "https://files.pythonhosted.org/packages/75/c6/2702ff375e728e34f56d9a45339a9cf7e4427e917f542225242d63a05afa/lxml-6.1.1-cp314-cp314t-manylinux_2_28_i686.whl", hash = "sha256:88d8cb75b9d82858497a5393e3c63cfbf03035225e4b35a49ed7ccb151e4dc0e", size = 5312511, upload-time = "2026-05-18T19:20:09.308Z" }, - { url = "https://files.pythonhosted.org/packages/b7/57/a5807c98f87a86f10ef9ffab35516df7c0f0c4b6d5d33e9f608ab9c04a31/lxml-6.1.1-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:f64ec5397ea6a41fc1b4af0380d79b44a755b5531dcaccd9940fb260dca93038", size = 4639206, upload-time = "2026-05-18T19:20:11.704Z" }, - { url = "https://files.pythonhosted.org/packages/1f/e1/8a0a2c35734812395f4da4eaf33748a7e5705bfb2a58b128da764339d5ec/lxml-6.1.1-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d34bbf07dbc7ca5970671b1512e928991fb5e9d95365636c9b2d8b4f53af405e", size = 5232404, upload-time = "2026-05-18T19:20:14.064Z" }, - { url = "https://files.pythonhosted.org/packages/c2/e2/0e6a4dd5ad84d01d99aa7bae7cfefd4a760a0e0f8176818241de17d9b6c0/lxml-6.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:17e0e18d4ad8adbd0399291bc44845b69d9dd68439a3cdebdf35ff902ec05072", size = 5083769, upload-time = "2026-05-18T19:19:23.758Z" }, - { url = "https://files.pythonhosted.org/packages/a0/7e/161f33d463f6ffc1c7679104b65086dea120080d49dde4d238f015aaee2f/lxml-6.1.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:3ab541146f1f6968c462d6c2ac495148e8cdba2f8347700b2141b6ec5a75bf52", size = 4758936, upload-time = "2026-05-18T19:19:27.256Z" }, - { url = 
"https://files.pythonhosted.org/packages/f1/fb/2369825e3f6ca99305bf9f7b7085fda91c8b0922a89e54d900974aa3ef85/lxml-6.1.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:2a0217714657e023ef4293500f65aa20fce6164c8fd6b08fa5bd4a859fb14b9b", size = 5620296, upload-time = "2026-05-18T19:19:29.993Z" }, - { url = "https://files.pythonhosted.org/packages/30/90/d61e383146f74c5ab683947ea14dc7b82778838ab9b95ea73a23b60d0191/lxml-6.1.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:05a82eb6e1530a64f26225b55cbd178113bd0b5af1c2b625f25e5296742c26d2", size = 5228598, upload-time = "2026-05-18T19:19:33.523Z" }, - { url = "https://files.pythonhosted.org/packages/76/2d/2dafd8149e94b05bb070690efd5bb2680720681e03ff03fc57d2b70a1105/lxml-6.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9e36f163528fc50cbef305f02a5fd66d404edf7049cdaff211dbc2cba5a7013e", size = 5247845, upload-time = "2026-05-18T19:19:36.649Z" }, - { url = "https://files.pythonhosted.org/packages/ce/68/b30e913340c380ddac9580c6e6230991fc37240ec4f64704833e4f3e2769/lxml-6.1.1-cp314-cp314t-win32.whl", hash = "sha256:649dda677cf3bd6ac9ae14007ba0c824ded8ce5808b53fc7431d9140399118c1", size = 3897345, upload-time = "2026-05-18T19:17:33.562Z" }, - { url = "https://files.pythonhosted.org/packages/3c/4e/9eb2af5335545f9fbcd7af57bcf87c6025d31eaa31b14ec184a6c8675328/lxml-6.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:793033d6c5cdf33a573f910d9bea14ef8f5771820411d118da8e1182edb53d5e", size = 4393350, upload-time = "2026-05-18T19:18:10.076Z" }, - { url = "https://files.pythonhosted.org/packages/7f/2c/0f1e93c636720e8a3eb59af2bfda99d98b55891e1c53bc30c2e0e865f01b/lxml-6.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:58bb955caba94e467d2a96da17660d2d704e0675894cba21ab8a775b8621fd1c", size = 3817223, upload-time = "2026-05-19T19:22:56.823Z" }, - { url = "https://files.pythonhosted.org/packages/b5/32/86a3f0f724a3a402d4627937a7fc27b160e45e7012b4adf47f6e1e844511/lxml-6.1.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash 
= "sha256:31033dc34636ea6b7d5cc11b1ddbda78a14de858ba9d3e1ed4b69a3085bc521e", size = 3930127, upload-time = "2026-05-18T19:19:02.27Z" }, - { url = "https://files.pythonhosted.org/packages/40/44/d832e82af08723761556d004b1d04d281c09f9a8cecd7d3148548c9941a3/lxml-6.1.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3893c14c4b6ac5b2d54ba8cf03e99fe5104e592de491f19bd6b82756c09f8004", size = 4210769, upload-time = "2026-05-18T19:20:41.427Z" }, - { url = "https://files.pythonhosted.org/packages/6d/39/0dc5949f759ed7d951e0bb8c2f2d9d7aca1908d22352fa84a8afd2ea54af/lxml-6.1.1-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c07da4cebf6889f03ebac8d238f62318e29f495de0aa18a51ea14e61ae907e2e", size = 4318163, upload-time = "2026-05-18T19:20:44.702Z" }, - { url = "https://files.pythonhosted.org/packages/e6/fb/8ab3845fe046ba4cbf74536bcf6801a774b7caf4350de1c5d37f1f0a9e90/lxml-6.1.1-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6f0ce10945fab9c4c06ce14e22af9059d1a87493a9af4501a5b0b9187e21cf2", size = 4250945, upload-time = "2026-05-18T19:20:47.385Z" }, - { url = "https://files.pythonhosted.org/packages/68/1b/7553ab136894374ffae8851ec06f98f511cd8e66246e41b6be059d0a7289/lxml-6.1.1-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f8844cd288697c6425c9beba919302241e3278871dc6519515e72b04e987abcf", size = 4401664, upload-time = "2026-05-18T19:20:50.489Z" }, - { url = "https://files.pythonhosted.org/packages/db/a4/441aee36c6f6b249823d20fd91f9be9ab89d7c5a8ae542a4a4ca6d342d56/lxml-6.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:ed21202aec73cda4d55d1ce57b389aadb90ffb044e6cd1080b8347efe1b1ec84", size = 3508989, upload-time = "2026-05-18T19:18:38.158Z" }, -] - [[package]] name = "markdown-it-py" version = "4.2.0" @@ -2627,11 +1959,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/b3/81/4da04ced5a082363ecfa159c010d200ecbd959ae410c10c0264a38cac0f5/markdown_it_py-4.2.0-py3-none-any.whl", hash = "sha256:9f7ebbcd14fe59494226453aed97c1070d83f8d24b6fc3a3bcf9a38092641c4a", size = 91687, upload-time = "2026-05-07T12:08:27.182Z" }, ] -[package.optional-dependencies] -linkify = [ - { name = "linkify-it-py" }, -] - [[package]] name = "markupsafe" version = "3.0.3" @@ -2742,18 +2069,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/d9/eaa1f80170d2b7c5ba23f3b59f766f3a0bb41155fbc32a69adfa1adaaef9/mcp-1.26.0-py3-none-any.whl", hash = "sha256:904a21c33c25aa98ddbeb47273033c435e595bbacfdb177f4bd87f6dceebe1ca", size = 233615, upload-time = "2026-01-24T19:40:30.652Z" }, ] -[[package]] -name = "mdit-py-plugins" -version = "0.6.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markdown-it-py" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/59/fc/f8d0863f8862f25602c0404d75568e89fb6b4109804645e5cdfb1be5cf56/mdit_py_plugins-0.6.1.tar.gz", hash = "sha256:a2bca0f039f39dbd35fb74ae1b5f998608c437463371f0ff7f49a19a17a114d0", size = 56114, upload-time = "2026-05-13T09:03:38.91Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/69/6da5581c6a7fede7dc261bf4e67d6adca4196f176b43288b55b3db395b6e/mdit_py_plugins-0.6.1-py3-none-any.whl", hash = "sha256:214c82fb2ac524472ab6a5bcab1de80f73b50443e187f401bfd77efbc7c6481d", size = 66663, upload-time = "2026-05-13T09:03:37.76Z" }, -] - [[package]] name = "mdurl" version = "0.1.2" @@ -2763,120 +2078,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] -[[package]] -name = "mmh3" -version = "5.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/91/1a/edb23803a168f070ded7a3014c6d706f63b90c84ccc024f89d794a3b7a6d/mmh3-5.2.1.tar.gz", hash = "sha256:bbea5b775f0ac84945191fb83f845a6fd9a21a03ea7f2e187defac7e401616ad", size = 33775, upload-time = "2026-03-05T15:55:57.716Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a6/bb/88ee54afa5644b0f35ab5b435f208394feb963e5bb47c4e404deb625ffa4/mmh3-5.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5d87a3584093e1a89987e3d36d82c98d9621b2cb944e22a420aa1401e096758f", size = 56080, upload-time = "2026-03-05T15:53:40.452Z" }, - { url = "https://files.pythonhosted.org/packages/cc/bf/5404c2fd6ac84819e8ff1b7e34437b37cf55a2b11318894909e7bb88de3f/mmh3-5.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:30e4d2084df019880d55f6f7bea35328d9b464ebee090baa372c096dc77556fb", size = 40462, upload-time = "2026-03-05T15:53:41.751Z" }, - { url = "https://files.pythonhosted.org/packages/de/0b/52bffad0b52ae4ea53e222b594bd38c08ecac1fc410323220a7202e43da5/mmh3-5.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0bbc17250b10d3466875a40a52520a6bac3c02334ca709207648abd3c223ed5c", size = 40077, upload-time = "2026-03-05T15:53:42.753Z" }, - { url = "https://files.pythonhosted.org/packages/a0/9e/326c93d425b9fa4cbcdc71bc32aaba520db37577d632a24d25d927594eca/mmh3-5.2.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:76219cd1eefb9bf4af7856e3ae563d15158efa145c0aab01e9933051a1954045", size = 95302, upload-time = "2026-03-05T15:53:43.867Z" }, - { url = "https://files.pythonhosted.org/packages/c6/b1/e20d5f0d19c4c0f3df213fa7dcfa0942c4fb127d38e11f398ae8ddf6cccc/mmh3-5.2.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fb9d44c25244e11c8be3f12c938ca8ba8404620ef8092245d2093c6ab3df260f", size = 101174, upload-time = "2026-03-05T15:53:45.194Z" }, - { url = 
"https://files.pythonhosted.org/packages/7f/4a/1a9bb3e33c18b1e1cee2c249a3053c4d4d9c93ecb30738f39a62249a7e86/mmh3-5.2.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2d5d542bf2abd0fd0361e8017d03f7cb5786214ceb4a40eef1539d6585d93386", size = 103979, upload-time = "2026-03-05T15:53:46.334Z" }, - { url = "https://files.pythonhosted.org/packages/ff/8d/dab9ee7545429e7acdd38d23d0104471d31de09a0c695f1b751e0ff34532/mmh3-5.2.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:08043f7cb1fb9467c3fbbbaea7896986e7fbc81f4d3fd9289a73d9110ab6207a", size = 110898, upload-time = "2026-03-05T15:53:47.443Z" }, - { url = "https://files.pythonhosted.org/packages/72/08/408f11af7fe9e76b883142bb06536007cc7f237be2a5e9ad4e837716e627/mmh3-5.2.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:add7ac388d1e0bf57259afbcf9ed05621a3bf11ce5ee337e7536f1e1aaf056b0", size = 118308, upload-time = "2026-03-05T15:53:49.1Z" }, - { url = "https://files.pythonhosted.org/packages/86/2d/0551be7fe0000736d9ad12ffa1f130d7a0c17b49193d6dc41c82bd9404c6/mmh3-5.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:41105377f6282e8297f182e393a79cfffd521dde37ace52b106373bdcd9ca5cb", size = 101671, upload-time = "2026-03-05T15:53:50.317Z" }, - { url = "https://files.pythonhosted.org/packages/44/17/6e4f80c4e6ad590139fa2017c3aeca54e7cc9ef68e08aa142a0c90f40a97/mmh3-5.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3cb61db880ec11e984348227b333259994c2c85caa775eb7875decb3768db890", size = 96682, upload-time = "2026-03-05T15:53:51.48Z" }, - { url = "https://files.pythonhosted.org/packages/ad/a7/b82fccd38c1fa815de72e94ebe9874562964a10e21e6c1bc3b01d3f15a0e/mmh3-5.2.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e8b5378de2b139c3a830f0209c1e91f7705919a4b3e563a10955104f5097a70a", size = 110287, upload-time = "2026-03-05T15:53:52.68Z" }, - { url = 
"https://files.pythonhosted.org/packages/a8/a1/2644069031c8cec0be46f0346f568a53f42fddd843f03cc890306699c1e2/mmh3-5.2.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e904f2417f0d6f6d514f3f8b836416c360f306ddaee1f84de8eef1e722d212e5", size = 111899, upload-time = "2026-03-05T15:53:53.791Z" }, - { url = "https://files.pythonhosted.org/packages/51/7b/6614f3eb8fb33f931fa7616c6d477247e48ec6c5082b02eeeee998cffa94/mmh3-5.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f1fbb0a99125b1287c6d9747f937dc66621426836d1a2d50d05aecfc81911b57", size = 100078, upload-time = "2026-03-05T15:53:55.234Z" }, - { url = "https://files.pythonhosted.org/packages/27/9a/dd4d5a5fb893e64f71b42b69ecae97dd78db35075412488b24036bc5599c/mmh3-5.2.1-cp310-cp310-win32.whl", hash = "sha256:b4cce60d0223074803c9dbe0721ad3fa51dafe7d462fee4b656a1aa01ee07518", size = 40756, upload-time = "2026-03-05T15:53:56.319Z" }, - { url = "https://files.pythonhosted.org/packages/c9/34/0b25889450f8aeffcec840aa73251e853f059c1b72ed1d1c027b956f95f5/mmh3-5.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:6f01f044112d43a20be2f13a11683666d87151542ad627fe41a18b9791d2802f", size = 41519, upload-time = "2026-03-05T15:53:57.41Z" }, - { url = "https://files.pythonhosted.org/packages/fd/31/8fd42e3c526d0bcb1db7f569c0de6729e180860a0495e387a53af33c2043/mmh3-5.2.1-cp310-cp310-win_arm64.whl", hash = "sha256:7501e9be34cb21e72fcfe672aafd0eee65c16ba2afa9dcb5500a587d3a0580f0", size = 39285, upload-time = "2026-03-05T15:53:58.697Z" }, - { url = "https://files.pythonhosted.org/packages/65/d7/3312a59df3c1cdd783f4cf0c4ee8e9decff9c5466937182e4cc7dbbfe6c5/mmh3-5.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:dae0f0bd7d30c0ad61b9a504e8e272cb8391eed3f1587edf933f4f6b33437450", size = 56082, upload-time = "2026-03-05T15:53:59.702Z" }, - { url = "https://files.pythonhosted.org/packages/61/96/6f617baa098ca0d2989bfec6d28b5719532cd8d8848782662f5b755f657f/mmh3-5.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:9aeaf53eaa075dd63e81512522fd180097312fb2c9f476333309184285c49ce0", size = 40458, upload-time = "2026-03-05T15:54:01.548Z" }, - { url = "https://files.pythonhosted.org/packages/c1/b4/9cd284bd6062d711e13d26c04d4778ab3f690c1c38a4563e3c767ec8802e/mmh3-5.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0634581290e6714c068f4aa24020acf7880927d1f0084fa753d9799ae9610082", size = 40079, upload-time = "2026-03-05T15:54:02.743Z" }, - { url = "https://files.pythonhosted.org/packages/f6/09/a806334ce1d3d50bf782b95fcee8b3648e1e170327d4bb7b4bad2ad7d956/mmh3-5.2.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e080c0637aea036f35507e803a4778f119a9b436617694ae1c5c366805f1e997", size = 97242, upload-time = "2026-03-05T15:54:04.536Z" }, - { url = "https://files.pythonhosted.org/packages/ee/93/723e317dd9e041c4dc4566a2eb53b01ad94de31750e0b834f1643905e97c/mmh3-5.2.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:db0562c5f71d18596dcd45e854cf2eeba27d7543e1a3acdafb7eef728f7fe85d", size = 103082, upload-time = "2026-03-05T15:54:06.387Z" }, - { url = "https://files.pythonhosted.org/packages/61/b5/f96121e69cc48696075071531cf574f112e1ffd08059f4bffb41210e6fc5/mmh3-5.2.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d9f9a3ce559a5267014b04b82956993270f63ec91765e13e9fd73daf2d2738e", size = 106054, upload-time = "2026-03-05T15:54:07.506Z" }, - { url = "https://files.pythonhosted.org/packages/82/49/192b987ec48d0b2aecf8ac285a9b11fbc00030f6b9c694664ae923458dde/mmh3-5.2.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:960b1b3efa39872ac8b6cc3a556edd6fb90ed74f08c9c45e028f1005b26aa55d", size = 112910, upload-time = "2026-03-05T15:54:09.403Z" }, - { url = 
"https://files.pythonhosted.org/packages/cf/a1/03e91fd334ed0144b83343a76eb11f17434cd08f746401488cfeafb2d241/mmh3-5.2.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d30b650595fdbe32366b94cb14f30bb2b625e512bd4e1df00611f99dc5c27fd4", size = 120551, upload-time = "2026-03-05T15:54:10.587Z" }, - { url = "https://files.pythonhosted.org/packages/93/b9/b89a71d2ff35c3a764d1c066c7313fc62c7cc48fa48a4b3b0304a4a0146f/mmh3-5.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:82f3802bfc4751f420d591c5c864de538b71cea117fce67e4595c2afede08a15", size = 99096, upload-time = "2026-03-05T15:54:11.76Z" }, - { url = "https://files.pythonhosted.org/packages/36/b5/613772c1c6ed5f7b63df55eb131e887cc43720fec392777b95a79d34e640/mmh3-5.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:915e7a2418f10bd1151b1953df06d896db9783c9cfdb9a8ee1f9b3a4331ab503", size = 98524, upload-time = "2026-03-05T15:54:13.122Z" }, - { url = "https://files.pythonhosted.org/packages/5e/0e/1524566fe8eaf871e4f7bc44095929fcd2620488f402822d848df19d679c/mmh3-5.2.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:fc78739b5ec6e4fb02301984a3d442a91406e7700efbe305071e7fd1c78278f2", size = 106239, upload-time = "2026-03-05T15:54:14.601Z" }, - { url = "https://files.pythonhosted.org/packages/04/94/21adfa7d90a7a697137ad6de33eeff6445420ca55e433a5d4919c79bc3b5/mmh3-5.2.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:41aac7002a749f08727cb91babff1daf8deac317c0b1f317adc69be0e6c375d1", size = 109797, upload-time = "2026-03-05T15:54:15.819Z" }, - { url = "https://files.pythonhosted.org/packages/b5/e6/1aacc3a219e1aa62fa65669995d4a3562b35be5200ec03680c7e4bec9676/mmh3-5.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9d8089d853c7963a8ce87fff93e2a67075c0bc08684a08ea6ad13577c38ffc38", size = 97228, upload-time = "2026-03-05T15:54:16.992Z" }, - { url = 
"https://files.pythonhosted.org/packages/f1/b9/5e4cca8dcccf298add0a27f3c357bc8cf8baf821d35cdc6165e4bd5a48b0/mmh3-5.2.1-cp311-cp311-win32.whl", hash = "sha256:baeb47635cb33375dee4924cd93d7f5dcaa786c740b08423b0209b824a1ee728", size = 40751, upload-time = "2026-03-05T15:54:18.714Z" }, - { url = "https://files.pythonhosted.org/packages/72/fc/5b11d49247f499bcda591171e9cf3b6ee422b19e70aa2cef2e0ae65ca3b9/mmh3-5.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:1e4ecee40ba19e6975e1120829796770325841c2f153c0e9aecca927194c6a2a", size = 41517, upload-time = "2026-03-05T15:54:19.764Z" }, - { url = "https://files.pythonhosted.org/packages/8a/5f/2a511ee8a1c2a527c77726d5231685b72312c5a1a1b7639ad66a9652aa84/mmh3-5.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:c302245fd6c33d96bd169c7ccf2513c20f4c1e417c07ce9dce107c8bc3f8411f", size = 39287, upload-time = "2026-03-05T15:54:20.904Z" }, - { url = "https://files.pythonhosted.org/packages/92/94/bc5c3b573b40a328c4d141c20e399039ada95e5e2a661df3425c5165fd84/mmh3-5.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0cc21533878e5586b80d74c281d7f8da7932bc8ace50b8d5f6dbf7e3935f63f1", size = 56087, upload-time = "2026-03-05T15:54:21.92Z" }, - { url = "https://files.pythonhosted.org/packages/f6/80/64a02cc3e95c3af0aaa2590849d9ed24a9f14bb93537addde688e039b7c3/mmh3-5.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4eda76074cfca2787c8cf1bec603eaebdddd8b061ad5502f85cddae998d54f00", size = 40500, upload-time = "2026-03-05T15:54:22.953Z" }, - { url = "https://files.pythonhosted.org/packages/8b/72/e6d6602ce18adf4ddcd0e48f2e13590cc92a536199e52109f46f259d3c46/mmh3-5.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:eee884572b06bbe8a2b54f424dbd996139442cf83c76478e1ec162512e0dd2c7", size = 40034, upload-time = "2026-03-05T15:54:23.943Z" }, - { url = 
"https://files.pythonhosted.org/packages/59/c2/bf4537a8e58e21886ef16477041238cab5095c836496e19fafc34b7445d2/mmh3-5.2.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0d0b7e803191db5f714d264044e06189c8ccd3219e936cc184f07106bd17fd7b", size = 97292, upload-time = "2026-03-05T15:54:25.335Z" }, - { url = "https://files.pythonhosted.org/packages/e5/e2/51ed62063b44d10b06d975ac87af287729eeb5e3ed9772f7584a17983e90/mmh3-5.2.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e6c219e375f6341d0959af814296372d265a8ca1af63825f65e2e87c618f006", size = 103274, upload-time = "2026-03-05T15:54:26.44Z" }, - { url = "https://files.pythonhosted.org/packages/75/ce/12a7524dca59eec92e5b31fdb13ede1e98eda277cf2b786cf73bfbc24e81/mmh3-5.2.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:26fb5b9c3946bf7f1daed7b37e0c03898a6f062149127570f8ede346390a0825", size = 106158, upload-time = "2026-03-05T15:54:28.578Z" }, - { url = "https://files.pythonhosted.org/packages/86/1f/d3ba6dd322d01ab5d44c46c8f0c38ab6bbbf9b5e20e666dfc05bf4a23604/mmh3-5.2.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3c38d142c706201db5b2345166eeef1e7740e3e2422b470b8ba5c8727a9b4c7a", size = 113005, upload-time = "2026-03-05T15:54:29.767Z" }, - { url = "https://files.pythonhosted.org/packages/b6/a9/15d6b6f913294ea41b44d901741298e3718e1cb89ee626b3694625826a43/mmh3-5.2.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50885073e2909251d4718634a191c49ae5f527e5e1736d738e365c3e8be8f22b", size = 120744, upload-time = "2026-03-05T15:54:30.931Z" }, - { url = "https://files.pythonhosted.org/packages/76/b3/70b73923fd0284c439860ff5c871b20210dfdbe9a6b9dd0ee6496d77f174/mmh3-5.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b3f99e1756fc48ad507b95e5d86f2fb21b3d495012ff13e6592ebac14033f166", size = 
99111, upload-time = "2026-03-05T15:54:32.353Z" }, - { url = "https://files.pythonhosted.org/packages/dd/38/99f7f75cd27d10d8b899a1caafb9d531f3903e4d54d572220e3d8ac35e89/mmh3-5.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:62815d2c67f2dd1be76a253d88af4e1da19aeaa1820146dec52cf8bee2958b16", size = 98623, upload-time = "2026-03-05T15:54:33.801Z" }, - { url = "https://files.pythonhosted.org/packages/fd/68/6e292c0853e204c44d2f03ea5f090be3317a0e2d9417ecb62c9eb27687df/mmh3-5.2.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8f767ba0911602ddef289404e33835a61168314ebd3c729833db2ed685824211", size = 106437, upload-time = "2026-03-05T15:54:35.177Z" }, - { url = "https://files.pythonhosted.org/packages/dd/c6/fedd7284c459cfb58721d461fcf5607a4c1f5d9ab195d113d51d10164d16/mmh3-5.2.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:67e41a497bac88cc1de96eeba56eeb933c39d54bc227352f8455aa87c4ca4000", size = 110002, upload-time = "2026-03-05T15:54:36.673Z" }, - { url = "https://files.pythonhosted.org/packages/3b/ac/ca8e0c19a34f5b71390171d2ff0b9f7f187550d66801a731bb68925126a4/mmh3-5.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3d74a03fb57757ece25aa4b3c1c60157a1cece37a020542785f942e2f827eed5", size = 97507, upload-time = "2026-03-05T15:54:37.804Z" }, - { url = "https://files.pythonhosted.org/packages/df/94/6ebb9094cfc7ac5e7950776b9d13a66bb4a34f83814f32ba2abc9494fc68/mmh3-5.2.1-cp312-cp312-win32.whl", hash = "sha256:7374d6e3ef72afe49697ecd683f3da12f4fc06af2d75433d0580c6746d2fa025", size = 40773, upload-time = "2026-03-05T15:54:40.077Z" }, - { url = "https://files.pythonhosted.org/packages/5b/3c/cd3527198cf159495966551c84a5f36805a10ac17b294f41f67b83f6a4d6/mmh3-5.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:3a9fed49c6ce4ed7e73f13182760c65c816da006debe67f37635580dfb0fae00", size = 41560, upload-time = "2026-03-05T15:54:41.148Z" }, - { url = 
"https://files.pythonhosted.org/packages/15/96/6fe5ebd0f970a076e3ed5512871ce7569447b962e96c125528a2f9724470/mmh3-5.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:bbfcb95d9a744e6e2827dfc66ad10e1020e0cac255eb7f85652832d5a264c2fc", size = 39313, upload-time = "2026-03-05T15:54:42.171Z" }, - { url = "https://files.pythonhosted.org/packages/25/a5/9daa0508a1569a54130f6198d5462a92deda870043624aa3ea72721aa765/mmh3-5.2.1-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:723b2681ed4cc07d3401bbea9c201ad4f2a4ca6ba8cddaff6789f715dd2b391e", size = 40832, upload-time = "2026-03-05T15:54:43.212Z" }, - { url = "https://files.pythonhosted.org/packages/0a/6b/3230c6d80c1f4b766dedf280a92c2241e99f87c1504ff74205ec8cebe451/mmh3-5.2.1-cp313-cp313-android_21_x86_64.whl", hash = "sha256:3619473a0e0d329fd4aec8075628f8f616be2da41605300696206d6f36920c3d", size = 41964, upload-time = "2026-03-05T15:54:44.204Z" }, - { url = "https://files.pythonhosted.org/packages/62/fb/648bfddb74a872004b6ee751551bfdda783fe6d70d2e9723bad84dbe5311/mmh3-5.2.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:e48d4dbe0f88e53081da605ae68644e5182752803bbc2beb228cca7f1c4454d6", size = 39114, upload-time = "2026-03-05T15:54:45.205Z" }, - { url = "https://files.pythonhosted.org/packages/95/c2/ab7901f87af438468b496728d11264cb397b3574d41506e71b92128e0373/mmh3-5.2.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a482ac121de6973897c92c2f31defc6bafb11c83825109275cffce54bb64933f", size = 39819, upload-time = "2026-03-05T15:54:46.509Z" }, - { url = "https://files.pythonhosted.org/packages/2f/ed/6f88dda0df67de1612f2e130ffea34cf84aaee5bff5b0aff4dbff2babe34/mmh3-5.2.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:17fbb47f0885ace8327ce1235d0416dc86a211dcd8cc1e703f41523be32cfec8", size = 40330, upload-time = "2026-03-05T15:54:47.864Z" }, - { url = 
"https://files.pythonhosted.org/packages/3d/66/7516d23f53cdf90f43fce24ab80c28f45e6851d78b46bef8c02084edf583/mmh3-5.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d51fde50a77f81330523562e3c2734ffdca9c4c9e9d355478117905e1cfe16c6", size = 56078, upload-time = "2026-03-05T15:54:48.9Z" }, - { url = "https://files.pythonhosted.org/packages/bc/34/4d152fdf4a91a132cb226b671f11c6b796eada9ab78080fb5ce1e95adaab/mmh3-5.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:19bbd3b841174ae6ed588536ab5e1b1fe83d046e668602c20266547298d939a9", size = 40498, upload-time = "2026-03-05T15:54:49.942Z" }, - { url = "https://files.pythonhosted.org/packages/d4/4c/8e3af1b6d85a299767ec97bd923f12b06267089c1472c27c1696870d1175/mmh3-5.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be77c402d5e882b6fbacfd90823f13da8e0a69658405a39a569c6b58fdb17b03", size = 40033, upload-time = "2026-03-05T15:54:50.994Z" }, - { url = "https://files.pythonhosted.org/packages/8b/f2/966ea560e32578d453c9e9db53d602cbb1d0da27317e232afa7c38ceba11/mmh3-5.2.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fd96476f04db5ceba1cfa0f21228f67c1f7402296f0e73fee3513aa680ad237b", size = 97320, upload-time = "2026-03-05T15:54:52.072Z" }, - { url = "https://files.pythonhosted.org/packages/bb/0d/2c5f9893b38aeb6b034d1a44ecd55a010148054f6a516abe53b5e4057297/mmh3-5.2.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:707151644085dd0f20fe4f4b573d28e5130c4aaa5f587e95b60989c5926653b5", size = 103299, upload-time = "2026-03-05T15:54:53.569Z" }, - { url = "https://files.pythonhosted.org/packages/1c/fc/2ebaef4a4d4376f89761274dc274035ffd96006ab496b4ee5af9b08f21a9/mmh3-5.2.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3737303ca9ea0f7cb83028781148fcda4f1dac7821db0c47672971dabcf63593", size = 106222, upload-time = "2026-03-05T15:54:55.092Z" }, - { url = 
"https://files.pythonhosted.org/packages/57/09/ea7ffe126d0ba0406622602a2d05e1e1a6841cc92fc322eb576c95b27fad/mmh3-5.2.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2778fed822d7db23ac5008b181441af0c869455b2e7d001f4019636ac31b6fe4", size = 113048, upload-time = "2026-03-05T15:54:56.305Z" }, - { url = "https://files.pythonhosted.org/packages/85/57/9447032edf93a64aa9bef4d9aa596400b1756f40411890f77a284f6293ca/mmh3-5.2.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d57dea657357230cc780e13920d7fa7db059d58fe721c80020f94476da4ca0a1", size = 120742, upload-time = "2026-03-05T15:54:57.453Z" }, - { url = "https://files.pythonhosted.org/packages/53/82/a86cc87cc88c92e9e1a598fee509f0409435b57879a6129bf3b3e40513c7/mmh3-5.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:169e0d178cb59314456ab30772429a802b25d13227088085b0d49b9fe1533104", size = 99132, upload-time = "2026-03-05T15:54:58.583Z" }, - { url = "https://files.pythonhosted.org/packages/54/f7/6b16eb1b40ee89bb740698735574536bc20d6cdafc65ae702ea235578e05/mmh3-5.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7e4e1f580033335c6f76d1e0d6b56baf009d1a64d6a4816347e4271ba951f46d", size = 98686, upload-time = "2026-03-05T15:55:00.078Z" }, - { url = "https://files.pythonhosted.org/packages/e8/88/a601e9f32ad1410f438a6d0544298ea621f989bd34a0731a7190f7dec799/mmh3-5.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:2bd9f19f7f1fcebd74e830f4af0f28adad4975d40d80620be19ffb2b2af56c9f", size = 106479, upload-time = "2026-03-05T15:55:01.532Z" }, - { url = "https://files.pythonhosted.org/packages/d6/5c/ce29ae3dfc4feec4007a437a1b7435fb9507532a25147602cd5b52be86db/mmh3-5.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c88653877aeb514c089d1b3d473451677b8b9a6d1497dbddf1ae7934518b06d2", size = 110030, upload-time = "2026-03-05T15:55:02.934Z" }, - { url = 
"https://files.pythonhosted.org/packages/13/30/ae444ef2ff87c805d525da4fa63d27cda4fe8a48e77003a036b8461cfd5c/mmh3-5.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fceef7fe67c81e1585198215e42ad3fdba3a25644beda8fbdaf85f4d7b93175a", size = 97536, upload-time = "2026-03-05T15:55:04.135Z" }, - { url = "https://files.pythonhosted.org/packages/4b/f9/dc3787ee5c813cc27fe79f45ad4500d9b5437f23a7402435cc34e07c7718/mmh3-5.2.1-cp313-cp313-win32.whl", hash = "sha256:54b64fb2433bc71488e7a449603bf8bd31fbcf9cb56fbe1eb6d459e90b86c37b", size = 40769, upload-time = "2026-03-05T15:55:05.277Z" }, - { url = "https://files.pythonhosted.org/packages/43/67/850e0b5a1e97799822ebfc4ca0e8c6ece3ed8baf7dcdf64de817dfdda2ca/mmh3-5.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:cae6383181f1e345317742d2ddd88f9e7d2682fa4c9432e3a74e47d92dce0229", size = 41563, upload-time = "2026-03-05T15:55:06.283Z" }, - { url = "https://files.pythonhosted.org/packages/c0/cc/98c90b28e1da5458e19fbfaf4adb5289208d3bfccd45dd14eab216a2f0bb/mmh3-5.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:022aa1a528604e6c83d0a7705fdef0b5355d897a9e0fa3a8d26709ceaa06965d", size = 39310, upload-time = "2026-03-05T15:55:07.323Z" }, - { url = "https://files.pythonhosted.org/packages/63/b4/65bc1fb2bb7f83e91c30865023b1847cf89a5f237165575e8c83aa536584/mmh3-5.2.1-cp314-cp314-android_24_arm64_v8a.whl", hash = "sha256:d771f085fcdf4035786adfb1d8db026df1eb4b41dac1c3d070d1e49512843227", size = 40794, upload-time = "2026-03-05T15:55:09.773Z" }, - { url = "https://files.pythonhosted.org/packages/c4/86/7168b3d83be8eb553897b1fac9da8bbb06568e5cfe555ffc329ebb46f59d/mmh3-5.2.1-cp314-cp314-android_24_x86_64.whl", hash = "sha256:7f196cd7910d71e9d9860da0ff7a77f64d22c1ad931f1dd18559a06e03109fc0", size = 41923, upload-time = "2026-03-05T15:55:10.924Z" }, - { url = "https://files.pythonhosted.org/packages/bf/9b/b653ab611c9060ce8ff0ba25c0226757755725e789292f3ca138a58082cd/mmh3-5.2.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = 
"sha256:b1f12bd684887a0a5d55e6363ca87056f361e45451105012d329b86ec19dbe0b", size = 39131, upload-time = "2026-03-05T15:55:11.961Z" }, - { url = "https://files.pythonhosted.org/packages/9b/b4/5a2e0d34ab4d33543f01121e832395ea510132ea8e52cdf63926d9d81754/mmh3-5.2.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d106493a60dcb4aef35a0fac85105e150a11cf8bc2b0d388f5a33272d756c966", size = 39825, upload-time = "2026-03-05T15:55:13.013Z" }, - { url = "https://files.pythonhosted.org/packages/bd/69/81699a8f39a3f8d368bec6443435c0c392df0d200ad915bf0d222b588e03/mmh3-5.2.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:44983e45310ee5b9f73397350251cdf6e63a466406a105f1d16cb5baa659270b", size = 40344, upload-time = "2026-03-05T15:55:14.026Z" }, - { url = "https://files.pythonhosted.org/packages/0c/b3/71c8c775807606e8fd8acc5c69016e1caf3200d50b50b6dd4b40ce10b76c/mmh3-5.2.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:368625fb01666655985391dbad3860dc0ba7c0d6b9125819f3121ee7292b4ac8", size = 56291, upload-time = "2026-03-05T15:55:15.137Z" }, - { url = "https://files.pythonhosted.org/packages/6f/75/2c24517d4b2ce9e4917362d24f274d3d541346af764430249ddcc4cb3a08/mmh3-5.2.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:72d1cc63bcc91e14933f77d51b3df899d6a07d184ec515ea7f56bff659e124d7", size = 40575, upload-time = "2026-03-05T15:55:16.518Z" }, - { url = "https://files.pythonhosted.org/packages/bf/b9/e4a360164365ac9f07a25f0f7928e3a66eb9ecc989384060747aa170e6aa/mmh3-5.2.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e8b4b5580280b9265af3e0409974fb79c64cf7523632d03fbf11df18f8b0181e", size = 40052, upload-time = "2026-03-05T15:55:17.735Z" }, - { url = "https://files.pythonhosted.org/packages/97/ca/120d92223a7546131bbbc31c9174168ee7a73b1366f5463ffe69d9e691fe/mmh3-5.2.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:4cbbde66f1183db040daede83dd86c06d663c5bb2af6de1142b7c8c37923dd74", size = 97311, 
upload-time = "2026-03-05T15:55:18.959Z" }, - { url = "https://files.pythonhosted.org/packages/b6/71/c1a60c1652b8813ef9de6d289784847355417ee0f2980bca002fe87f4ae5/mmh3-5.2.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8ff038d52ef6aa0f309feeba00c5095c9118d0abf787e8e8454d6048db2037fc", size = 103279, upload-time = "2026-03-05T15:55:20.448Z" }, - { url = "https://files.pythonhosted.org/packages/48/29/ad97f4be1509cdcb28ae32c15593ce7c415db47ace37f8fad35b493faa9a/mmh3-5.2.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a4130d0b9ce5fad6af07421b1aecc7e079519f70d6c05729ab871794eded8617", size = 106290, upload-time = "2026-03-05T15:55:21.6Z" }, - { url = "https://files.pythonhosted.org/packages/77/29/1f86d22e281bd8827ba373600a4a8b0c0eae5ca6aa55b9a8c26d2a34decc/mmh3-5.2.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6e0bfe77d238308839699944164b96a2eeccaf55f2af400f54dc20669d8d5f2", size = 113116, upload-time = "2026-03-05T15:55:22.826Z" }, - { url = "https://files.pythonhosted.org/packages/a7/7c/339971ea7ed4c12d98f421f13db3ea576a9114082ccb59d2d1a0f00ccac1/mmh3-5.2.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f963eafc0a77a6c0562397da004f5876a9bcf7265a7bcc3205e29636bc4a1312", size = 120740, upload-time = "2026-03-05T15:55:24.3Z" }, - { url = "https://files.pythonhosted.org/packages/e4/92/3c7c4bdb8e926bb3c972d1e2907d77960c1c4b250b41e8366cf20c6e4373/mmh3-5.2.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:92883836caf50d5255be03d988d75bc93e3f86ba247b7ca137347c323f731deb", size = 99143, upload-time = "2026-03-05T15:55:25.456Z" }, - { url = "https://files.pythonhosted.org/packages/df/0a/33dd8706e732458c8375eae63c981292de07a406bad4ec03e5269654aa2c/mmh3-5.2.1-cp314-cp314-musllinux_1_2_i686.whl", hash = 
"sha256:57b52603e89355ff318025dd55158f6e71396c0f1f609d548e9ea9c94cc6ce0a", size = 98703, upload-time = "2026-03-05T15:55:26.723Z" }, - { url = "https://files.pythonhosted.org/packages/51/04/76bbce05df76cbc3d396f13b2ea5b1578ef02b6a5187e132c6c33f99d596/mmh3-5.2.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f40a95186a72fa0b67d15fef0f157bfcda00b4f59c8a07cbe5530d41ac35d105", size = 106484, upload-time = "2026-03-05T15:55:28.214Z" }, - { url = "https://files.pythonhosted.org/packages/d3/8f/c6e204a2c70b719c1f62ffd9da27aef2dddcba875ea9c31ca0e87b975a46/mmh3-5.2.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:58370d05d033ee97224c81263af123dea3d931025030fd34b61227a768a8858a", size = 110012, upload-time = "2026-03-05T15:55:29.532Z" }, - { url = "https://files.pythonhosted.org/packages/e3/37/7181efd8e39db386c1ebc3e6b7d1f702a09d7c1197a6f2742ed6b5c16597/mmh3-5.2.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7be6dfb49e48fd0a7d91ff758a2b51336f1cd21f9d44b20f6801f072bd080cdd", size = 97508, upload-time = "2026-03-05T15:55:31.01Z" }, - { url = "https://files.pythonhosted.org/packages/42/0f/afa7ca2615fd85e1469474bb860e381443d0b868c083b62b41cb1d7ca32f/mmh3-5.2.1-cp314-cp314-win32.whl", hash = "sha256:54fe8518abe06a4c3852754bfd498b30cc58e667f376c513eac89a244ce781a4", size = 41387, upload-time = "2026-03-05T15:55:32.403Z" }, - { url = "https://files.pythonhosted.org/packages/71/0d/46d42a260ee1357db3d486e6c7a692e303c017968e14865e00efa10d09fc/mmh3-5.2.1-cp314-cp314-win_amd64.whl", hash = "sha256:3f796b535008708846044c43302719c6956f39ca2d93f2edda5319e79a29efbb", size = 42101, upload-time = "2026-03-05T15:55:33.646Z" }, - { url = "https://files.pythonhosted.org/packages/a4/7b/848a8378059d96501a41159fca90d6a99e89736b0afbe8e8edffeac8c74b/mmh3-5.2.1-cp314-cp314-win_arm64.whl", hash = "sha256:cd471ede0d802dd936b6fab28188302b2d497f68436025857ca72cd3810423fe", size = 39836, upload-time = "2026-03-05T15:55:35.026Z" }, - { url = 
"https://files.pythonhosted.org/packages/27/61/1dabea76c011ba8547c25d30c91c0ec22544487a8750997a27a0c9e1180b/mmh3-5.2.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:5174a697ce042fa77c407e05efe41e03aa56dae9ec67388055820fb48cf4c3ba", size = 57727, upload-time = "2026-03-05T15:55:36.162Z" }, - { url = "https://files.pythonhosted.org/packages/b7/32/731185950d1cf2d5e28979cc8593016ba1619a295faba10dda664a4931b5/mmh3-5.2.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:0a3984146e414684a6be2862d84fcb1035f4984851cb81b26d933bab6119bf00", size = 41308, upload-time = "2026-03-05T15:55:37.254Z" }, - { url = "https://files.pythonhosted.org/packages/76/aa/66c76801c24b8c9418b4edde9b5e57c75e72c94e29c48f707e3962534f18/mmh3-5.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:bd6e7d363aa93bd3421b30b6af97064daf47bc96005bddba67c5ffbc6df426b8", size = 40758, upload-time = "2026-03-05T15:55:38.61Z" }, - { url = "https://files.pythonhosted.org/packages/9e/bb/79a1f638a02f0ae389f706d13891e2fbf7d8c0a22ecde67ba828951bb60a/mmh3-5.2.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:113f78e7463a36dbbcea05bfe688efd7fa759d0f0c56e73c974d60dcfec3dfcc", size = 109670, upload-time = "2026-03-05T15:55:40.13Z" }, - { url = "https://files.pythonhosted.org/packages/26/94/8cd0e187a288985bcfc79bf5144d1d712df9dee74365f59d26e3a1865be6/mmh3-5.2.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7e8ec5f606e0809426d2440e0683509fb605a8820a21ebd120dcdba61b74ef7f", size = 117399, upload-time = "2026-03-05T15:55:42.076Z" }, - { url = "https://files.pythonhosted.org/packages/42/94/dfea6059bd5c5beda565f58a4096e43f4858fb6d2862806b8bbd12cbb284/mmh3-5.2.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22b0f9971ec4e07e8223f2beebe96a6cfc779d940b6f27d26604040dd74d3a44", size = 120386, upload-time = "2026-03-05T15:55:43.481Z" }, - { url = 
"https://files.pythonhosted.org/packages/47/cb/f9c45e62aaa67220179f487772461d891bb582bb2f9783c944832c60efd9/mmh3-5.2.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:85ffc9920ffc39c5eee1e3ac9100c913a0973996fbad5111f939bbda49204bb7", size = 125924, upload-time = "2026-03-05T15:55:44.638Z" }, - { url = "https://files.pythonhosted.org/packages/a5/83/fe54a4a7c11bc9f623dfc1707decd034245602b076dfc1dcc771a4163170/mmh3-5.2.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7aec798c2b01aaa65a55f1124f3405804184373abb318a3091325aece235f67c", size = 135280, upload-time = "2026-03-05T15:55:45.866Z" }, - { url = "https://files.pythonhosted.org/packages/97/67/fe7e9e9c143daddd210cd22aef89cbc425d58ecf238d2b7d9eb0da974105/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:55dbbd8ffbc40d1697d5e2d0375b08599dae8746b0b08dea05eee4ce81648fac", size = 110050, upload-time = "2026-03-05T15:55:47.074Z" }, - { url = "https://files.pythonhosted.org/packages/43/c4/6d4b09fcbef80794de447c9378e39eefc047156b290fa3dd2d5257ca8227/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:6c85c38a279ca9295a69b9b088a2e48aa49737bb1b34e6a9dc6297c110e8d912", size = 111158, upload-time = "2026-03-05T15:55:48.239Z" }, - { url = "https://files.pythonhosted.org/packages/81/a6/ca51c864bdb30524beb055a6d8826db3906af0834ec8c41d097a6e8573d5/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:6290289fa5fb4c70fd7f72016e03633d60388185483ff3b162912c81205ae2cf", size = 116890, upload-time = "2026-03-05T15:55:49.405Z" }, - { url = "https://files.pythonhosted.org/packages/cc/04/5a1fe2e2ad843d03e89af25238cbc4f6840a8bb6c4329a98ab694c71deda/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:4fc6cd65dc4d2fdb2625e288939a3566e36127a84811a4913f02f3d5931da52d", size = 123121, upload-time = "2026-03-05T15:55:50.61Z" }, - { url = 
"https://files.pythonhosted.org/packages/af/4d/3c820c6f4897afd25905270a9f2330a23f77a207ea7356f7aadace7273c0/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:623f938f6a039536cc02b7582a07a080f13fdfd48f87e63201d92d7e34d09a18", size = 110187, upload-time = "2026-03-05T15:55:52.143Z" }, - { url = "https://files.pythonhosted.org/packages/21/54/1d71cd143752361c0aebef16ad3f55926a6faf7b112d355745c1f8a25f7f/mmh3-5.2.1-cp314-cp314t-win32.whl", hash = "sha256:29bc3973676ae334412efdd367fcd11d036b7be3efc1ce2407ef8676dabfeb82", size = 41934, upload-time = "2026-03-05T15:55:53.564Z" }, - { url = "https://files.pythonhosted.org/packages/9d/e4/63a2a88f31d93dea03947cccc2a076946857e799ea4f7acdecbf43b324aa/mmh3-5.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:28cfab66577000b9505a0d068c731aee7ca85cd26d4d63881fab17857e0fe1fb", size = 43036, upload-time = "2026-03-05T15:55:55.252Z" }, - { url = "https://files.pythonhosted.org/packages/a0/0f/59204bf136d1201f8d7884cfbaf7498c5b4674e87a4c693f9bde63741ce1/mmh3-5.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:dfd51b4c56b673dfbc43d7d27ef857dd91124801e2806c69bb45585ce0fa019b", size = 40391, upload-time = "2026-03-05T15:55:56.697Z" }, -] - [[package]] name = "more-itertools" version = "11.1.0" @@ -3451,130 +2652,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a8/64/3708a90d1ebe202ffdeb7185f878a3c84d15c2b2c31858da2ce0583e2def/nvidia_nvtx-13.0.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb7780edb6b14107373c835bf8b72e7a178bac7367e23da7acb108f973f157a6", size = 148878, upload-time = "2025-09-04T08:28:53.627Z" }, ] -[[package]] -name = "oauthlib" -version = "3.3.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0b/5f/19930f824ffeb0ad4372da4812c50edbd1434f678c90c2733e1188edfc63/oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9", size = 185918, upload-time = 
"2025-06-19T22:48:08.269Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, -] - -[[package]] -name = "onnxruntime" -version = "1.24.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.11'", -] -dependencies = [ - { name = "flatbuffers", marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "packaging", marker = "python_full_version < '3.11'" }, - { name = "protobuf", marker = "python_full_version < '3.11'" }, - { name = "sympy", marker = "python_full_version < '3.11'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/15/41/3253db975a90c3ce1d475e2a230773a21cd7998537f0657947df6fb79861/onnxruntime-1.24.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3e6456801c66b095c5cd68e690ca25db970ea5202bd0c5b84a2c3ef7731c5a3c", size = 17332766, upload-time = "2026-03-05T17:18:59.714Z" }, - { url = "https://files.pythonhosted.org/packages/7e/c5/3af6b325f1492d691b23844d88ed26844c1164620860c5efe95c0e22782d/onnxruntime-1.24.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b2ebc54c6d8281dccff78d4b06e47d4cf07535937584ab759448390a70f4978", size = 15130330, upload-time = "2026-03-05T16:34:53.831Z" }, - { url = "https://files.pythonhosted.org/packages/03/4b/f96b46c1866a293ed23ca2cf5e5a63d413ad3a951da60dd877e3c56cbbca/onnxruntime-1.24.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb56575d7794bf0781156955610c9e651c9504c64d42ec880784b6106244882d", size = 17213247, upload-time = "2026-03-05T17:17:59.812Z" }, - { url = 
"https://files.pythonhosted.org/packages/36/13/27cf4d8df2578747584e8758aeb0b673b60274048510257f1f084b15e80e/onnxruntime-1.24.3-cp311-cp311-win_amd64.whl", hash = "sha256:c958222ef9eff54018332beecd32d5d94a3ab079d8821937b333811bf4da0d39", size = 12595530, upload-time = "2026-03-05T17:18:49.356Z" }, - { url = "https://files.pythonhosted.org/packages/19/8c/6d9f31e6bae72a8079be12ed8ba36c4126a571fad38ded0a1b96f60f6896/onnxruntime-1.24.3-cp311-cp311-win_arm64.whl", hash = "sha256:a8f761857ebaf58a85b9e42422d03207f1d39e6bb8fecfdbf613bac5b9710723", size = 12261715, upload-time = "2026-03-05T17:18:39.699Z" }, - { url = "https://files.pythonhosted.org/packages/d0/7f/dfdc4e52600fde4c02d59bfe98c4b057931c1114b701e175aee311a9bc11/onnxruntime-1.24.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:0d244227dc5e00a9ae15a7ac1eba4c4460d7876dfecafe73fb00db9f1d914d91", size = 17342578, upload-time = "2026-03-05T17:19:02.403Z" }, - { url = "https://files.pythonhosted.org/packages/1c/dc/1f5489f7b21817d4ad352bf7a92a252bd5b438bcbaa7ad20ea50814edc79/onnxruntime-1.24.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a9847b870b6cb462652b547bc98c49e0efb67553410a082fde1918a38707452", size = 15150105, upload-time = "2026-03-05T16:34:56.897Z" }, - { url = "https://files.pythonhosted.org/packages/28/7c/fd253da53594ab8efbefdc85b3638620ab1a6aab6eb7028a513c853559ce/onnxruntime-1.24.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b354afce3333f2859c7e8706d84b6c552beac39233bcd3141ce7ab77b4cabb5d", size = 17237101, upload-time = "2026-03-05T17:18:02.561Z" }, - { url = "https://files.pythonhosted.org/packages/71/5f/eaabc5699eeed6a9188c5c055ac1948ae50138697a0428d562ac970d7db5/onnxruntime-1.24.3-cp312-cp312-win_amd64.whl", hash = "sha256:44ea708c34965439170d811267c51281d3897ecfc4aa0087fa25d4a4c3eb2e4a", size = 12597638, upload-time = "2026-03-05T17:18:52.141Z" }, - { url = 
"https://files.pythonhosted.org/packages/cc/5c/d8066c320b90610dbeb489a483b132c3b3879b2f93f949fb5d30cfa9b119/onnxruntime-1.24.3-cp312-cp312-win_arm64.whl", hash = "sha256:48d1092b44ca2ba6f9543892e7c422c15a568481403c10440945685faf27a8d8", size = 12270943, upload-time = "2026-03-05T17:18:42.006Z" }, - { url = "https://files.pythonhosted.org/packages/51/8d/487ece554119e2991242d4de55de7019ac6e47ee8dfafa69fcf41d37f8ed/onnxruntime-1.24.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:34a0ea5ff191d8420d9c1332355644148b1bf1a0d10c411af890a63a9f662aa7", size = 17342706, upload-time = "2026-03-05T16:35:10.813Z" }, - { url = "https://files.pythonhosted.org/packages/dd/25/8b444f463c1ac6106b889f6235c84f01eec001eaf689c3eff8c69cf48fae/onnxruntime-1.24.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1fd2ec7bb0fabe42f55e8337cfc9b1969d0d14622711aac73d69b4bd5abb5ed7", size = 15149956, upload-time = "2026-03-05T16:34:59.264Z" }, - { url = "https://files.pythonhosted.org/packages/34/fc/c9182a3e1ab46940dd4f30e61071f59eee8804c1f641f37ce6e173633fb6/onnxruntime-1.24.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:df8e70e732fe26346faaeec9147fa38bef35d232d2495d27e93dd221a2d473a9", size = 17237370, upload-time = "2026-03-05T17:18:05.258Z" }, - { url = "https://files.pythonhosted.org/packages/05/7e/3b549e1f4538514118bff98a1bcd6481dd9a17067f8c9af77151621c9a5c/onnxruntime-1.24.3-cp313-cp313-win_amd64.whl", hash = "sha256:2d3706719be6ad41d38a2250998b1d87758a20f6ea4546962e21dc79f1f1fd2b", size = 12597939, upload-time = "2026-03-05T17:18:54.772Z" }, - { url = "https://files.pythonhosted.org/packages/80/41/9696a5c4631a0caa75cc8bc4efd30938fd483694aa614898d087c3ee6d29/onnxruntime-1.24.3-cp313-cp313-win_arm64.whl", hash = "sha256:b082f3ba9519f0a1a1e754556bc7e635c7526ef81b98b3f78da4455d25f0437b", size = 12270705, upload-time = "2026-03-05T17:18:44.774Z" }, - { url = 
"https://files.pythonhosted.org/packages/b7/65/a26c5e59e3b210852ee04248cf8843c81fe7d40d94cf95343b66efe7eec9/onnxruntime-1.24.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72f956634bc2e4bd2e8b006bef111849bd42c42dea37bd0a4c728404fdaf4d34", size = 15161796, upload-time = "2026-03-05T16:35:02.871Z" }, - { url = "https://files.pythonhosted.org/packages/f3/25/2035b4aa2ccb5be6acf139397731ec507c5f09e199ab39d3262b22ffa1ac/onnxruntime-1.24.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:78d1f25eed4ab9959db70a626ed50ee24cf497e60774f59f1207ac8556399c4d", size = 17240936, upload-time = "2026-03-05T17:18:09.534Z" }, - { url = "https://files.pythonhosted.org/packages/f9/a4/b3240ea84b92a3efb83d49cc16c04a17ade1ab47a6a95c4866d15bf0ac35/onnxruntime-1.24.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:a6b4bce87d96f78f0a9bf5cefab3303ae95d558c5bfea53d0bf7f9ea207880a8", size = 17344149, upload-time = "2026-03-05T16:35:13.382Z" }, - { url = "https://files.pythonhosted.org/packages/bb/4a/4b56757e51a56265e8c56764d9c36d7b435045e05e3b8a38bedfc5aedba3/onnxruntime-1.24.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d48f36c87b25ab3b2b4c88826c96cf1399a5631e3c2c03cc27d6a1e5d6b18eb4", size = 15151571, upload-time = "2026-03-05T16:35:05.679Z" }, - { url = "https://files.pythonhosted.org/packages/cf/14/c6fb84980cec8f682a523fcac7c2bdd6b311e7f342c61ce48d3a9cb87fc6/onnxruntime-1.24.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e104d33a409bf6e3f30f0e8198ec2aaf8d445b8395490a80f6e6ad56da98e400", size = 17238951, upload-time = "2026-03-05T17:18:12.394Z" }, - { url = "https://files.pythonhosted.org/packages/57/14/447e1400165aca8caf35dabd46540eb943c92f3065927bb4d9bcbc91e221/onnxruntime-1.24.3-cp314-cp314-win_amd64.whl", hash = "sha256:e785d73fbd17421c2513b0bb09eb25d88fa22c8c10c3f5d6060589efa5537c5b", size = 12903820, upload-time = "2026-03-05T17:18:57.123Z" }, - { url = 
"https://files.pythonhosted.org/packages/1d/ec/6b2fa5702e4bbba7339ca5787a9d056fc564a16079f8833cc6ba4798da1c/onnxruntime-1.24.3-cp314-cp314-win_arm64.whl", hash = "sha256:951e897a275f897a05ffbcaa615d98777882decaeb80c9216c68cdc62f849f53", size = 12594089, upload-time = "2026-03-05T17:18:47.169Z" }, - { url = "https://files.pythonhosted.org/packages/12/dc/cd06cba3ddad92ceb17b914a8e8d49836c79e38936e26bde6e368b62c1fe/onnxruntime-1.24.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4d4e70ce578aa214c74c7a7a9226bc8e229814db4a5b2d097333b81279ecde36", size = 15162789, upload-time = "2026-03-05T16:35:08.282Z" }, - { url = "https://files.pythonhosted.org/packages/a6/d6/413e98ab666c6fb9e8be7d1c6eb3bd403b0bea1b8d42db066dab98c7df07/onnxruntime-1.24.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02aaf6ddfa784523b6873b4176a79d508e599efe12ab0ea1a3a6e7314408b7aa", size = 17240738, upload-time = "2026-03-05T17:18:15.203Z" }, -] - -[[package]] -name = "onnxruntime" -version = "1.26.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.15'", - "python_full_version == '3.14.*'", - "python_full_version == '3.13.*'", - "python_full_version >= '3.11' and python_full_version < '3.13'", -] -dependencies = [ - { name = "flatbuffers", marker = "python_full_version >= '3.11'" }, - { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "packaging", marker = "python_full_version >= '3.11'" }, - { name = "protobuf", marker = "python_full_version >= '3.11'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/81/29a9eb470994a75eb7b3ccf32be314d7c66675a00ac7b50294816cc2db27/onnxruntime-1.26.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:ee1109ef4ef27cad90e823399e61e03b3c6c7bfe0fb820b4baf3678c15be8b3c", size = 18005108, upload-time = "2026-05-08T19:08:11.728Z" }, - { url = 
"https://files.pythonhosted.org/packages/66/c7/73efa6c8a4000c38fcc14947d84f234a17e5d66f203b37b7f1ad4a7b46eb/onnxruntime-1.26.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35c7c7b0ac2e02001d28fab6c9fc24e9abc5e6faa35e6e19c63cecf1406ba89f", size = 16043752, upload-time = "2026-05-08T19:07:10.707Z" }, - { url = "https://files.pythonhosted.org/packages/b6/3f/8de630f595daf6ce884d4dd95afd2a60e70ec6572e52bfee3aa2229befab/onnxruntime-1.26.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11a8df4dcfe9ad5ff0bd71a7571dbed019fabc7594676c89fe8b86ea029c246f", size = 18176043, upload-time = "2026-05-08T19:07:33.735Z" }, - { url = "https://files.pythonhosted.org/packages/9c/21/9f041de20787cd85498bd48e0ec4d098bf2a6c486e25b24b8dae1bf492b2/onnxruntime-1.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:e6456718125fd777c673f3b78d4a9ab58d6adea641e9afae85ee6444f0e0e9a9", size = 13023165, upload-time = "2026-05-08T19:08:00.633Z" }, - { url = "https://files.pythonhosted.org/packages/0e/82/3b9fe0ead2557cc3adf74c74c141bd1c7c4c6a9548c610af37df199f4512/onnxruntime-1.26.0-cp311-cp311-win_arm64.whl", hash = "sha256:cd920e45b730e4a87833e2910d8ca375aaca9da6ccc09e24bce463b3356d637f", size = 12789514, upload-time = "2026-05-08T19:07:49.433Z" }, - { url = "https://files.pythonhosted.org/packages/81/b1/d111b1df656761f980d9e298a60039a9cb66036b1d039e777537743d0ac3/onnxruntime-1.26.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05b028781b322ad74b57ce5b50aa5280bb1fe96ceec334628ade681e0b24c1ac", size = 18016624, upload-time = "2026-05-12T00:41:01.735Z" }, - { url = "https://files.pythonhosted.org/packages/f6/a0/3f9d896a0385a36bd04345d6d0b802821a5782adde562e7e135f6bb71c73/onnxruntime-1.26.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:91f2bb870a4b9224eba0a6728c1fa7a9e552b8e59e1083c51fbbc3d013f2b5c0", size = 16052692, upload-time = "2026-05-08T19:07:13.829Z" }, - { url = 
"https://files.pythonhosted.org/packages/7c/43/2a4e04f8dbeffad19bbcced4bcd4289bf478921518437404d6b92bdf213b/onnxruntime-1.26.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9b6dd70599005bd1bf29779f04a91978b92b5e719c11a20068a8f8e535f725b6", size = 18185439, upload-time = "2026-05-08T19:07:36.299Z" }, - { url = "https://files.pythonhosted.org/packages/44/fc/026d0a7162b9c2153dac292baea9e027c42304dc1d9dc6f8ff5b4cfbaedd/onnxruntime-1.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:a26374dc7fbcaae593601086b242120e13f2310558df0991da6dd8b8fac00414", size = 13026427, upload-time = "2026-05-08T19:08:03.503Z" }, - { url = "https://files.pythonhosted.org/packages/3e/27/1dcf88e45e4c69db5f7b106f2dacc3801ba98994e082ca03e1dfdf7bfe57/onnxruntime-1.26.0-cp312-cp312-win_arm64.whl", hash = "sha256:54a8053410fd31fd66469bd754fcfe8a4df9f7eb44756b4b5479bf50c842d948", size = 12796647, upload-time = "2026-05-08T19:07:52.108Z" }, - { url = "https://files.pythonhosted.org/packages/cf/a2/c801242685e0ce48a4ca51dfafbb588765e0446397e123be53ba5598f3f5/onnxruntime-1.26.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ccce19c5f771b8268902f77d9fed9e88f9499465d6780808faa6611a789d33f0", size = 18016563, upload-time = "2026-05-08T19:07:28.081Z" }, - { url = "https://files.pythonhosted.org/packages/e2/64/0492c0b1db04e29b2630c87cfa36f9d6872b1ca8614b90c5cad58fac7d76/onnxruntime-1.26.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdbed8cf3b672b66acb032f33a253bc27f42bce6ece48ae3fab4fa483a5e96e0", size = 16052634, upload-time = "2026-05-08T19:07:16.885Z" }, - { url = "https://files.pythonhosted.org/packages/3d/26/4d09ddc755a84fc8d5e192991626b0e0680e8f6c5d58f4f1d05c42bc48cf/onnxruntime-1.26.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c07af6fc6d5557835f2b6ee7a96d8b3235d0c57a8e230efdedaee106a8a3cbc6", size = 18185632, upload-time = "2026-05-08T19:07:38.756Z" }, - { url = 
"https://files.pythonhosted.org/packages/77/89/3e52249aa08fa301e217ecba07b5246a8338fa2b401e109326e3fc5be0f9/onnxruntime-1.26.0-cp313-cp313-win_amd64.whl", hash = "sha256:61bec80655efa460591c2bc655392d57d2650ce85533a6b9b3b7a790d7ea7916", size = 13026751, upload-time = "2026-05-08T19:08:06.2Z" }, - { url = "https://files.pythonhosted.org/packages/06/b3/c1c8782b14af6797c303de132d6eef26a9fb80dfacd3750ce57911d11c6b/onnxruntime-1.26.0-cp313-cp313-win_arm64.whl", hash = "sha256:a6677545ff451e3539a02746d2f207d8c5baa4a0a818886bb9d6a6eb9511ee89", size = 12796807, upload-time = "2026-05-08T19:07:54.879Z" }, - { url = "https://files.pythonhosted.org/packages/c3/f5/47b0676408abec652c14b84d7173e389837832d850c24f87184277313e8d/onnxruntime-1.26.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e016edc15d3c19f36807e1c6b10be5b27807688c32720f91b5ae480a95215d0", size = 16057265, upload-time = "2026-05-08T19:07:19.603Z" }, - { url = "https://files.pythonhosted.org/packages/3b/45/33ab6deeef010ca844c877dd618cebc079590bbe52d2a3678e7223b1b908/onnxruntime-1.26.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f5fc48a91a046a6a5c9b147f83fb41d65d24d24923373b222cdd248f0f4f4aac", size = 18197590, upload-time = "2026-05-08T19:07:41.422Z" }, - { url = "https://files.pythonhosted.org/packages/40/89/17546c1c20f6bfc3ae41c22152378a26edfea918af3129e2139dcd7c99f3/onnxruntime-1.26.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:33a791f31432a3af1a96db5e54818b37aba5e5eefc2e6af5794c10a9118a9993", size = 18019724, upload-time = "2026-05-08T19:07:30.723Z" }, - { url = "https://files.pythonhosted.org/packages/bb/24/89457a35f6af29538a76647f2c18c3a28277e6c19234c847e7b4b7c19860/onnxruntime-1.26.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e90c00732c4553618103149d93f688e8c3063017938f8983e21a71d9f3b6d22e", size = 16054821, upload-time = "2026-05-08T19:07:22.348Z" }, - { url = 
"https://files.pythonhosted.org/packages/12/f9/15b2e1815cf570d238e0135529f80d2dce64e8e8818a1489cae83823c5c6/onnxruntime-1.26.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01498e80ba8988428d08c2d51b1338f89e3de2a93e6ffe555f79c68f26a5c06b", size = 18185815, upload-time = "2026-05-08T19:07:44.179Z" }, - { url = "https://files.pythonhosted.org/packages/d7/65/2e11055faf015e4b07f45b513fa49b391baf2e19d92d77d73ebee13c1004/onnxruntime-1.26.0-cp314-cp314-win_amd64.whl", hash = "sha256:7ead61450d8405167c87dd3a31d8da1d576b490a57dab1aa8b82a7da6825f5aa", size = 13349887, upload-time = "2026-05-08T19:08:08.671Z" }, - { url = "https://files.pythonhosted.org/packages/19/e4/0f9d1a5718b1781c610c1e354765a3820597081754277a6a9a2b50705702/onnxruntime-1.26.0-cp314-cp314-win_arm64.whl", hash = "sha256:31d71a53490e46910877d0902b5ad99c69a5955e5c7ea6c82863519410e1ba7c", size = 13140121, upload-time = "2026-05-08T19:07:57.804Z" }, - { url = "https://files.pythonhosted.org/packages/1c/42/3b8e635f067d06d9f45bede470b8d539d101a4166c272213158dfd08b6ce/onnxruntime-1.26.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7b6d258fb78fdfcf049795bcfaa74dcb90ae7baa277afd21e6fd28b83f2c496", size = 16057240, upload-time = "2026-05-08T19:07:25.163Z" }, - { url = "https://files.pythonhosted.org/packages/93/99/f2be40a31b908d96b861ae0ce98582fa376c18a7f816b9d5eb4cd6aa0a4c/onnxruntime-1.26.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4eefd386a45202aefb7a5132b94f32df9d506c9edcc7faf2fc60d65183f4b183", size = 18197382, upload-time = "2026-05-08T19:07:46.965Z" }, -] - -[[package]] -name = "openai" -version = "2.41.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "distro" }, - { name = "httpx" }, - { name = "jiter" }, - { name = "pydantic" }, - { name = "sniffio" }, - { name = "tqdm" }, - { name = "typing-extensions" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/3c/a6/5815fe2e2aca74b36c650d1bd43b69827cee568073d0d2d9b6fc5aaac80c/openai-2.41.0.tar.gz", hash = "sha256:db5c362acd6604b84f076abbefa66826ea4b46ecba2954ed866e6a149a1352c0", size = 783525, upload-time = "2026-06-03T22:39:40.719Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/be/51/d82bb424e8aa372190c5233253a2ceb399a778747d18b42cff487411e663/openai-2.41.0-py3-none-any.whl", hash = "sha256:20cc7952e8501c7e5773dd2ef7be437bae9cb549044902e1041a83a54516e375", size = 1353378, upload-time = "2026-06-03T22:39:38.964Z" }, -] - -[[package]] -name = "openpyxl" -version = "3.1.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "et-xmlfile" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" }, -] - [[package]] name = "opentelemetry-api" version = "1.34.1" @@ -3588,54 +2665,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a5/3a/2ba85557e8dc024c0842ad22c570418dc02c36cbd1ab4b832a93edf071b8/opentelemetry_api-1.34.1-py3-none-any.whl", hash = "sha256:b7df4cb0830d5a6c29ad0c0691dbae874d8daefa934b8b1d642de48323d32a8c", size = 65767, upload-time = "2025-06-10T08:54:56.717Z" }, ] -[[package]] -name = "opentelemetry-exporter-otlp-proto-common" -version = "1.34.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-proto" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/86/f0/ff235936ee40db93360233b62da932d4fd9e8d103cd090c6bcb9afaf5f01/opentelemetry_exporter_otlp_proto_common-1.34.1.tar.gz", hash = "sha256:b59a20a927facd5eac06edaf87a07e49f9e4a13db487b7d8a52b37cb87710f8b", size = 20817, upload-time = "2025-06-10T08:55:22.55Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/72/e8/8b292a11cc8d8d87ec0c4089ae21b6a58af49ca2e51fa916435bc922fdc7/opentelemetry_exporter_otlp_proto_common-1.34.1-py3-none-any.whl", hash = "sha256:8e2019284bf24d3deebbb6c59c71e6eef3307cd88eff8c633e061abba33f7e87", size = 18834, upload-time = "2025-06-10T08:55:00.806Z" }, -] - -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.34.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "googleapis-common-protos" }, - { name = "grpcio" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-common" }, - { name = "opentelemetry-proto" }, - { name = "opentelemetry-sdk" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/41/f7/bb63837a3edb9ca857aaf5760796874e7cecddc88a2571b0992865a48fb6/opentelemetry_exporter_otlp_proto_grpc-1.34.1.tar.gz", hash = "sha256:7c841b90caa3aafcfc4fee58487a6c71743c34c6dc1787089d8b0578bbd794dd", size = 22566, upload-time = "2025-06-10T08:55:23.214Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/42/0a4dd47e7ef54edf670c81fc06a83d68ea42727b82126a1df9dd0477695d/opentelemetry_exporter_otlp_proto_grpc-1.34.1-py3-none-any.whl", hash = "sha256:04bb8b732b02295be79f8a86a4ad28fae3d4ddb07307a98c7aa6f331de18cca6", size = 18615, upload-time = "2025-06-10T08:55:02.214Z" }, -] - -[[package]] -name = "opentelemetry-exporter-otlp-proto-http" -version = "1.34.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "googleapis-common-protos" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-common" }, 
- { name = "opentelemetry-proto" }, - { name = "opentelemetry-sdk" }, - { name = "requests" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/19/8f/954bc725961cbe425a749d55c0ba1df46832a5999eae764d1a7349ac1c29/opentelemetry_exporter_otlp_proto_http-1.34.1.tar.gz", hash = "sha256:aaac36fdce46a8191e604dcf632e1f9380c7d5b356b27b3e0edb5610d9be28ad", size = 15351, upload-time = "2025-06-10T08:55:24.657Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/54/b05251c04e30c1ac70cf4a7c5653c085dfcf2c8b98af71661d6a252adc39/opentelemetry_exporter_otlp_proto_http-1.34.1-py3-none-any.whl", hash = "sha256:5251f00ca85872ce50d871f6d3cc89fe203b94c3c14c964bbdc3883366c705d8", size = 17744, upload-time = "2025-06-10T08:55:03.802Z" }, -] - [[package]] name = "opentelemetry-instrumentation" version = "0.55b1" @@ -3665,18 +2694,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/05/c9/183ad41a7ba0374030b3eab335ec6f3eff6acca057aba2b393183e18639e/opentelemetry_instrumentation_threading-0.55b1-py3-none-any.whl", hash = "sha256:f865542b32b219c8fd01deb03b8c3c9ba2eb3f0501ae303338403fd2242962c7", size = 9313, upload-time = "2025-06-10T08:58:02.884Z" }, ] -[[package]] -name = "opentelemetry-proto" -version = "1.34.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "protobuf" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/66/b3/c3158dd012463bb7c0eb7304a85a6f63baeeb5b4c93a53845cf89f848c7e/opentelemetry_proto-1.34.1.tar.gz", hash = "sha256:16286214e405c211fc774187f3e4bbb1351290b8dfb88e8948af209ce85b719e", size = 34344, upload-time = "2025-06-10T08:55:32.25Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/28/ab/4591bfa54e946350ce8b3f28e5c658fe9785e7cd11e9c11b1671a867822b/opentelemetry_proto-1.34.1-py3-none-any.whl", hash = "sha256:eb4bb5ac27f2562df2d6857fc557b3a481b5e298bc04f94cc68041f00cebcbd2", size = 55692, upload-time = "2025-06-10T08:55:14.904Z" }, -] - 
[[package]] name = "opentelemetry-sdk" version = "1.34.1" @@ -3785,15 +2802,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/21/5a3f1e8913103b703a436a5664238e5b965ec392b555fe68943ea3691e6b/orjson-3.11.9-cp314-cp314-win_arm64.whl", hash = "sha256:eebdbdeef0094e4f5aefa20dcd4eb2368ab5e7a3b4edea27f1e7b2892e009cf9", size = 126687, upload-time = "2026-05-06T15:11:06.602Z" }, ] -[[package]] -name = "overrides" -version = "7.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/36/86/b585f53236dec60aba864e050778b25045f857e17f6e5ea0ae95fe80edd2/overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a", size = 22812, upload-time = "2024-01-27T21:01:33.423Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/ab/fc8290c6a4c722e5514d80f62b2dc4c4df1a68a41d1364e625c35990fcf3/overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49", size = 17832, upload-time = "2024-01-27T21:01:31.393Z" }, -] - [[package]] name = "packaging" version = "26.2" @@ -3812,140 +2820,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/d9/7fb5aa316bc299258e68c73ba3bddbc499654a07f151cba08f6153988714/pathspec-1.1.1-py3-none-any.whl", hash = "sha256:a00ce642f577bf7f473932318056212bc4f8bfdf53128c78bbd5af0b9b20b189", size = 57328, upload-time = "2026-04-27T01:46:07.06Z" }, ] -[[package]] -name = "pdfminer-six" -version = "20251230" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "charset-normalizer" }, - { name = "cryptography" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/46/9a/d79d8fa6d47a0338846bb558b39b9963b8eb2dfedec61867c138c1b17eeb/pdfminer_six-20251230.tar.gz", hash = "sha256:e8f68a14c57e00c2d7276d26519ea64be1b48f91db1cdc776faa80528ca06c1e", size = 8511285, upload-time = "2025-12-30T15:49:13.104Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/65/d7/b288ea32deb752a09aab73c75e1e7572ab2a2b56c3124a5d1eb24c62ceb3/pdfminer_six-20251230-py3-none-any.whl", hash = "sha256:9ff2e3466a7dfc6de6fd779478850b6b7c2d9e9405aa2a5869376a822771f485", size = 6591909, upload-time = "2025-12-30T15:49:10.76Z" }, -] - -[[package]] -name = "pdfplumber" -version = "0.11.9" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pdfminer-six" }, - { name = "pillow" }, - { name = "pypdfium2" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/38/37/9ca3519e92a8434eb93be570b131476cc0a4e840bb39c62ddb7813a39d53/pdfplumber-0.11.9.tar.gz", hash = "sha256:481224b678b2bbdbf376e2c39bf914144eef7c3d301b4a28eebf0f7f6109d6dc", size = 102768, upload-time = "2026-01-05T08:10:29.072Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/c8/cdbc975f5b634e249cfa6597e37c50f3078412474f21c015e508bfbfe3c3/pdfplumber-0.11.9-py3-none-any.whl", hash = "sha256:33ec5580959ba524e9100138746e090879504c42955df1b8a997604dd326c443", size = 60045, upload-time = "2026-01-05T08:10:27.512Z" }, -] - -[[package]] -name = "pillow" -version = "12.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8c/21/c2bcdd5906101a30244eaffc1b6e6ce71a31bd0742a01eb89e660ebfac2d/pillow-12.2.0.tar.gz", hash = "sha256:a830b1a40919539d07806aa58e1b114df53ddd43213d9c8b75847eee6c0182b5", size = 46987819, upload-time = "2026-04-01T14:46:17.687Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/aa/d0b28e1c811cd4d5f5c2bfe2e022292bd255ae5744a3b9ac7d6c8f72dd75/pillow-12.2.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:a4e8f36e677d3336f35089648c8955c51c6d386a13cf6ee9c189c5f5bd713a9f", size = 5354355, upload-time = "2026-04-01T14:42:15.402Z" }, - { url = "https://files.pythonhosted.org/packages/27/8e/1d5b39b8ae2bd7650d0c7b6abb9602d16043ead9ebbfef4bc4047454da2a/pillow-12.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:2e589959f10d9824d39b350472b92f0ce3b443c0a3442ebf41c40cb8361c5b97", size = 4695871, upload-time = "2026-04-01T14:42:18.234Z" }, - { url = "https://files.pythonhosted.org/packages/f0/c5/dcb7a6ca6b7d3be41a76958e90018d56c8462166b3ef223150360850c8da/pillow-12.2.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a52edc8bfff4429aaabdf4d9ee0daadbbf8562364f940937b941f87a4290f5ff", size = 6269734, upload-time = "2026-04-01T14:42:20.608Z" }, - { url = "https://files.pythonhosted.org/packages/ea/f1/aa1bb13b2f4eba914e9637893c73f2af8e48d7d4023b9d3750d4c5eb2d0c/pillow-12.2.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:975385f4776fafde056abb318f612ef6285b10a1f12b8570f3647ad0d74b48ec", size = 8076080, upload-time = "2026-04-01T14:42:23.095Z" }, - { url = "https://files.pythonhosted.org/packages/a1/2a/8c79d6a53169937784604a8ae8d77e45888c41537f7f6f65ed1f407fe66d/pillow-12.2.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd9c0c7a0c681a347b3194c500cb1e6ca9cab053ea4d82a5cf45b6b754560136", size = 6382236, upload-time = "2026-04-01T14:42:25.82Z" }, - { url = "https://files.pythonhosted.org/packages/b5/42/bbcb6051030e1e421d103ce7a8ecadf837aa2f39b8f82ef1a8d37c3d4ebc/pillow-12.2.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:88d387ff40b3ff7c274947ed3125dedf5262ec6919d83946753b5f3d7c67ea4c", size = 7070220, upload-time = "2026-04-01T14:42:28.68Z" }, - { url = "https://files.pythonhosted.org/packages/3f/e1/c2a7d6dd8cfa6b231227da096fd2d58754bab3603b9d73bf609d3c18b64f/pillow-12.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:51c4167c34b0d8ba05b547a3bb23578d0ba17b80a5593f93bd8ecb123dd336a3", size = 6493124, upload-time = "2026-04-01T14:42:31.579Z" }, - { url = "https://files.pythonhosted.org/packages/5f/41/7c8617da5d32e1d2f026e509484fdb6f3ad7efaef1749a0c1928adbb099e/pillow-12.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:34c0d99ecccea270c04882cb3b86e7b57296079c9a4aff88cb3b33563d95afaa", size = 7194324, upload-time = "2026-04-01T14:42:34.615Z" }, - { url = "https://files.pythonhosted.org/packages/2d/de/a777627e19fd6d62f84070ee1521adde5eeda4855b5cf60fe0b149118bca/pillow-12.2.0-cp310-cp310-win32.whl", hash = "sha256:b85f66ae9eb53e860a873b858b789217ba505e5e405a24b85c0464822fe88032", size = 6376363, upload-time = "2026-04-01T14:42:37.19Z" }, - { url = "https://files.pythonhosted.org/packages/e7/34/fc4cb5204896465842767b96d250c08410f01f2f28afc43b257de842eed5/pillow-12.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:673aa32138f3e7531ccdbca7b3901dba9b70940a19ccecc6a37c77d5fdeb05b5", size = 7083523, upload-time = "2026-04-01T14:42:39.62Z" }, - { url = "https://files.pythonhosted.org/packages/2d/a0/32852d36bc7709f14dc3f64f929a275e958ad8c19a6deba9610d458e28b3/pillow-12.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:3e080565d8d7c671db5802eedfb438e5565ffa40115216eabb8cd52d0ecce024", size = 2463318, upload-time = "2026-04-01T14:42:42.063Z" }, - { url = "https://files.pythonhosted.org/packages/68/e1/748f5663efe6edcfc4e74b2b93edfb9b8b99b67f21a854c3ae416500a2d9/pillow-12.2.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:8be29e59487a79f173507c30ddf57e733a357f67881430449bb32614075a40ab", size = 5354347, upload-time = "2026-04-01T14:42:44.255Z" }, - { url = "https://files.pythonhosted.org/packages/47/a1/d5ff69e747374c33a3b53b9f98cca7889fce1fd03d79cdc4e1bccc6c5a87/pillow-12.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:71cde9a1e1551df7d34a25462fc60325e8a11a82cc2e2f54578e5e9a1e153d65", size = 4695873, upload-time = "2026-04-01T14:42:46.452Z" }, - { url = "https://files.pythonhosted.org/packages/df/21/e3fbdf54408a973c7f7f89a23b2cb97a7ef30c61ab4142af31eee6aebc88/pillow-12.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f490f9368b6fc026f021db16d7ec2fbf7d89e2edb42e8ec09d2c60505f5729c7", size = 6280168, upload-time = "2026-04-01T14:42:49.228Z" }, - { 
url = "https://files.pythonhosted.org/packages/d3/f1/00b7278c7dd52b17ad4329153748f87b6756ec195ff786c2bdf12518337d/pillow-12.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8bd7903a5f2a4545f6fd5935c90058b89d30045568985a71c79f5fd6edf9b91e", size = 8088188, upload-time = "2026-04-01T14:42:51.735Z" }, - { url = "https://files.pythonhosted.org/packages/ad/cf/220a5994ef1b10e70e85748b75649d77d506499352be135a4989c957b701/pillow-12.2.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3997232e10d2920a68d25191392e3a4487d8183039e1c74c2297f00ed1c50705", size = 6394401, upload-time = "2026-04-01T14:42:54.343Z" }, - { url = "https://files.pythonhosted.org/packages/e9/bd/e51a61b1054f09437acfbc2ff9106c30d1eb76bc1453d428399946781253/pillow-12.2.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e74473c875d78b8e9d5da2a70f7099549f9eb37ded4e2f6a463e60125bccd176", size = 7079655, upload-time = "2026-04-01T14:42:56.954Z" }, - { url = "https://files.pythonhosted.org/packages/6b/3d/45132c57d5fb4b5744567c3817026480ac7fc3ce5d4c47902bc0e7f6f853/pillow-12.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:56a3f9c60a13133a98ecff6197af34d7824de9b7b38c3654861a725c970c197b", size = 6503105, upload-time = "2026-04-01T14:42:59.847Z" }, - { url = "https://files.pythonhosted.org/packages/7d/2e/9df2fc1e82097b1df3dce58dc43286aa01068e918c07574711fcc53e6fb4/pillow-12.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:90e6f81de50ad6b534cab6e5aef77ff6e37722b2f5d908686f4a5c9eba17a909", size = 7203402, upload-time = "2026-04-01T14:43:02.664Z" }, - { url = "https://files.pythonhosted.org/packages/bd/2e/2941e42858ebb67e50ae741473de81c2984e6eff7b397017623c676e2e8d/pillow-12.2.0-cp311-cp311-win32.whl", hash = "sha256:8c984051042858021a54926eb597d6ee3012393ce9c181814115df4c60b9a808", size = 6378149, upload-time = "2026-04-01T14:43:05.274Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/42/836b6f3cd7f3e5fa10a1f1a5420447c17966044c8fbf589cc0452d5502db/pillow-12.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e6b2a0c538fc200b38ff9eb6628228b77908c319a005815f2dde585a0664b60", size = 7082626, upload-time = "2026-04-01T14:43:08.557Z" }, - { url = "https://files.pythonhosted.org/packages/c2/88/549194b5d6f1f494b485e493edc6693c0a16f4ada488e5bd974ed1f42fad/pillow-12.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:9a8a34cc89c67a65ea7437ce257cea81a9dad65b29805f3ecee8c8fe8ff25ffe", size = 2463531, upload-time = "2026-04-01T14:43:10.743Z" }, - { url = "https://files.pythonhosted.org/packages/58/be/7482c8a5ebebbc6470b3eb791812fff7d5e0216c2be3827b30b8bb6603ed/pillow-12.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2d192a155bbcec180f8564f693e6fd9bccff5a7af9b32e2e4bf8c9c69dbad6b5", size = 5308279, upload-time = "2026-04-01T14:43:13.246Z" }, - { url = "https://files.pythonhosted.org/packages/d8/95/0a351b9289c2b5cbde0bacd4a83ebc44023e835490a727b2a3bd60ddc0f4/pillow-12.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f3f40b3c5a968281fd507d519e444c35f0ff171237f4fdde090dd60699458421", size = 4695490, upload-time = "2026-04-01T14:43:15.584Z" }, - { url = "https://files.pythonhosted.org/packages/de/af/4e8e6869cbed569d43c416fad3dc4ecb944cb5d9492defaed89ddd6fe871/pillow-12.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:03e7e372d5240cc23e9f07deca4d775c0817bffc641b01e9c3af208dbd300987", size = 6284462, upload-time = "2026-04-01T14:43:18.268Z" }, - { url = "https://files.pythonhosted.org/packages/e9/9e/c05e19657fd57841e476be1ab46c4d501bffbadbafdc31a6d665f8b737b6/pillow-12.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b86024e52a1b269467a802258c25521e6d742349d760728092e1bc2d135b4d76", size = 8094744, upload-time = "2026-04-01T14:43:20.716Z" }, - { url = 
"https://files.pythonhosted.org/packages/2b/54/1789c455ed10176066b6e7e6da1b01e50e36f94ba584dc68d9eebfe9156d/pillow-12.2.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7371b48c4fa448d20d2714c9a1f775a81155050d383333e0a6c15b1123dda005", size = 6398371, upload-time = "2026-04-01T14:43:23.443Z" }, - { url = "https://files.pythonhosted.org/packages/43/e3/fdc657359e919462369869f1c9f0e973f353f9a9ee295a39b1fea8ee1a77/pillow-12.2.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:62f5409336adb0663b7caa0da5c7d9e7bdbaae9ce761d34669420c2a801b2780", size = 7087215, upload-time = "2026-04-01T14:43:26.758Z" }, - { url = "https://files.pythonhosted.org/packages/8b/f8/2f6825e441d5b1959d2ca5adec984210f1ec086435b0ed5f52c19b3b8a6e/pillow-12.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:01afa7cf67f74f09523699b4e88c73fb55c13346d212a59a2db1f86b0a63e8c5", size = 6509783, upload-time = "2026-04-01T14:43:29.56Z" }, - { url = "https://files.pythonhosted.org/packages/67/f9/029a27095ad20f854f9dba026b3ea6428548316e057e6fc3545409e86651/pillow-12.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc3d34d4a8fbec3e88a79b92e5465e0f9b842b628675850d860b8bd300b159f5", size = 7212112, upload-time = "2026-04-01T14:43:32.091Z" }, - { url = "https://files.pythonhosted.org/packages/be/42/025cfe05d1be22dbfdb4f264fe9de1ccda83f66e4fc3aac94748e784af04/pillow-12.2.0-cp312-cp312-win32.whl", hash = "sha256:58f62cc0f00fd29e64b29f4fd923ffdb3859c9f9e6105bfc37ba1d08994e8940", size = 6378489, upload-time = "2026-04-01T14:43:34.601Z" }, - { url = "https://files.pythonhosted.org/packages/5d/7b/25a221d2c761c6a8ae21bfa3874988ff2583e19cf8a27bf2fee358df7942/pillow-12.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7f84204dee22a783350679a0333981df803dac21a0190d706a50475e361c93f5", size = 7084129, upload-time = "2026-04-01T14:43:37.213Z" }, - { url = 
"https://files.pythonhosted.org/packages/10/e1/542a474affab20fd4a0f1836cb234e8493519da6b76899e30bcc5d990b8b/pillow-12.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:af73337013e0b3b46f175e79492d96845b16126ddf79c438d7ea7ff27783a414", size = 2463612, upload-time = "2026-04-01T14:43:39.421Z" }, - { url = "https://files.pythonhosted.org/packages/4a/01/53d10cf0dbad820a8db274d259a37ba50b88b24768ddccec07355382d5ad/pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:8297651f5b5679c19968abefd6bb84d95fe30ef712eb1b2d9b2d31ca61267f4c", size = 4100837, upload-time = "2026-04-01T14:43:41.506Z" }, - { url = "https://files.pythonhosted.org/packages/0f/98/f3a6657ecb698c937f6c76ee564882945f29b79bad496abcba0e84659ec5/pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:50d8520da2a6ce0af445fa6d648c4273c3eeefbc32d7ce049f22e8b5c3daecc2", size = 4176528, upload-time = "2026-04-01T14:43:43.773Z" }, - { url = "https://files.pythonhosted.org/packages/69/bc/8986948f05e3ea490b8442ea1c1d4d990b24a7e43d8a51b2c7d8b1dced36/pillow-12.2.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:766cef22385fa1091258ad7e6216792b156dc16d8d3fa607e7545b2b72061f1c", size = 3640401, upload-time = "2026-04-01T14:43:45.87Z" }, - { url = "https://files.pythonhosted.org/packages/34/46/6c717baadcd62bc8ed51d238d521ab651eaa74838291bda1f86fe1f864c9/pillow-12.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5d2fd0fa6b5d9d1de415060363433f28da8b1526c1c129020435e186794b3795", size = 5308094, upload-time = "2026-04-01T14:43:48.438Z" }, - { url = "https://files.pythonhosted.org/packages/71/43/905a14a8b17fdb1ccb58d282454490662d2cb89a6bfec26af6d3520da5ec/pillow-12.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:56b25336f502b6ed02e889f4ece894a72612fe885889a6e8c4c80239ff6e5f5f", size = 4695402, upload-time = "2026-04-01T14:43:51.292Z" }, - { url = 
"https://files.pythonhosted.org/packages/73/dd/42107efcb777b16fa0393317eac58f5b5cf30e8392e266e76e51cff28c3d/pillow-12.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f1c943e96e85df3d3478f7b691f229887e143f81fedab9b20205349ab04d73ed", size = 6280005, upload-time = "2026-04-01T14:43:54.242Z" }, - { url = "https://files.pythonhosted.org/packages/a8/68/b93e09e5e8549019e61acf49f65b1a8530765a7f812c77a7461bca7e4494/pillow-12.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:03f6fab9219220f041c74aeaa2939ff0062bd5c364ba9ce037197f4c6d498cd9", size = 8090669, upload-time = "2026-04-01T14:43:57.335Z" }, - { url = "https://files.pythonhosted.org/packages/4b/6e/3ccb54ce8ec4ddd1accd2d89004308b7b0b21c4ac3d20fa70af4760a4330/pillow-12.2.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5cdfebd752ec52bf5bb4e35d9c64b40826bc5b40a13df7c3cda20a2c03a0f5ed", size = 6395194, upload-time = "2026-04-01T14:43:59.864Z" }, - { url = "https://files.pythonhosted.org/packages/67/ee/21d4e8536afd1a328f01b359b4d3997b291ffd35a237c877b331c1c3b71c/pillow-12.2.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eedf4b74eda2b5a4b2b2fb4c006d6295df3bf29e459e198c90ea48e130dc75c3", size = 7082423, upload-time = "2026-04-01T14:44:02.74Z" }, - { url = "https://files.pythonhosted.org/packages/78/5f/e9f86ab0146464e8c133fe85df987ed9e77e08b29d8d35f9f9f4d6f917ba/pillow-12.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:00a2865911330191c0b818c59103b58a5e697cae67042366970a6b6f1b20b7f9", size = 6505667, upload-time = "2026-04-01T14:44:05.381Z" }, - { url = "https://files.pythonhosted.org/packages/ed/1e/409007f56a2fdce61584fd3acbc2bbc259857d555196cedcadc68c015c82/pillow-12.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e1757442ed87f4912397c6d35a0db6a7b52592156014706f17658ff58bbf795", size = 7208580, upload-time = "2026-04-01T14:44:08.39Z" }, - { url = 
"https://files.pythonhosted.org/packages/23/c4/7349421080b12fb35414607b8871e9534546c128a11965fd4a7002ccfbee/pillow-12.2.0-cp313-cp313-win32.whl", hash = "sha256:144748b3af2d1b358d41286056d0003f47cb339b8c43a9ea42f5fea4d8c66b6e", size = 6375896, upload-time = "2026-04-01T14:44:11.197Z" }, - { url = "https://files.pythonhosted.org/packages/3f/82/8a3739a5e470b3c6cbb1d21d315800d8e16bff503d1f16b03a4ec3212786/pillow-12.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:390ede346628ccc626e5730107cde16c42d3836b89662a115a921f28440e6a3b", size = 7081266, upload-time = "2026-04-01T14:44:13.947Z" }, - { url = "https://files.pythonhosted.org/packages/c3/25/f968f618a062574294592f668218f8af564830ccebdd1fa6200f598e65c5/pillow-12.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:8023abc91fba39036dbce14a7d6535632f99c0b857807cbbbf21ecc9f4717f06", size = 2463508, upload-time = "2026-04-01T14:44:16.312Z" }, - { url = "https://files.pythonhosted.org/packages/4d/a4/b342930964e3cb4dce5038ae34b0eab4653334995336cd486c5a8c25a00c/pillow-12.2.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:042db20a421b9bafecc4b84a8b6e444686bd9d836c7fd24542db3e7df7baad9b", size = 5309927, upload-time = "2026-04-01T14:44:18.89Z" }, - { url = "https://files.pythonhosted.org/packages/9f/de/23198e0a65a9cf06123f5435a5d95cea62a635697f8f03d134d3f3a96151/pillow-12.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd025009355c926a84a612fecf58bb315a3f6814b17ead51a8e48d3823d9087f", size = 4698624, upload-time = "2026-04-01T14:44:21.115Z" }, - { url = "https://files.pythonhosted.org/packages/01/a6/1265e977f17d93ea37aa28aa81bad4fa597933879fac2520d24e021c8da3/pillow-12.2.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88ddbc66737e277852913bd1e07c150cc7bb124539f94c4e2df5344494e0a612", size = 6321252, upload-time = "2026-04-01T14:44:23.663Z" }, - { url = 
"https://files.pythonhosted.org/packages/3c/83/5982eb4a285967baa70340320be9f88e57665a387e3a53a7f0db8231a0cd/pillow-12.2.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d362d1878f00c142b7e1a16e6e5e780f02be8195123f164edf7eddd911eefe7c", size = 8126550, upload-time = "2026-04-01T14:44:26.772Z" }, - { url = "https://files.pythonhosted.org/packages/4e/48/6ffc514adce69f6050d0753b1a18fd920fce8cac87620d5a31231b04bfc5/pillow-12.2.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c727a6d53cb0018aadd8018c2b938376af27914a68a492f59dfcaca650d5eea", size = 6433114, upload-time = "2026-04-01T14:44:29.615Z" }, - { url = "https://files.pythonhosted.org/packages/36/a3/f9a77144231fb8d40ee27107b4463e205fa4677e2ca2548e14da5cf18dce/pillow-12.2.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:efd8c21c98c5cc60653bcb311bef2ce0401642b7ce9d09e03a7da87c878289d4", size = 7115667, upload-time = "2026-04-01T14:44:32.773Z" }, - { url = "https://files.pythonhosted.org/packages/c1/fc/ac4ee3041e7d5a565e1c4fd72a113f03b6394cc72ab7089d27608f8aaccb/pillow-12.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9f08483a632889536b8139663db60f6724bfcb443c96f1b18855860d7d5c0fd4", size = 6538966, upload-time = "2026-04-01T14:44:35.252Z" }, - { url = "https://files.pythonhosted.org/packages/c0/a8/27fb307055087f3668f6d0a8ccb636e7431d56ed0750e07a60547b1e083e/pillow-12.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dac8d77255a37e81a2efcbd1fc05f1c15ee82200e6c240d7e127e25e365c39ea", size = 7238241, upload-time = "2026-04-01T14:44:37.875Z" }, - { url = "https://files.pythonhosted.org/packages/ad/4b/926ab182c07fccae9fcb120043464e1ff1564775ec8864f21a0ebce6ac25/pillow-12.2.0-cp313-cp313t-win32.whl", hash = "sha256:ee3120ae9dff32f121610bb08e4313be87e03efeadfc6c0d18f89127e24d0c24", size = 6379592, upload-time = "2026-04-01T14:44:40.336Z" }, - { url = 
"https://files.pythonhosted.org/packages/c2/c4/f9e476451a098181b30050cc4c9a3556b64c02cf6497ea421ac047e89e4b/pillow-12.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:325ca0528c6788d2a6c3d40e3568639398137346c3d6e66bb61db96b96511c98", size = 7085542, upload-time = "2026-04-01T14:44:43.251Z" }, - { url = "https://files.pythonhosted.org/packages/00/a4/285f12aeacbe2d6dc36c407dfbbe9e96d4a80b0fb710a337f6d2ad978c75/pillow-12.2.0-cp313-cp313t-win_arm64.whl", hash = "sha256:2e5a76d03a6c6dcef67edabda7a52494afa4035021a79c8558e14af25313d453", size = 2465765, upload-time = "2026-04-01T14:44:45.996Z" }, - { url = "https://files.pythonhosted.org/packages/bf/98/4595daa2365416a86cb0d495248a393dfc84e96d62ad080c8546256cb9c0/pillow-12.2.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:3adc9215e8be0448ed6e814966ecf3d9952f0ea40eb14e89a102b87f450660d8", size = 4100848, upload-time = "2026-04-01T14:44:48.48Z" }, - { url = "https://files.pythonhosted.org/packages/0b/79/40184d464cf89f6663e18dfcf7ca21aae2491fff1a16127681bf1fa9b8cf/pillow-12.2.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:6a9adfc6d24b10f89588096364cc726174118c62130c817c2837c60cf08a392b", size = 4176515, upload-time = "2026-04-01T14:44:51.353Z" }, - { url = "https://files.pythonhosted.org/packages/b0/63/703f86fd4c422a9cf722833670f4f71418fb116b2853ff7da722ea43f184/pillow-12.2.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:6a6e67ea2e6feda684ed370f9a1c52e7a243631c025ba42149a2cc5934dec295", size = 3640159, upload-time = "2026-04-01T14:44:53.588Z" }, - { url = "https://files.pythonhosted.org/packages/71/e0/fb22f797187d0be2270f83500aab851536101b254bfa1eae10795709d283/pillow-12.2.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2bb4a8d594eacdfc59d9e5ad972aa8afdd48d584ffd5f13a937a664c3e7db0ed", size = 5312185, upload-time = "2026-04-01T14:44:56.039Z" }, - { url = 
"https://files.pythonhosted.org/packages/ba/8c/1a9e46228571de18f8e28f16fabdfc20212a5d019f3e3303452b3f0a580d/pillow-12.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:80b2da48193b2f33ed0c32c38140f9d3186583ce7d516526d462645fd98660ae", size = 4695386, upload-time = "2026-04-01T14:44:58.663Z" }, - { url = "https://files.pythonhosted.org/packages/70/62/98f6b7f0c88b9addd0e87c217ded307b36be024d4ff8869a812b241d1345/pillow-12.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22db17c68434de69d8ecfc2fe821569195c0c373b25cccb9cbdacf2c6e53c601", size = 6280384, upload-time = "2026-04-01T14:45:01.5Z" }, - { url = "https://files.pythonhosted.org/packages/5e/03/688747d2e91cfbe0e64f316cd2e8005698f76ada3130d0194664174fa5de/pillow-12.2.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7b14cc0106cd9aecda615dd6903840a058b4700fcb817687d0ee4fc8b6e389be", size = 8091599, upload-time = "2026-04-01T14:45:04.5Z" }, - { url = "https://files.pythonhosted.org/packages/f6/35/577e22b936fcdd66537329b33af0b4ccfefaeabd8aec04b266528cddb33c/pillow-12.2.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cbeb542b2ebc6fcdacabf8aca8c1a97c9b3ad3927d46b8723f9d4f033288a0f", size = 6396021, upload-time = "2026-04-01T14:45:07.117Z" }, - { url = "https://files.pythonhosted.org/packages/11/8d/d2532ad2a603ca2b93ad9f5135732124e57811d0168155852f37fbce2458/pillow-12.2.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4bfd07bc812fbd20395212969e41931001fd59eb55a60658b0e5710872e95286", size = 7083360, upload-time = "2026-04-01T14:45:09.763Z" }, - { url = "https://files.pythonhosted.org/packages/5e/26/d325f9f56c7e039034897e7380e9cc202b1e368bfd04d4cbe6a441f02885/pillow-12.2.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9aba9a17b623ef750a4d11b742cbafffeb48a869821252b30ee21b5e91392c50", size = 6507628, upload-time = "2026-04-01T14:45:12.378Z" }, - { url = 
"https://files.pythonhosted.org/packages/5f/f7/769d5632ffb0988f1c5e7660b3e731e30f7f8ec4318e94d0a5d674eb65a4/pillow-12.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:deede7c263feb25dba4e82ea23058a235dcc2fe1f6021025dc71f2b618e26104", size = 7209321, upload-time = "2026-04-01T14:45:15.122Z" }, - { url = "https://files.pythonhosted.org/packages/6a/7a/c253e3c645cd47f1aceea6a8bacdba9991bf45bb7dfe927f7c893e89c93c/pillow-12.2.0-cp314-cp314-win32.whl", hash = "sha256:632ff19b2778e43162304d50da0181ce24ac5bb8180122cbe1bf4673428328c7", size = 6479723, upload-time = "2026-04-01T14:45:17.797Z" }, - { url = "https://files.pythonhosted.org/packages/cd/8b/601e6566b957ca50e28725cb6c355c59c2c8609751efbecd980db44e0349/pillow-12.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:4e6c62e9d237e9b65fac06857d511e90d8461a32adcc1b9065ea0c0fa3a28150", size = 7217400, upload-time = "2026-04-01T14:45:20.529Z" }, - { url = "https://files.pythonhosted.org/packages/d6/94/220e46c73065c3e2951bb91c11a1fb636c8c9ad427ac3ce7d7f3359b9b2f/pillow-12.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:b1c1fbd8a5a1af3412a0810d060a78b5136ec0836c8a4ef9aa11807f2a22f4e1", size = 2554835, upload-time = "2026-04-01T14:45:23.162Z" }, - { url = "https://files.pythonhosted.org/packages/b6/ab/1b426a3974cb0e7da5c29ccff4807871d48110933a57207b5a676cccc155/pillow-12.2.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:57850958fe9c751670e49b2cecf6294acc99e562531f4bd317fa5ddee2068463", size = 5314225, upload-time = "2026-04-01T14:45:25.637Z" }, - { url = "https://files.pythonhosted.org/packages/19/1e/dce46f371be2438eecfee2a1960ee2a243bbe5e961890146d2dee1ff0f12/pillow-12.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5d38f1411c0ed9f97bcb49b7bd59b6b7c314e0e27420e34d99d844b9ce3b6f3", size = 4698541, upload-time = "2026-04-01T14:45:28.355Z" }, - { url = 
"https://files.pythonhosted.org/packages/55/c3/7fbecf70adb3a0c33b77a300dc52e424dc22ad8cdc06557a2e49523b703d/pillow-12.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c0a9f29ca8e79f09de89293f82fc9b0270bb4af1d58bc98f540cc4aedf03166", size = 6322251, upload-time = "2026-04-01T14:45:30.924Z" }, - { url = "https://files.pythonhosted.org/packages/1c/3c/7fbc17cfb7e4fe0ef1642e0abc17fc6c94c9f7a16be41498e12e2ba60408/pillow-12.2.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1610dd6c61621ae1cf811bef44d77e149ce3f7b95afe66a4512f8c59f25d9ebe", size = 8127807, upload-time = "2026-04-01T14:45:33.908Z" }, - { url = "https://files.pythonhosted.org/packages/ff/c3/a8ae14d6defd2e448493ff512fae903b1e9bd40b72efb6ec55ce0048c8ce/pillow-12.2.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a34329707af4f73cf1782a36cd2289c0368880654a2c11f027bcee9052d35dd", size = 6433935, upload-time = "2026-04-01T14:45:36.623Z" }, - { url = "https://files.pythonhosted.org/packages/6e/32/2880fb3a074847ac159d8f902cb43278a61e85f681661e7419e6596803ed/pillow-12.2.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e9c4f5b3c546fa3458a29ab22646c1c6c787ea8f5ef51300e5a60300736905e", size = 7116720, upload-time = "2026-04-01T14:45:39.258Z" }, - { url = "https://files.pythonhosted.org/packages/46/87/495cc9c30e0129501643f24d320076f4cc54f718341df18cc70ec94c44e1/pillow-12.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fb043ee2f06b41473269765c2feae53fc2e2fbf96e5e22ca94fb5ad677856f06", size = 6540498, upload-time = "2026-04-01T14:45:41.879Z" }, - { url = "https://files.pythonhosted.org/packages/18/53/773f5edca692009d883a72211b60fdaf8871cbef075eaa9d577f0a2f989e/pillow-12.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f278f034eb75b4e8a13a54a876cc4a5ab39173d2cdd93a638e1b467fc545ac43", size = 7239413, upload-time = "2026-04-01T14:45:44.705Z" }, - { url = 
"https://files.pythonhosted.org/packages/c9/e4/4b64a97d71b2a83158134abbb2f5bd3f8a2ea691361282f010998f339ec7/pillow-12.2.0-cp314-cp314t-win32.whl", hash = "sha256:6bb77b2dcb06b20f9f4b4a8454caa581cd4dd0643a08bacf821216a16d9c8354", size = 6482084, upload-time = "2026-04-01T14:45:47.568Z" }, - { url = "https://files.pythonhosted.org/packages/ba/13/306d275efd3a3453f72114b7431c877d10b1154014c1ebbedd067770d629/pillow-12.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6562ace0d3fb5f20ed7290f1f929cae41b25ae29528f2af1722966a0a02e2aa1", size = 7225152, upload-time = "2026-04-01T14:45:50.032Z" }, - { url = "https://files.pythonhosted.org/packages/ff/6e/cf826fae916b8658848d7b9f38d88da6396895c676e8086fc0988073aaf8/pillow-12.2.0-cp314-cp314t-win_arm64.whl", hash = "sha256:aa88ccfe4e32d362816319ed727a004423aab09c5cea43c01a4b435643fa34eb", size = 2556579, upload-time = "2026-04-01T14:45:52.529Z" }, - { url = "https://files.pythonhosted.org/packages/4e/b7/2437044fb910f499610356d1352e3423753c98e34f915252aafecc64889f/pillow-12.2.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0538bd5e05efec03ae613fd89c4ce0368ecd2ba239cc25b9f9be7ed426b0af1f", size = 5273969, upload-time = "2026-04-01T14:45:55.538Z" }, - { url = "https://files.pythonhosted.org/packages/f6/f4/8316e31de11b780f4ac08ef3654a75555e624a98db1056ecb2122d008d5a/pillow-12.2.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:394167b21da716608eac917c60aa9b969421b5dcbbe02ae7f013e7b85811c69d", size = 4659674, upload-time = "2026-04-01T14:45:58.093Z" }, - { url = "https://files.pythonhosted.org/packages/d4/37/664fca7201f8bb2aa1d20e2c3d5564a62e6ae5111741966c8319ca802361/pillow-12.2.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5d04bfa02cc2d23b497d1e90a0f927070043f6cbf303e738300532379a4b4e0f", size = 5288479, upload-time = "2026-04-01T14:46:01.141Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/62/5b0ed78fce87346be7a5cfcfaaad91f6a1f98c26f86bdbafa2066c647ef6/pillow-12.2.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0c838a5125cee37e68edec915651521191cef1e6aa336b855f495766e77a366e", size = 7032230, upload-time = "2026-04-01T14:46:03.874Z" }, - { url = "https://files.pythonhosted.org/packages/c3/28/ec0fc38107fc32536908034e990c47914c57cd7c5a3ece4d8d8f7ffd7e27/pillow-12.2.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a6c9fa44005fa37a91ebfc95d081e8079757d2e904b27103f4f5fa6f0bf78c0", size = 5355404, upload-time = "2026-04-01T14:46:06.33Z" }, - { url = "https://files.pythonhosted.org/packages/5e/8b/51b0eddcfa2180d60e41f06bd6d0a62202b20b59c68f5a132e615b75aecf/pillow-12.2.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:25373b66e0dd5905ed63fa3cae13c82fbddf3079f2c8bf15c6fb6a35586324c1", size = 6002215, upload-time = "2026-04-01T14:46:08.83Z" }, - { url = "https://files.pythonhosted.org/packages/bc/60/5382c03e1970de634027cee8e1b7d39776b778b81812aaf45b694dfe9e28/pillow-12.2.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:bfa9c230d2fe991bed5318a5f119bd6780cda2915cca595393649fc118ab895e", size = 7080946, upload-time = "2026-04-01T14:46:11.734Z" }, -] - -[[package]] -name = "platformdirs" -version = "4.10.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/47/e4501f49c178ae1d9f4a75073fda4204f52647993f075a9db4d14930e0c5/platformdirs-4.10.0.tar.gz", hash = "sha256:31e761a6a0ca04faf7353ea759bdba55652be214725111e5aac52dfa29d4bef7", size = 31224, upload-time = "2026-05-28T03:32:53.587Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/81/e6/cd9575ac904136b3cbf7aa7ee819ef86eedb7274e46f230e94ea4342e729/platformdirs-4.10.0-py3-none-any.whl", hash = "sha256:fb516cdb12eb0d857d0cd85a7c57cea4d060bee4578d6cf5a14dfdf8cbf8784a", size = 22743, 
upload-time = "2026-05-28T03:32:52.175Z" }, -] - [[package]] name = "pluggy" version = "1.6.0" @@ -3955,34 +2829,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] -[[package]] -name = "portalocker" -version = "2.7.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pywin32", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1f/f8/969e6f280201b40b31bcb62843c619f343dcc351dff83a5891530c9dd60e/portalocker-2.7.0.tar.gz", hash = "sha256:032e81d534a88ec1736d03f780ba073f047a06c478b06e2937486f334e955c51", size = 20183, upload-time = "2023-01-18T23:36:14.436Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/df/d4f711d168524f5aebd7fb30969eaa31e3048cf8979688cde3b08f6e5eb8/portalocker-2.7.0-py2.py3-none-any.whl", hash = "sha256:a07c5b4f3985c3cf4798369631fb7011adb498e2a46d8440efc75a8f29a0f983", size = 15502, upload-time = "2023-01-18T23:36:12.849Z" }, -] - -[[package]] -name = "posthog" -version = "5.4.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "backoff" }, - { name = "distro" }, - { name = "python-dateutil" }, - { name = "requests" }, - { name = "six" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/48/20/60ae67bb9d82f00427946218d49e2e7e80fb41c15dc5019482289ec9ce8d/posthog-5.4.0.tar.gz", hash = "sha256:701669261b8d07cdde0276e5bc096b87f9e200e3b9589c5ebff14df658c5893c", size = 88076, upload-time = "2025-06-20T23:19:23.485Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/98/e480cab9a08d1c09b1c59a93dade92c1bb7544826684ff2acbfd10fcfbd4/posthog-5.4.0-py3-none-any.whl", hash = "sha256:284dfa302f64353484420b52d4ad81ff5c2c2d1d607c4e2db602ac72761831bd", size = 
105364, upload-time = "2025-06-20T23:19:22.001Z" }, -] - [[package]] name = "propcache" version = "0.5.2" @@ -4137,63 +2983,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/cb/e3065b447186cb70aa65acc70c86baf482d82bf75625bf5a2c4f6919c6a3/protobuf-5.29.6-py3-none-any.whl", hash = "sha256:6b9edb641441b2da9fa8f428760fc136a49cf97a52076010cf22a2ff73438a86", size = 173126, upload-time = "2026-02-04T22:54:39.462Z" }, ] -[[package]] -name = "pyarrow" -version = "24.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/91/13/13e1069b351bdc3881266e11147ffccf687505dbb0ea74036237f5d454a5/pyarrow-24.0.0.tar.gz", hash = "sha256:85fe721a14dd823aca09127acbb06c3ca723efbd436c004f16bca601b04dcc83", size = 1180261, upload-time = "2026-04-21T10:51:25.837Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/bf/a34fee1d624152124fa8355c42f34195ad5fe5233ce5bb87946432047d52/pyarrow-24.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:7c2b98645d576a0b9616892ead22b64a83a5f043c5e2ca15ebcefcb5b70c80cb", size = 35076681, upload-time = "2026-04-21T08:51:46.845Z" }, - { url = "https://files.pythonhosted.org/packages/1d/41/64180033d7027afce12dc96d0fe1f504c6fa112190582b458acea2399530/pyarrow-24.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:644a246325b8c69c595ad1dd4b463eba4b0cdb731370e4a86137d433208d6147", size = 36684260, upload-time = "2026-04-21T08:51:53.642Z" }, - { url = "https://files.pythonhosted.org/packages/57/02/9b9320e673dd8a99411fac78690f3df92f6dd6f59754c750110bca66d64e/pyarrow-24.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:3a577bd840ca83f646f0a625dbc571dba7044c43c2d1503afc378b570954345c", size = 45698566, upload-time = "2026-04-21T10:46:02.133Z" }, - { url = "https://files.pythonhosted.org/packages/67/33/f75e91b9a64c3f33c787e263c93b871ad91b8a4a68c1d5cebddd9840e835/pyarrow-24.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = 
"sha256:e3268e43984d0b1a185c89b4cfff282a7ead12fc93f56cfd7088bdbcbe727041", size = 48835562, upload-time = "2026-04-21T10:46:10.278Z" }, - { url = "https://files.pythonhosted.org/packages/a5/63/097510448e47e4091faa41c43ba92f97cecaab8f4535b56a3d149578f634/pyarrow-24.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2392d954fcb920f42d230284b677605e4e2fbb11f2821e823e642abd67fbb491", size = 49394997, upload-time = "2026-04-21T10:46:18.08Z" }, - { url = "https://files.pythonhosted.org/packages/60/6b/c047d6222ab279024a062742d1807e2fbaf27bba88a98637299ff47b9236/pyarrow-24.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bec9373df11544592b0ba7ec2af0e35059e5f0e7647c6183a854dedd193298f1", size = 51911424, upload-time = "2026-04-21T10:46:25.347Z" }, - { url = "https://files.pythonhosted.org/packages/3a/ba/464cc70761c2a525d97ebd84e21c31ebd47f3ef4bdcee117009f51c46f24/pyarrow-24.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:c42ab9439498270139cc63e18847a02afe5c8b3ed9c931266533cfe378bd3591", size = 27251730, upload-time = "2026-04-21T10:46:30.913Z" }, - { url = "https://files.pythonhosted.org/packages/62/c9/a47ab7ece0d86cbe6678418a0fbd1ac4bb493b9184a3891dfa0e7f287ae0/pyarrow-24.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b0e131f880cda8d04e076cee175a46fc0e8bc8b65c99c6c09dff6669335fde74", size = 35068898, upload-time = "2026-04-21T10:46:36.599Z" }, - { url = "https://files.pythonhosted.org/packages/d1/bc/8db86617a9a58008acf8913d6fed68ea2a46acb6de928db28d724c891a68/pyarrow-24.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:1b2fe7f9a5566401a0ef2571f197eb92358925c1f0c8dba305d6e43ea0871bb3", size = 36679915, upload-time = "2026-04-21T10:46:42.602Z" }, - { url = "https://files.pythonhosted.org/packages/eb/8e/fb178720400ef69db251eb4a9c3ccf4af269bc1feb5055529b8fc87170d1/pyarrow-24.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:0b3537c00fb8d384f15ac1e79b6eb6db04a16514c8c1d22e59a9b95c8ba42868", size = 45697931, upload-time = 
"2026-04-21T10:46:48.403Z" }, - { url = "https://files.pythonhosted.org/packages/f3/27/99c42abe8e21b44f4917f62631f3aa31404882a2c41d8a4cd5c110e13d52/pyarrow-24.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:14e31a3c9e35f1ab6356c6378f6f72830e6d2d5f1791df3774a7b097d18a6a1e", size = 48837449, upload-time = "2026-04-21T10:46:55.329Z" }, - { url = "https://files.pythonhosted.org/packages/36/b6/333749e2666e9032891125bf9c691146e92901bece62030ac1430e2e7c88/pyarrow-24.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7d9a514e73bc42711e6a35aaccf3587c520024fe0a25d830a1a8a27c15f4f57", size = 49395949, upload-time = "2026-04-21T10:47:01.869Z" }, - { url = "https://files.pythonhosted.org/packages/17/25/c5201706a2dd374e8ba6ee3fd7a8c89fb7ffc16eed5217a91fd2bd7f7626/pyarrow-24.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b196eb3f931862af3fa84c2a253514d859c08e0d8fe020e07be12e75a5a9780c", size = 51912986, upload-time = "2026-04-21T10:47:09.872Z" }, - { url = "https://files.pythonhosted.org/packages/f8/d2/4d1bbba65320b21a49678d6fbdc6ff7c649251359fdcfc03568c4136231d/pyarrow-24.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:35405aecb474e683fb36af650618fd5340ee5471fc65a21b36076a18bbc6c981", size = 27255371, upload-time = "2026-04-21T10:47:15.943Z" }, - { url = "https://files.pythonhosted.org/packages/b4/a9/9686d9f07837f91f775e8932659192e02c74f9d8920524b480b85212cc68/pyarrow-24.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:6233c9ed9ab9d1db47de57d9753256d9dcffbf42db341576099f0fd9f6bf4810", size = 34981559, upload-time = "2026-04-21T10:47:22.17Z" }, - { url = "https://files.pythonhosted.org/packages/80/b6/0ddf0e9b6ead3474ab087ae598c76b031fc45532bf6a63f3a553440fb258/pyarrow-24.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:f7616236ec1bc2b15bfdec22a71ab38851c86f8f05ff64f379e1278cf20c634a", size = 36663654, upload-time = "2026-04-21T10:47:28.315Z" }, - { url = 
"https://files.pythonhosted.org/packages/7c/3b/926382efe8ce27ba729071d3566ade6dfb86bdf112f366000196b2f5780a/pyarrow-24.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:1617043b99bd33e5318ae18eb2919af09c71322ef1ca46566cdafc6e6712fb66", size = 45679394, upload-time = "2026-04-21T10:47:34.821Z" }, - { url = "https://files.pythonhosted.org/packages/b3/7a/829f7d9dfd37c207206081d6dad474d81dde29952401f07f2ba507814818/pyarrow-24.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6165461f55ef6314f026de6638d661188e3455d3ec49834556a0ebbdbace18bb", size = 48863122, upload-time = "2026-04-21T10:47:42.056Z" }, - { url = "https://files.pythonhosted.org/packages/5f/e8/f88ce625fe8babaae64e8db2d417c7653adb3019b08aae85c5ed787dc816/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3b13dedfe76a0ad2d1d859b0811b53827a4e9d93a0bcb05cf59333ab4980cc7e", size = 49376032, upload-time = "2026-04-21T10:47:48.967Z" }, - { url = "https://files.pythonhosted.org/packages/36/7a/82c363caa145fff88fb475da50d3bf52bb024f61917be5424c3392eaf878/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:25ea65d868eb04015cd18e6df2fbe98f07e5bda2abefabcb88fce39a947716f6", size = 51929490, upload-time = "2026-04-21T10:47:55.981Z" }, - { url = "https://files.pythonhosted.org/packages/66/1c/e3e72c8014ad2743ca64a701652c733cc5cbcee15c0463a32a8c55518d9e/pyarrow-24.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:295f0a7f2e242dabd513737cf076007dc5b2d59237e3eca37b05c0c6446f3826", size = 27355660, upload-time = "2026-04-21T10:48:01.718Z" }, - { url = "https://files.pythonhosted.org/packages/6f/d3/a1abf004482026ddc17f4503db227787fa3cfe41ec5091ff20e4fea55e57/pyarrow-24.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:02b001b3ed4723caa44f6cd1af2d5c86aa2cf9971dacc2ffa55b21237713dfba", size = 34976759, upload-time = "2026-04-21T10:48:07.258Z" }, - { url = 
"https://files.pythonhosted.org/packages/4f/4a/34f0a36d28a2dd32225301b79daad44e243dc1a2bb77d43b60749be255c4/pyarrow-24.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:04920d6a71aabd08a0417709efce97d45ea8e6fb733d9ca9ecffb13c67839f68", size = 36658471, upload-time = "2026-04-21T10:48:13.347Z" }, - { url = "https://files.pythonhosted.org/packages/1f/78/543b94712ae8bb1a6023bcc1acf1a740fbff8286747c289cd9468fced2a5/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a964266397740257f16f7bb2e4f08a0c81454004beab8ff59dd531b73610e9f2", size = 45675981, upload-time = "2026-04-21T10:48:20.201Z" }, - { url = "https://files.pythonhosted.org/packages/84/9f/8fb7c222b100d314137fa40ec050de56cd8c6d957d1cfff685ce72f15b17/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6f066b179d68c413374294bc1735f68475457c933258df594443bb9d88ddc2a0", size = 48859172, upload-time = "2026-04-21T10:48:27.541Z" }, - { url = "https://files.pythonhosted.org/packages/a7/d3/1ea72538e6c8b3b475ed78d1049a2c518e655761ea50fe1171fc855fcab7/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1183baeb14c5f587b1ec52831e665718ce632caab84b7cd6b85fd44f96114495", size = 49385733, upload-time = "2026-04-21T10:48:34.7Z" }, - { url = "https://files.pythonhosted.org/packages/c3/be/c3d8b06a1ba35f2260f8e1f771abbee7d5e345c0937aab90675706b1690a/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:806f24b4085453c197a5078218d1ee08783ebbba271badd153d1ae22a3ee804f", size = 51934335, upload-time = "2026-04-21T10:48:42.099Z" }, - { url = "https://files.pythonhosted.org/packages/9c/62/89e07a1e7329d2cde3e3c6994ba0839a24977a2beda8be6005ea3d860b99/pyarrow-24.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:e4505fc6583f7b05ab854934896bcac8253b04ac1171a77dfb73efef92076d91", size = 27271748, upload-time = "2026-04-21T10:49:42.532Z" }, - { url = 
"https://files.pythonhosted.org/packages/17/1a/cff3a59f80b5b1658549d46611b67163f65e0664431c076ad728bf9d5af4/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:1a4e45017efbf115032e4475ee876d525e0e36c742214fbe405332480ecd6275", size = 35238554, upload-time = "2026-04-21T10:48:48.526Z" }, - { url = "https://files.pythonhosted.org/packages/a8/99/cce0f42a327bfef2c420fb6078a3eb834826e5d6697bf3009fe11d2ad051/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:7986f1fa71cee060ad00758bcc79d3a93bab8559bf978fab9e53472a2e25a17b", size = 36782301, upload-time = "2026-04-21T10:48:55.181Z" }, - { url = "https://files.pythonhosted.org/packages/2a/66/8e560d5ff6793ca29aca213c53eec0dd482dd46cb93b2819e5aab52e4252/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d3e0b61e8efb24ed38898e5cdc5fffa9124be480008d401a1f8071500494ae42", size = 45721929, upload-time = "2026-04-21T10:49:03.676Z" }, - { url = "https://files.pythonhosted.org/packages/27/0c/a26e25505d030716e078d9f16eb74973cbf0b33b672884e9f9da1c83b871/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:55a3bc1e3df3b5567b7d27ef551b2283f0c68a5e86f1cd56abc569da4f31335b", size = 48825365, upload-time = "2026-04-21T10:49:11.714Z" }, - { url = "https://files.pythonhosted.org/packages/5f/eb/771f9ecb0c65e73fe9dccdd1717901b9594f08c4515d000c7c62df573811/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:641f795b361874ac9da5294f8f443dfdbee355cf2bd9e3b8d97aaac2306b9b37", size = 49451819, upload-time = "2026-04-21T10:49:21.474Z" }, - { url = "https://files.pythonhosted.org/packages/48/da/61ae89a88732f5a785646f3ec6125dbb640fa98a540eb2b9889caa561403/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8adc8e6ce5fccf5dc707046ae4914fd537def529709cc0d285d37a7f9cd442ca", size = 51909252, upload-time = "2026-04-21T10:49:31.164Z" }, - { url = 
"https://files.pythonhosted.org/packages/cb/1a/8dd5cafab7b66573fa91c03d06d213356ad4edd71813aa75e08ce2b3a844/pyarrow-24.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:9b18371ad2f44044b81a8d23bc2d8a9b6a6226dca775e8e16cfee640473d6c5d", size = 27388127, upload-time = "2026-04-21T10:49:37.334Z" }, - { url = "https://files.pythonhosted.org/packages/ad/80/d022a34ff05d2cbedd8ccf841fc1f532ecfa9eb5ed1711b56d0e0ea71fc9/pyarrow-24.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:1cc9057f0319e26333b357e17f3c2c022f1a83739b48a88b25bfd5fa2dc18838", size = 35007997, upload-time = "2026-04-21T10:49:48.796Z" }, - { url = "https://files.pythonhosted.org/packages/1a/ff/f01485fda6f4e5d441afb8dd5e7681e4db18826c1e271852f5d3957d6a80/pyarrow-24.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e6f1278ee4785b6db21229374a1c9e54ec7c549de5d1efc9630b6207de7e170b", size = 36678720, upload-time = "2026-04-21T10:49:55.858Z" }, - { url = "https://files.pythonhosted.org/packages/9e/c2/2d2d5fea814237923f71b36495211f20b43a1576f9a4d6da7e751a64ec6f/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:adbbedc55506cbdabb830890444fb856bfb0060c46c6f8026c6c2f2cf86ae795", size = 45741852, upload-time = "2026-04-21T10:50:04.624Z" }, - { url = "https://files.pythonhosted.org/packages/8e/3a/28ba9c1c1ebdbb5f1b94dfebb46f207e52e6a554b7fe4132540fde29a3a0/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ae8a1145af31d903fa9bb166824d7abe9b4681a000b0159c9fb99c11bc11ad26", size = 48889852, upload-time = "2026-04-21T10:50:12.293Z" }, - { url = "https://files.pythonhosted.org/packages/df/51/4a389acfd31dca009f8fb82d7f510bb4130f2b3a8e18cf00194d0687d8ac/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d7027eba1df3b2069e2e8d80f644fa0918b68c46432af3d088ddd390d063ecde", size = 49445207, upload-time = "2026-04-21T10:50:20.677Z" }, - { url = 
"https://files.pythonhosted.org/packages/19/4b/0bab2b23d2ae901b1b9a03c0efd4b2d070256f8ce3fc43f6e58c167b2081/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e56a1ffe9bf7b727432b89104cc0849c21582949dd7bdcb34f17b2001a351a76", size = 51954117, upload-time = "2026-04-21T10:50:29.14Z" }, - { url = "https://files.pythonhosted.org/packages/29/88/f4e9145da0417b3d2c12035a8492b35ff4a3dbc653e614fcfb51d9dedb38/pyarrow-24.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:38be1808cdd068605b787e6ca9119b27eb275a0234e50212c3492331680c3b1e", size = 28001155, upload-time = "2026-04-21T10:51:22.337Z" }, - { url = "https://files.pythonhosted.org/packages/79/4f/46a49a63f43526da895b1a45bbb51d5baf8e4d77159f8528fc3e5490007f/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:418e48ce50a45a6a6c73c454677203a9c75c966cb1e92ca3370959185f197a05", size = 35250387, upload-time = "2026-04-21T10:50:35.552Z" }, - { url = "https://files.pythonhosted.org/packages/a0/da/d5e0cd5ef00796922404806d5f00325cdadc3441ce2c13fe7115f2df9a64/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:2f16197705a230a78270cdd4ea8a1d57e86b2fdcbc34a1f6aebc72e65c986f9a", size = 36797102, upload-time = "2026-04-21T10:50:42.417Z" }, - { url = "https://files.pythonhosted.org/packages/34/c7/5904145b0a593a05236c882933d439b5720f0a145381179063722fbfc123/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:fb24ac194bfc5e86839d7dcd52092ee31e5fe6733fe11f5e3b06ef0812b20072", size = 45745118, upload-time = "2026-04-21T10:50:49.324Z" }, - { url = "https://files.pythonhosted.org/packages/13/d3/cca42fe166d1c6e4d5b80e530b7949104d10e17508a90ae202dac205ce2a/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:9700ebd9a51f5895ce75ff4ac4b3c47a7d4b42bc618be8e713e5d56bacf5f931", size = 48844765, upload-time = "2026-04-21T10:50:55.579Z" }, - { url = 
"https://files.pythonhosted.org/packages/b0/49/942c3b79878ba928324d1e17c274ed84581db8c0a749b24bcf4cbdf15bd3/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d8ddd2768da81d3ee08cfea9b597f4abb4e8e1dc8ae7e204b608d23a0d3ab699", size = 49471890, upload-time = "2026-04-21T10:51:02.439Z" }, - { url = "https://files.pythonhosted.org/packages/76/97/ff71431000a75d84135a1ace5ca4ba11726a231a8007bbb320a4c54075d5/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:61a3d7eaa97a14768b542f3d284dc6400dd2470d9f080708b13cd46b6ae18136", size = 51932250, upload-time = "2026-04-21T10:51:10.576Z" }, - { url = "https://files.pythonhosted.org/packages/51/be/6f79d55816d5c22557cf27533543d5d70dfe692adfbee4b99f2760674f38/pyarrow-24.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:c91d00057f23b8d353039520dc3a6c09d8608164c692e9f59a175a42b2ae0c19", size = 28131282, upload-time = "2026-04-21T10:51:16.815Z" }, -] - [[package]] name = "pyasn1" version = "0.6.3" @@ -4215,179 +3004,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, ] -[[package]] -name = "pybase64" -version = "1.4.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/aa/b8/4ed5c7ad5ec15b08d35cc79ace6145d5c1ae426e46435f4987379439dfea/pybase64-1.4.3.tar.gz", hash = "sha256:c2ed274c9e0ba9c8f9c4083cfe265e66dd679126cd9c2027965d807352f3f053", size = 137272, upload-time = "2025-12-06T13:27:04.013Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/39/47/16d7af6fae7803f4c691856bc0d8d433ccf30e106432e2ef7707ee19a38a/pybase64-1.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f63aa7f29139b8a05ce5f97cdb7fad63d29071e5bdc8a638a343311fe996112a", size = 38241, upload-time = 
"2025-12-06T13:22:27.396Z" }, - { url = "https://files.pythonhosted.org/packages/4d/3e/268beb8d2240ab55396af4d1b45d2494935982212549b92a5f5b57079bd3/pybase64-1.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f5943ec1ae87a8b4fe310905bb57205ea4330c75e2c628433a7d9dd52295b588", size = 31672, upload-time = "2025-12-06T13:22:28.854Z" }, - { url = "https://files.pythonhosted.org/packages/80/14/4365fa33222edcc46b6db4973f9e22bda82adfb6ab2a01afff591f1e41c8/pybase64-1.4.3-cp310-cp310-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:5f2b8aef86f35cd5894c13681faf433a1fffc5b2e76544dcb5416a514a1a8347", size = 65978, upload-time = "2025-12-06T13:22:30.191Z" }, - { url = "https://files.pythonhosted.org/packages/1c/22/e89739d8bc9b96c68ead44b4eec42fe555683d9997e4ba65216d384920fc/pybase64-1.4.3-cp310-cp310-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a6ec7e53dd09b0a8116ccf5c3265c7c7fce13c980747525be76902aef36a514a", size = 68903, upload-time = "2025-12-06T13:22:31.29Z" }, - { url = "https://files.pythonhosted.org/packages/77/e1/7e59a19f8999cdefe9eb0d56bfd701dd38263b0f6fb4a4d29fce165a1b36/pybase64-1.4.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7528604cd69c538e1dbaafded46e9e4915a2adcd6f2a60fcef6390d87ca922ea", size = 57516, upload-time = "2025-12-06T13:22:32.395Z" }, - { url = "https://files.pythonhosted.org/packages/42/ad/f47dc7e6fe32022b176868b88b671a32dab389718c8ca905cab79280aaaf/pybase64-1.4.3-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:4ec645f32b50593879031e09158f8681a1db9f5df0f72af86b3969a1c5d1fa2b", size = 54533, upload-time = "2025-12-06T13:22:33.457Z" }, - { url = "https://files.pythonhosted.org/packages/7c/9a/7ab312b5a324833953b00e47b23eb4f83d45bd5c5c854b4b4e51b2a0cf5b/pybase64-1.4.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = 
"sha256:634a000c5b3485ccc18bb9b244e0124f74b6fbc7f43eade815170237a7b34c64", size = 57187, upload-time = "2025-12-06T13:22:34.566Z" }, - { url = "https://files.pythonhosted.org/packages/2c/84/80acab1fcbaaae103e6b862ef5019192c8f2cd8758433595a202179a0d1d/pybase64-1.4.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:309ea32ad07639a485580af1be0ad447a434deb1924e76adced63ac2319cfe15", size = 57730, upload-time = "2025-12-06T13:22:35.581Z" }, - { url = "https://files.pythonhosted.org/packages/1f/24/84256d472400ea3163d7d69c44bb7e2e1027f0f1d4d20c47629a7dc4578e/pybase64-1.4.3-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:d10d517566b748d3f25f6ac7162af779360c1c6426ad5f962927ee205990d27c", size = 53036, upload-time = "2025-12-06T13:22:36.621Z" }, - { url = "https://files.pythonhosted.org/packages/a3/0f/33aecbed312ee0431798a73fa25e00dedbffdd91389ee23121fed397c550/pybase64-1.4.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a74cc0f4d835400857cc5c6d27ec854f7949491e07a04e6d66e2137812831f4c", size = 56321, upload-time = "2025-12-06T13:22:37.7Z" }, - { url = "https://files.pythonhosted.org/packages/dc/1c/a341b050746658cbec8cab3c733aeb3ef52ce8f11e60d0d47adbdf729ebf/pybase64-1.4.3-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:1b591d774ac09d5eb73c156a03277cb271438fbd8042bae4109ff3a827cd218c", size = 50114, upload-time = "2025-12-06T13:22:38.752Z" }, - { url = "https://files.pythonhosted.org/packages/ba/d3/f7e6680ae6dc4ddff39112ad66e0fa6b2ec346e73881bafc08498c560bc0/pybase64-1.4.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5eb588d35a04302ef6157d17db62354a787ac6f8b1585dd0b90c33d63a97a550", size = 66570, upload-time = "2025-12-06T13:22:40.221Z" }, - { url = "https://files.pythonhosted.org/packages/4c/71/774748eecc7fe23869b7e5df028e3c4c2efa16b506b83ea3fa035ea95dc2/pybase64-1.4.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:df8b122d5be2c96962231cc4831d9c2e1eae6736fb12850cec4356d8b06fe6f8", size = 55700, upload-time = 
"2025-12-06T13:22:41.289Z" }, - { url = "https://files.pythonhosted.org/packages/b3/91/dd15075bb2fe0086193e1cd4bad80a43652c38d8a572f9218d46ba721802/pybase64-1.4.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:31b7a85c661fc591bbcce82fb8adaebe2941e6a83b08444b0957b77380452a4b", size = 52491, upload-time = "2025-12-06T13:22:42.628Z" }, - { url = "https://files.pythonhosted.org/packages/7b/27/f357d63ea3774c937fc47160e040419ed528827aa3d4306d5ec9826259c0/pybase64-1.4.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e6d7beaae65979fef250e25e66cf81c68a8f81910bcda1a2f43297ab486a7e4e", size = 53957, upload-time = "2025-12-06T13:22:44.615Z" }, - { url = "https://files.pythonhosted.org/packages/b3/c3/243693771701a54e67ff5ccbf4c038344f429613f5643169a7befc51f007/pybase64-1.4.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4a6276bc3a3962d172a2b5aba544d89881c4037ea954517b86b00892c703d007", size = 68422, upload-time = "2025-12-06T13:22:45.641Z" }, - { url = "https://files.pythonhosted.org/packages/75/95/f987081bf6bc1d1eda3012dae1b06ad427732ef9933a632cb8b58f9917f8/pybase64-1.4.3-cp310-cp310-win32.whl", hash = "sha256:4bdd07ef017515204ee6eaab17e1ad05f83c0ccb5af8ae24a0fe6d9cb5bb0b7a", size = 33622, upload-time = "2025-12-06T13:22:47.348Z" }, - { url = "https://files.pythonhosted.org/packages/79/28/c169a769fe90128f16d394aad87b2096dd4bf2f035ae0927108a46b617df/pybase64-1.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:5db0b6bbda15110db2740c61970a8fda3bf9c93c3166a3f57f87c7865ed1125c", size = 35799, upload-time = "2025-12-06T13:22:48.731Z" }, - { url = "https://files.pythonhosted.org/packages/ab/f2/bdbe6af0bd4f3fe5bc70e77ead7f7d523bb9d3ca3ad50ac42b9adbb9ca14/pybase64-1.4.3-cp310-cp310-win_arm64.whl", hash = "sha256:f96367dfc82598569aa02b1103ebd419298293e59e1151abda2b41728703284b", size = 31158, upload-time = "2025-12-06T13:22:50.021Z" }, - { url = 
"https://files.pythonhosted.org/packages/2b/63/21e981e9d3f1f123e0b0ee2130112b1956cad9752309f574862c7ae77c08/pybase64-1.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:70b0d4a4d54e216ce42c2655315378b8903933ecfa32fced453989a92b4317b2", size = 38237, upload-time = "2025-12-06T13:22:52.159Z" }, - { url = "https://files.pythonhosted.org/packages/92/fb/3f448e139516404d2a3963915cc10dc9dde7d3a67de4edba2f827adfef17/pybase64-1.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8127f110cdee7a70e576c5c9c1d4e17e92e76c191869085efbc50419f4ae3c72", size = 31673, upload-time = "2025-12-06T13:22:53.241Z" }, - { url = "https://files.pythonhosted.org/packages/3c/fb/bb06a5b9885e7d853ac1e801c4d8abfdb4c8506deee33e53d55aa6690e67/pybase64-1.4.3-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f9ef0388878bc15a084bd9bf73ec1b2b4ee513d11009b1506375e10a7aae5032", size = 68331, upload-time = "2025-12-06T13:22:54.197Z" }, - { url = "https://files.pythonhosted.org/packages/64/15/8d60b9ec5e658185fc2ee3333e01a6e30d717cf677b24f47cbb3a859d13c/pybase64-1.4.3-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95a57cccf106352a72ed8bc8198f6820b16cc7d55aa3867a16dea7011ae7c218", size = 71370, upload-time = "2025-12-06T13:22:55.517Z" }, - { url = "https://files.pythonhosted.org/packages/ac/29/a3e5c1667cc8c38d025a4636855de0fc117fc62e2afeb033a3c6f12c6a22/pybase64-1.4.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cd1c47dfceb9c7bd3de210fb4e65904053ed2d7c9dce6d107f041ff6fbd7e21", size = 59834, upload-time = "2025-12-06T13:22:56.682Z" }, - { url = "https://files.pythonhosted.org/packages/a9/00/8ffcf9810bd23f3984698be161cf7edba656fd639b818039a7be1d6405d4/pybase64-1.4.3-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:9fe9922698f3e2f72874b26890d53a051c431d942701bb3a37aae94da0b12107", size = 56652, upload-time = 
"2025-12-06T13:22:57.724Z" }, - { url = "https://files.pythonhosted.org/packages/81/62/379e347797cdea4ab686375945bc77ad8d039c688c0d4d0cfb09d247beb9/pybase64-1.4.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:af5f4bd29c86b59bb4375e0491d16ec8a67548fa99c54763aaedaf0b4b5a6632", size = 59382, upload-time = "2025-12-06T13:22:58.758Z" }, - { url = "https://files.pythonhosted.org/packages/c6/f2/9338ffe2f487086f26a2c8ca175acb3baa86fce0a756ff5670a0822bb877/pybase64-1.4.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c302f6ca7465262908131411226e02100f488f531bb5e64cb901aa3f439bccd9", size = 59990, upload-time = "2025-12-06T13:23:01.007Z" }, - { url = "https://files.pythonhosted.org/packages/f9/a4/85a6142b65b4df8625b337727aa81dc199642de3d09677804141df6ee312/pybase64-1.4.3-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:2f3f439fa4d7fde164ebbbb41968db7d66b064450ab6017c6c95cef0afa2b349", size = 54923, upload-time = "2025-12-06T13:23:02.369Z" }, - { url = "https://files.pythonhosted.org/packages/ac/00/e40215d25624012bf5b7416ca37f168cb75f6dd15acdb91ea1f2ea4dc4e7/pybase64-1.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7a23c6866551043f8b681a5e1e0d59469148b2920a3b4fc42b1275f25ea4217a", size = 58664, upload-time = "2025-12-06T13:23:03.378Z" }, - { url = "https://files.pythonhosted.org/packages/b0/73/d7e19a63e795c13837f2356268d95dc79d1180e756f57ced742a1e52fdeb/pybase64-1.4.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:56e6526f8565642abc5f84338cc131ce298a8ccab696b19bdf76fa6d7dc592ef", size = 52338, upload-time = "2025-12-06T13:23:04.458Z" }, - { url = "https://files.pythonhosted.org/packages/f2/32/3c746d7a310b69bdd9df77ffc85c41b80bce00a774717596f869b0d4a20e/pybase64-1.4.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6a792a8b9d866ffa413c9687d9b611553203753987a3a582d68cbc51cf23da45", size = 68993, upload-time = "2025-12-06T13:23:05.526Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/b3/63cec68f9d6f6e4c0b438d14e5f1ef536a5fe63ce14b70733ac5e31d7ab8/pybase64-1.4.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:62ad29a5026bb22cfcd1ca484ec34b0a5ced56ddba38ceecd9359b2818c9c4f9", size = 58055, upload-time = "2025-12-06T13:23:06.931Z" }, - { url = "https://files.pythonhosted.org/packages/d5/cb/7acf7c3c06f9692093c07f109668725dc37fb9a3df0fa912b50add645195/pybase64-1.4.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:11b9d1d2d32ec358c02214363b8fc3651f6be7dd84d880ecd597a6206a80e121", size = 54430, upload-time = "2025-12-06T13:23:07.936Z" }, - { url = "https://files.pythonhosted.org/packages/33/39/4eb33ff35d173bfff4002e184ce8907f5d0a42d958d61cd9058ef3570179/pybase64-1.4.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0aebaa7f238caa0a0d373616016e2040c6c879ebce3ba7ab3c59029920f13640", size = 56272, upload-time = "2025-12-06T13:23:09.253Z" }, - { url = "https://files.pythonhosted.org/packages/19/97/a76d65c375a254e65b730c6f56bf528feca91305da32eceab8bcc08591e6/pybase64-1.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e504682b20c63c2b0c000e5f98a80ea867f8d97642e042a5a39818e44ba4d599", size = 70904, upload-time = "2025-12-06T13:23:10.336Z" }, - { url = "https://files.pythonhosted.org/packages/5e/2c/8338b6d3da3c265002839e92af0a80d6db88385c313c73f103dfb800c857/pybase64-1.4.3-cp311-cp311-win32.whl", hash = "sha256:e9a8b81984e3c6fb1db9e1614341b0a2d98c0033d693d90c726677db1ffa3a4c", size = 33639, upload-time = "2025-12-06T13:23:11.9Z" }, - { url = "https://files.pythonhosted.org/packages/39/dc/32efdf2f5927e5449cc341c266a1bbc5fecd5319a8807d9c5405f76e6d02/pybase64-1.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:a90a8fa16a901fabf20de824d7acce07586e6127dc2333f1de05f73b1f848319", size = 35797, upload-time = "2025-12-06T13:23:13.174Z" }, - { url = "https://files.pythonhosted.org/packages/da/59/eda4f9cb0cbce5a45f0cd06131e710674f8123a4d570772c5b9694f88559/pybase64-1.4.3-cp311-cp311-win_arm64.whl", 
hash = "sha256:61d87de5bc94d143622e94390ec3e11b9c1d4644fe9be3a81068ab0f91056f59", size = 31160, upload-time = "2025-12-06T13:23:15.696Z" }, - { url = "https://files.pythonhosted.org/packages/86/a7/efcaa564f091a2af7f18a83c1c4875b1437db56ba39540451dc85d56f653/pybase64-1.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:18d85e5ab8b986bb32d8446aca6258ed80d1bafe3603c437690b352c648f5967", size = 38167, upload-time = "2025-12-06T13:23:16.821Z" }, - { url = "https://files.pythonhosted.org/packages/db/c7/c7ad35adff2d272bf2930132db2b3eea8c44bb1b1f64eb9b2b8e57cde7b4/pybase64-1.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3f5791a3491d116d0deaf4d83268f48792998519698f8751efb191eac84320e9", size = 31673, upload-time = "2025-12-06T13:23:17.835Z" }, - { url = "https://files.pythonhosted.org/packages/43/1b/9a8cab0042b464e9a876d5c65fe5127445a2436da36fda64899b119b1a1b/pybase64-1.4.3-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f0b3f200c3e06316f6bebabd458b4e4bcd4c2ca26af7c0c766614d91968dee27", size = 68210, upload-time = "2025-12-06T13:23:18.813Z" }, - { url = "https://files.pythonhosted.org/packages/62/f7/965b79ff391ad208b50e412b5d3205ccce372a2d27b7218ae86d5295b105/pybase64-1.4.3-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb632edfd132b3eaf90c39c89aa314beec4e946e210099b57d40311f704e11d4", size = 71599, upload-time = "2025-12-06T13:23:20.195Z" }, - { url = "https://files.pythonhosted.org/packages/03/4b/a3b5175130b3810bbb8ccfa1edaadbd3afddb9992d877c8a1e2f274b476e/pybase64-1.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:356ef1d74648ce997f5a777cf8f1aefecc1c0b4fe6201e0ef3ec8a08170e1b54", size = 59922, upload-time = "2025-12-06T13:23:21.487Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/5d/c38d1572027fc601b62d7a407721688b04b4d065d60ca489912d6893e6cf/pybase64-1.4.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:c48361f90db32bacaa5518419d4eb9066ba558013aaf0c7781620279ecddaeb9", size = 56712, upload-time = "2025-12-06T13:23:22.77Z" }, - { url = "https://files.pythonhosted.org/packages/e7/d4/4e04472fef485caa8f561d904d4d69210a8f8fc1608ea15ebd9012b92655/pybase64-1.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:702bcaa16ae02139d881aeaef5b1c8ffb4a3fae062fe601d1e3835e10310a517", size = 59300, upload-time = "2025-12-06T13:23:24.543Z" }, - { url = "https://files.pythonhosted.org/packages/86/e7/16e29721b86734b881d09b7e23dfd7c8408ad01a4f4c7525f3b1088e25ec/pybase64-1.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:53d0ffe1847b16b647c6413d34d1de08942b7724273dd57e67dcbdb10c574045", size = 60278, upload-time = "2025-12-06T13:23:25.608Z" }, - { url = "https://files.pythonhosted.org/packages/b1/02/18515f211d7c046be32070709a8efeeef8a0203de4fd7521e6b56404731b/pybase64-1.4.3-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:9a1792e8b830a92736dae58f0c386062eb038dfe8004fb03ba33b6083d89cd43", size = 54817, upload-time = "2025-12-06T13:23:26.633Z" }, - { url = "https://files.pythonhosted.org/packages/e7/be/14e29d8e1a481dbff151324c96dd7b5d2688194bb65dc8a00ca0e1ad1e86/pybase64-1.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1d468b1b1ac5ad84875a46eaa458663c3721e8be5f155ade356406848d3701f6", size = 58611, upload-time = "2025-12-06T13:23:27.684Z" }, - { url = "https://files.pythonhosted.org/packages/b4/8a/a2588dfe24e1bbd742a554553778ab0d65fdf3d1c9a06d10b77047d142aa/pybase64-1.4.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e97b7bdbd62e71898cd542a6a9e320d9da754ff3ebd02cb802d69087ee94d468", size = 52404, upload-time = "2025-12-06T13:23:28.714Z" }, - { url = 
"https://files.pythonhosted.org/packages/27/fc/afcda7445bebe0cbc38cafdd7813234cdd4fc5573ff067f1abf317bb0cec/pybase64-1.4.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b33aeaa780caaa08ffda87fc584d5eab61e3d3bbb5d86ead02161dc0c20d04bc", size = 68817, upload-time = "2025-12-06T13:23:30.079Z" }, - { url = "https://files.pythonhosted.org/packages/d3/3a/87c3201e555ed71f73e961a787241a2438c2bbb2ca8809c29ddf938a3157/pybase64-1.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c0efcf78f11cf866bed49caa7b97552bc4855a892f9cc2372abcd3ed0056f0d", size = 57854, upload-time = "2025-12-06T13:23:31.17Z" }, - { url = "https://files.pythonhosted.org/packages/fd/7d/931c2539b31a7b375e7d595b88401eeb5bd6c5ce1059c9123f9b608aaa14/pybase64-1.4.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:66e3791f2ed725a46593f8bd2761ff37d01e2cdad065b1dceb89066f476e50c6", size = 54333, upload-time = "2025-12-06T13:23:32.422Z" }, - { url = "https://files.pythonhosted.org/packages/de/5e/537601e02cc01f27e9d75f440f1a6095b8df44fc28b1eef2cd739aea8cec/pybase64-1.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:72bb0b6bddadab26e1b069bb78e83092711a111a80a0d6b9edcb08199ad7299b", size = 56492, upload-time = "2025-12-06T13:23:33.515Z" }, - { url = "https://files.pythonhosted.org/packages/96/97/2a2e57acf8f5c9258d22aba52e71f8050e167b29ed2ee1113677c1b600c1/pybase64-1.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5b3365dbcbcdb0a294f0f50af0c0a16b27a232eddeeb0bceeefd844ef30d2a23", size = 70974, upload-time = "2025-12-06T13:23:36.27Z" }, - { url = "https://files.pythonhosted.org/packages/75/2e/a9e28941c6dab6f06e6d3f6783d3373044be9b0f9a9d3492c3d8d2260ac0/pybase64-1.4.3-cp312-cp312-win32.whl", hash = "sha256:7bca1ed3a5df53305c629ca94276966272eda33c0d71f862d2d3d043f1e1b91a", size = 33686, upload-time = "2025-12-06T13:23:37.848Z" }, - { url = 
"https://files.pythonhosted.org/packages/83/e3/507ab649d8c3512c258819c51d25c45d6e29d9ca33992593059e7b646a33/pybase64-1.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:9f2da8f56d9b891b18b4daf463a0640eae45a80af548ce435be86aa6eff3603b", size = 35833, upload-time = "2025-12-06T13:23:38.877Z" }, - { url = "https://files.pythonhosted.org/packages/bc/8a/6eba66cd549a2fc74bb4425fd61b839ba0ab3022d3c401b8a8dc2cc00c7a/pybase64-1.4.3-cp312-cp312-win_arm64.whl", hash = "sha256:0631d8a2d035de03aa9bded029b9513e1fee8ed80b7ddef6b8e9389ffc445da0", size = 31185, upload-time = "2025-12-06T13:23:39.908Z" }, - { url = "https://files.pythonhosted.org/packages/3a/50/b7170cb2c631944388fe2519507fe3835a4054a6a12a43f43781dae82be1/pybase64-1.4.3-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:ea4b785b0607d11950b66ce7c328f452614aefc9c6d3c9c28bae795dc7f072e1", size = 33901, upload-time = "2025-12-06T13:23:40.951Z" }, - { url = "https://files.pythonhosted.org/packages/48/8b/69f50578e49c25e0a26e3ee72c39884ff56363344b79fc3967f5af420ed6/pybase64-1.4.3-cp313-cp313-android_21_x86_64.whl", hash = "sha256:6a10b6330188c3026a8b9c10e6b9b3f2e445779cf16a4c453d51a072241c65a2", size = 40807, upload-time = "2025-12-06T13:23:42.006Z" }, - { url = "https://files.pythonhosted.org/packages/5c/8d/20b68f11adfc4c22230e034b65c71392e3e338b413bf713c8945bd2ccfb3/pybase64-1.4.3-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:27fdff227a0c0e182e0ba37a99109645188978b920dfb20d8b9c17eeee370d0d", size = 30932, upload-time = "2025-12-06T13:23:43.348Z" }, - { url = "https://files.pythonhosted.org/packages/f7/79/b1b550ac6bff51a4880bf6e089008b2e1ca16f2c98db5e039a08ac3ad157/pybase64-1.4.3-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:2a8204f1fdfec5aa4184249b51296c0de95445869920c88123978304aad42df1", size = 31394, upload-time = "2025-12-06T13:23:44.317Z" }, - { url = 
"https://files.pythonhosted.org/packages/82/70/b5d7c5932bf64ee1ec5da859fbac981930b6a55d432a603986c7f509c838/pybase64-1.4.3-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:874fc2a3777de6baf6aa921a7aa73b3be98295794bea31bd80568a963be30767", size = 38078, upload-time = "2025-12-06T13:23:45.348Z" }, - { url = "https://files.pythonhosted.org/packages/56/fe/e66fe373bce717c6858427670736d54297938dad61c5907517ab4106bd90/pybase64-1.4.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2dc64a94a9d936b8e3449c66afabbaa521d3cc1a563d6bbaaa6ffa4535222e4b", size = 38158, upload-time = "2025-12-06T13:23:46.872Z" }, - { url = "https://files.pythonhosted.org/packages/80/a9/b806ed1dcc7aed2ea3dd4952286319e6f3a8b48615c8118f453948e01999/pybase64-1.4.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e48f86de1c145116ccf369a6e11720ce696c2ec02d285f440dfb57ceaa0a6cb4", size = 31672, upload-time = "2025-12-06T13:23:47.88Z" }, - { url = "https://files.pythonhosted.org/packages/1c/c9/24b3b905cf75e23a9a4deaf203b35ffcb9f473ac0e6d8257f91a05dfce62/pybase64-1.4.3-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:1d45c8fe8fe82b65c36b227bb4a2cf623d9ada16bed602ce2d3e18c35285b72a", size = 68244, upload-time = "2025-12-06T13:23:49.026Z" }, - { url = "https://files.pythonhosted.org/packages/f8/cd/d15b0c3e25e5859fab0416dc5b96d34d6bd2603c1c96a07bb2202b68ab92/pybase64-1.4.3-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ad70c26ba091d8f5167e9d4e1e86a0483a5414805cdb598a813db635bd3be8b8", size = 71620, upload-time = "2025-12-06T13:23:50.081Z" }, - { url = "https://files.pythonhosted.org/packages/0d/31/4ca953cc3dcde2b3711d6bfd70a6f4ad2ca95a483c9698076ba605f1520f/pybase64-1.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e98310b7c43145221e7194ac9fa7fffc84763c87bfc5e2f59f9f92363475bdc1", size = 59930, upload-time = 
"2025-12-06T13:23:51.68Z" }, - { url = "https://files.pythonhosted.org/packages/60/55/e7f7bdcd0fd66e61dda08db158ffda5c89a306bbdaaf5a062fbe4e48f4a1/pybase64-1.4.3-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:398685a76034e91485a28aeebcb49e64cd663212fd697b2497ac6dfc1df5e671", size = 56425, upload-time = "2025-12-06T13:23:52.732Z" }, - { url = "https://files.pythonhosted.org/packages/cb/65/b592c7f921e51ca1aca3af5b0d201a98666d0a36b930ebb67e7c2ed27395/pybase64-1.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:7e46400a6461187ccb52ed75b0045d937529e801a53a9cd770b350509f9e4d50", size = 59327, upload-time = "2025-12-06T13:23:53.856Z" }, - { url = "https://files.pythonhosted.org/packages/23/95/1613d2fb82dbb1548595ad4179f04e9a8451bfa18635efce18b631eabe3f/pybase64-1.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:1b62b9f2f291d94f5e0b76ab499790b7dcc78a009d4ceea0b0428770267484b6", size = 60294, upload-time = "2025-12-06T13:23:54.937Z" }, - { url = "https://files.pythonhosted.org/packages/9d/73/40431f37f7d1b3eab4673e7946ff1e8f5d6bd425ec257e834dae8a6fc7b0/pybase64-1.4.3-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:f30ceb5fa4327809dede614be586efcbc55404406d71e1f902a6fdcf322b93b2", size = 54858, upload-time = "2025-12-06T13:23:56.031Z" }, - { url = "https://files.pythonhosted.org/packages/a7/84/f6368bcaf9f743732e002a9858646fd7a54f428490d427dd6847c5cfe89e/pybase64-1.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0d5f18ed53dfa1d4cf8b39ee542fdda8e66d365940e11f1710989b3cf4a2ed66", size = 58629, upload-time = "2025-12-06T13:23:57.12Z" }, - { url = "https://files.pythonhosted.org/packages/43/75/359532f9adb49c6b546cafc65c46ed75e2ccc220d514ba81c686fbd83965/pybase64-1.4.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:119d31aa4b58b85a8ebd12b63c07681a138c08dfc2fe5383459d42238665d3eb", size = 52448, upload-time = "2025-12-06T13:23:58.298Z" }, - { url = 
"https://files.pythonhosted.org/packages/92/6c/ade2ba244c3f33ed920a7ed572ad772eb0b5f14480b72d629d0c9e739a40/pybase64-1.4.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:3cf0218b0e2f7988cf7d738a73b6a1d14f3be6ce249d7c0f606e768366df2cce", size = 68841, upload-time = "2025-12-06T13:23:59.886Z" }, - { url = "https://files.pythonhosted.org/packages/a0/51/b345139cd236be382f2d4d4453c21ee6299e14d2f759b668e23080f8663f/pybase64-1.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:12f4ee5e988bc5c0c1106b0d8fc37fb0508f12dab76bac1b098cb500d148da9d", size = 57910, upload-time = "2025-12-06T13:24:00.994Z" }, - { url = "https://files.pythonhosted.org/packages/1a/b8/9f84bdc4f1c4f0052489396403c04be2f9266a66b70c776001eaf0d78c1f/pybase64-1.4.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:937826bc7b6b95b594a45180e81dd4d99bd4dd4814a443170e399163f7ff3fb6", size = 54335, upload-time = "2025-12-06T13:24:02.046Z" }, - { url = "https://files.pythonhosted.org/packages/d0/c7/be63b617d284de46578a366da77ede39c8f8e815ed0d82c7c2acca560fab/pybase64-1.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:88995d1460971ef80b13e3e007afbe4b27c62db0508bc7250a2ab0a0b4b91362", size = 56486, upload-time = "2025-12-06T13:24:03.141Z" }, - { url = "https://files.pythonhosted.org/packages/5e/96/f252c8f9abd6ded3ef1ccd3cdbb8393a33798007f761b23df8de1a2480e6/pybase64-1.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:72326fe163385ed3e1e806dd579d47fde5d8a59e51297a60fc4e6cbc1b4fc4ed", size = 70978, upload-time = "2025-12-06T13:24:04.221Z" }, - { url = "https://files.pythonhosted.org/packages/af/51/0f5714af7aeef96e30f968e4371d75ad60558aaed3579d7c6c8f1c43c18a/pybase64-1.4.3-cp313-cp313-win32.whl", hash = "sha256:b1623730c7892cf5ed0d6355e375416be6ef8d53ab9b284f50890443175c0ac3", size = 33684, upload-time = "2025-12-06T13:24:05.29Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/ad/0cea830a654eb08563fb8214150ef57546ece1cc421c09035f0e6b0b5ea9/pybase64-1.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:8369887590f1646a5182ca2fb29252509da7ae31d4923dbb55d3e09da8cc4749", size = 35832, upload-time = "2025-12-06T13:24:06.35Z" }, - { url = "https://files.pythonhosted.org/packages/b4/0d/eec2a8214989c751bc7b4cad1860eb2c6abf466e76b77508c0f488c96a37/pybase64-1.4.3-cp313-cp313-win_arm64.whl", hash = "sha256:860b86bca71e5f0237e2ab8b2d9c4c56681f3513b1bf3e2117290c1963488390", size = 31175, upload-time = "2025-12-06T13:24:07.419Z" }, - { url = "https://files.pythonhosted.org/packages/db/c9/e23463c1a2913686803ef76b1a5ae7e6fac868249a66e48253d17ad7232c/pybase64-1.4.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:eb51db4a9c93215135dccd1895dca078e8785c357fabd983c9f9a769f08989a9", size = 38497, upload-time = "2025-12-06T13:24:08.873Z" }, - { url = "https://files.pythonhosted.org/packages/71/83/343f446b4b7a7579bf6937d2d013d82f1a63057cf05558e391ab6039d7db/pybase64-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a03ef3f529d85fd46b89971dfb00c634d53598d20ad8908fb7482955c710329d", size = 32076, upload-time = "2025-12-06T13:24:09.975Z" }, - { url = "https://files.pythonhosted.org/packages/46/fc/cb64964c3b29b432f54d1bce5e7691d693e33bbf780555151969ffd95178/pybase64-1.4.3-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:2e745f2ce760c6cf04d8a72198ef892015ddb89f6ceba489e383518ecbdb13ab", size = 72317, upload-time = "2025-12-06T13:24:11.129Z" }, - { url = "https://files.pythonhosted.org/packages/0a/b7/fab2240da6f4e1ad46f71fa56ec577613cf5df9dce2d5b4cfaa4edd0e365/pybase64-1.4.3-cp313-cp313t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fac217cd9de8581a854b0ac734c50fd1fa4b8d912396c1fc2fce7c230efe3a7", size = 75534, upload-time = "2025-12-06T13:24:12.433Z" }, - { url = 
"https://files.pythonhosted.org/packages/91/3b/3e2f2b6e68e3d83ddb9fa799f3548fb7449765daec9bbd005a9fbe296d7f/pybase64-1.4.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:da1ee8fa04b283873de2d6e8fa5653e827f55b86bdf1a929c5367aaeb8d26f8a", size = 65399, upload-time = "2025-12-06T13:24:13.928Z" }, - { url = "https://files.pythonhosted.org/packages/6b/08/476ac5914c3b32e0274a2524fc74f01cbf4f4af4513d054e41574eb018f6/pybase64-1.4.3-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:b0bf8e884ee822ca7b1448eeb97fa131628fe0ff42f60cae9962789bd562727f", size = 60487, upload-time = "2025-12-06T13:24:15.177Z" }, - { url = "https://files.pythonhosted.org/packages/f1/b8/618a92915330cc9cba7880299b546a1d9dab1a21fd6c0292ee44a4fe608c/pybase64-1.4.3-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1bf749300382a6fd1f4f255b183146ef58f8e9cb2f44a077b3a9200dfb473a77", size = 63959, upload-time = "2025-12-06T13:24:16.854Z" }, - { url = "https://files.pythonhosted.org/packages/a5/52/af9d8d051652c3051862c442ec3861259c5cdb3fc69774bc701470bd2a59/pybase64-1.4.3-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:153a0e42329b92337664cfc356f2065248e6c9a1bd651bbcd6dcaf15145d3f06", size = 64874, upload-time = "2025-12-06T13:24:18.328Z" }, - { url = "https://files.pythonhosted.org/packages/e4/51/5381a7adf1f381bd184d33203692d3c57cf8ae9f250f380c3fecbdbe554b/pybase64-1.4.3-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:86ee56ac7f2184ca10217ed1c655c1a060273e233e692e9086da29d1ae1768db", size = 58572, upload-time = "2025-12-06T13:24:19.417Z" }, - { url = "https://files.pythonhosted.org/packages/e0/f0/578ee4ffce5818017de4fdf544e066c225bc435e73eb4793cde28a689d0b/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0e71a4db76726bf830b47477e7d830a75c01b2e9b01842e787a0836b0ba741e3", size = 63636, upload-time = "2025-12-06T13:24:20.497Z" }, - { url = 
"https://files.pythonhosted.org/packages/b9/ad/8ae94814bf20159ea06310b742433e53d5820aa564c9fdf65bf2d79f8799/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:2ba7799ec88540acd9861b10551d24656ca3c2888ecf4dba2ee0a71544a8923f", size = 56193, upload-time = "2025-12-06T13:24:21.559Z" }, - { url = "https://files.pythonhosted.org/packages/d1/31/6438cfcc3d3f0fa84d229fa125c243d5094e72628e525dfefadf3bcc6761/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2860299e4c74315f5951f0cf3e72ba0f201c3356c8a68f95a3ab4e620baf44e9", size = 72655, upload-time = "2025-12-06T13:24:22.673Z" }, - { url = "https://files.pythonhosted.org/packages/a3/0d/2bbc9e9c3fc12ba8a6e261482f03a544aca524f92eae0b4908c0a10ba481/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:bb06015db9151f0c66c10aae8e3603adab6b6cd7d1f7335a858161d92fc29618", size = 62471, upload-time = "2025-12-06T13:24:23.8Z" }, - { url = "https://files.pythonhosted.org/packages/2c/0b/34d491e7f49c1dbdb322ea8da6adecda7c7cd70b6644557c6e4ca5c6f7c7/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:242512a070817272865d37c8909059f43003b81da31f616bb0c391ceadffe067", size = 58119, upload-time = "2025-12-06T13:24:24.994Z" }, - { url = "https://files.pythonhosted.org/packages/ce/17/c21d0cde2a6c766923ae388fc1f78291e1564b0d38c814b5ea8a0e5e081c/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:5d8277554a12d3e3eed6180ebda62786bf9fc8d7bb1ee00244258f4a87ca8d20", size = 60791, upload-time = "2025-12-06T13:24:26.046Z" }, - { url = "https://files.pythonhosted.org/packages/92/b2/eaa67038916a48de12b16f4c384bcc1b84b7ec731b23613cb05f27673294/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f40b7ddd698fc1e13a4b64fbe405e4e0e1279e8197e37050e24154655f5f7c4e", size = 74701, upload-time = "2025-12-06T13:24:27.466Z" }, - { url = 
"https://files.pythonhosted.org/packages/42/10/abb7757c330bb869ebb95dab0c57edf5961ffbd6c095c8209cbbf75d117d/pybase64-1.4.3-cp313-cp313t-win32.whl", hash = "sha256:46d75c9387f354c5172582a9eaae153b53a53afeb9c19fcf764ea7038be3bd8b", size = 33965, upload-time = "2025-12-06T13:24:28.548Z" }, - { url = "https://files.pythonhosted.org/packages/63/a0/2d4e5a59188e9e6aed0903d580541aaea72dcbbab7bf50fb8b83b490b6c3/pybase64-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:d7344625591d281bec54e85cbfdab9e970f6219cac1570f2aa140b8c942ccb81", size = 36207, upload-time = "2025-12-06T13:24:29.646Z" }, - { url = "https://files.pythonhosted.org/packages/1f/05/95b902e8f567b4d4b41df768ccc438af618f8d111e54deaf57d2df46bd76/pybase64-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:28a3c60c55138e0028313f2eccd321fec3c4a0be75e57a8d3eb883730b1b0880", size = 31505, upload-time = "2025-12-06T13:24:30.687Z" }, - { url = "https://files.pythonhosted.org/packages/e4/80/4bd3dff423e5a91f667ca41982dc0b79495b90ec0c0f5d59aca513e50f8c/pybase64-1.4.3-cp314-cp314-android_24_arm64_v8a.whl", hash = "sha256:015bb586a1ea1467f69d57427abe587469392215f59db14f1f5c39b52fdafaf5", size = 33835, upload-time = "2025-12-06T13:24:31.767Z" }, - { url = "https://files.pythonhosted.org/packages/45/60/a94d94cc1e3057f602e0b483c9ebdaef40911d84a232647a2fe593ab77bb/pybase64-1.4.3-cp314-cp314-android_24_x86_64.whl", hash = "sha256:d101e3a516f837c3dcc0e5a0b7db09582ebf99ed670865223123fb2e5839c6c0", size = 40673, upload-time = "2025-12-06T13:24:32.82Z" }, - { url = "https://files.pythonhosted.org/packages/e3/71/cf62b261d431857e8e054537a5c3c24caafa331de30daede7b2c6c558501/pybase64-1.4.3-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:8f183ac925a48046abe047360fe3a1b28327afb35309892132fe1915d62fb282", size = 30939, upload-time = "2025-12-06T13:24:34.001Z" }, - { url = 
"https://files.pythonhosted.org/packages/24/3e/d12f92a3c1f7c6ab5d53c155bff9f1084ba997a37a39a4f781ccba9455f3/pybase64-1.4.3-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30bf3558e24dcce4da5248dcf6d73792adfcf4f504246967e9db155be4c439ad", size = 31401, upload-time = "2025-12-06T13:24:35.11Z" }, - { url = "https://files.pythonhosted.org/packages/9b/3d/9c27440031fea0d05146f8b70a460feb95d8b4e3d9ca8f45c972efb4c3d3/pybase64-1.4.3-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:a674b419de318d2ce54387dd62646731efa32b4b590907800f0bd40675c1771d", size = 38075, upload-time = "2025-12-06T13:24:36.53Z" }, - { url = "https://files.pythonhosted.org/packages/4b/d4/6c0e0cf0efd53c254173fbcd84a3d8fcbf5e0f66622473da425becec32a5/pybase64-1.4.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:720104fd7303d07bac302be0ff8f7f9f126f2f45c1edb4f48fdb0ff267e69fe1", size = 38257, upload-time = "2025-12-06T13:24:38.049Z" }, - { url = "https://files.pythonhosted.org/packages/50/eb/27cb0b610d5cd70f5ad0d66c14ad21c04b8db930f7139818e8fbdc14df4d/pybase64-1.4.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:83f1067f73fa5afbc3efc0565cecc6ed53260eccddef2ebe43a8ce2b99ea0e0a", size = 31685, upload-time = "2025-12-06T13:24:40.327Z" }, - { url = "https://files.pythonhosted.org/packages/db/26/b136a4b65e5c94ff06217f7726478df3f31ab1c777c2c02cf698e748183f/pybase64-1.4.3-cp314-cp314-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:b51204d349a4b208287a8aa5b5422be3baa88abf6cc8ff97ccbda34919bbc857", size = 68460, upload-time = "2025-12-06T13:24:41.735Z" }, - { url = "https://files.pythonhosted.org/packages/68/6d/84ce50e7ee1ae79984d689e05a9937b2460d4efa1e5b202b46762fb9036c/pybase64-1.4.3-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:30f2fd53efecbdde4bdca73a872a68dcb0d1bf8a4560c70a3e7746df973e1ef3", size = 71688, upload-time = "2025-12-06T13:24:42.908Z" }, - { url 
= "https://files.pythonhosted.org/packages/e3/57/6743e420416c3ff1b004041c85eb0ebd9c50e9cf05624664bfa1dc8b5625/pybase64-1.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0932b0c5cfa617091fd74f17d24549ce5de3628791998c94ba57be808078eeaf", size = 60040, upload-time = "2025-12-06T13:24:44.37Z" }, - { url = "https://files.pythonhosted.org/packages/3b/68/733324e28068a89119af2921ce548e1c607cc5c17d354690fc51c302e326/pybase64-1.4.3-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:acb61f5ab72bec808eb0d4ce8b87ec9f38d7d750cb89b1371c35eb8052a29f11", size = 56478, upload-time = "2025-12-06T13:24:45.815Z" }, - { url = "https://files.pythonhosted.org/packages/b5/9e/f3f4aa8cfe3357a3cdb0535b78eb032b671519d3ecc08c58c4c6b72b5a91/pybase64-1.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:2bc2d5bc15168f5c04c53bdfe5a1e543b2155f456ed1e16d7edce9ce73842021", size = 59463, upload-time = "2025-12-06T13:24:46.938Z" }, - { url = "https://files.pythonhosted.org/packages/aa/d1/53286038e1f0df1cf58abcf4a4a91b0f74ab44539c2547b6c31001ddd054/pybase64-1.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:8a7bc3cd23880bdca59758bcdd6f4ef0674f2393782763910a7466fab35ccb98", size = 60360, upload-time = "2025-12-06T13:24:48.039Z" }, - { url = "https://files.pythonhosted.org/packages/00/9a/5cc6ce95db2383d27ff4d790b8f8b46704d360d701ab77c4f655bcfaa6a7/pybase64-1.4.3-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:ad15acf618880d99792d71e3905b0e2508e6e331b76a1b34212fa0f11e01ad28", size = 54999, upload-time = "2025-12-06T13:24:49.547Z" }, - { url = "https://files.pythonhosted.org/packages/64/e7/c3c1d09c3d7ae79e3aa1358c6d912d6b85f29281e47aa94fc0122a415a2f/pybase64-1.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448158d417139cb4851200e5fee62677ae51f56a865d50cda9e0d61bda91b116", size = 58736, upload-time = "2025-12-06T13:24:50.641Z" }, - { url = 
"https://files.pythonhosted.org/packages/db/d5/0baa08e3d8119b15b588c39f0d39fd10472f0372e3c54ca44649cbefa256/pybase64-1.4.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:9058c49b5a2f3e691b9db21d37eb349e62540f9f5fc4beabf8cbe3c732bead86", size = 52298, upload-time = "2025-12-06T13:24:51.791Z" }, - { url = "https://files.pythonhosted.org/packages/00/87/fc6f11474a1de7e27cd2acbb8d0d7508bda3efa73dfe91c63f968728b2a3/pybase64-1.4.3-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ce561724f6522907a66303aca27dce252d363fcd85884972d348f4403ba3011a", size = 69049, upload-time = "2025-12-06T13:24:53.253Z" }, - { url = "https://files.pythonhosted.org/packages/69/9d/7fb5566f669ac18b40aa5fc1c438e24df52b843c1bdc5da47d46d4c1c630/pybase64-1.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:63316560a94ac449fe86cb8b9e0a13714c659417e92e26a5cbf085cd0a0c838d", size = 57952, upload-time = "2025-12-06T13:24:54.342Z" }, - { url = "https://files.pythonhosted.org/packages/de/cc/ceb949232dbbd3ec4ee0190d1df4361296beceee9840390a63df8bc31784/pybase64-1.4.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:7ecd796f2ac0be7b73e7e4e232b8c16422014de3295d43e71d2b19fd4a4f5368", size = 54484, upload-time = "2025-12-06T13:24:55.774Z" }, - { url = "https://files.pythonhosted.org/packages/a7/69/659f3c8e6a5d7b753b9c42a4bd9c42892a0f10044e9c7351a4148d413a33/pybase64-1.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d01e102a12fb2e1ed3dc11611c2818448626637857ec3994a9cf4809dfd23477", size = 56542, upload-time = "2025-12-06T13:24:57Z" }, - { url = "https://files.pythonhosted.org/packages/85/2c/29c9e6c9c82b72025f9676f9e82eb1fd2339ad038cbcbf8b9e2ac02798fc/pybase64-1.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ebff797a93c2345f22183f454fd8607a34d75eca5a3a4a969c1c75b304cee39d", size = 71045, upload-time = "2025-12-06T13:24:58.179Z" }, - { url = 
"https://files.pythonhosted.org/packages/b9/84/5a3dce8d7a0040a5c0c14f0fe1311cd8db872913fa04438071b26b0dac04/pybase64-1.4.3-cp314-cp314-win32.whl", hash = "sha256:28b2a1bb0828c0595dc1ea3336305cd97ff85b01c00d81cfce4f92a95fb88f56", size = 34200, upload-time = "2025-12-06T13:24:59.956Z" }, - { url = "https://files.pythonhosted.org/packages/57/bc/ce7427c12384adee115b347b287f8f3cf65860b824d74fe2c43e37e81c1f/pybase64-1.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:33338d3888700ff68c3dedfcd49f99bfc3b887570206130926791e26b316b029", size = 36323, upload-time = "2025-12-06T13:25:01.708Z" }, - { url = "https://files.pythonhosted.org/packages/9a/1b/2b8ffbe9a96eef7e3f6a5a7be75995eebfb6faaedc85b6da6b233e50c778/pybase64-1.4.3-cp314-cp314-win_arm64.whl", hash = "sha256:62725669feb5acb186458da2f9353e88ae28ef66bb9c4c8d1568b12a790dfa94", size = 31584, upload-time = "2025-12-06T13:25:02.801Z" }, - { url = "https://files.pythonhosted.org/packages/ac/d8/6824c2e6fb45b8fa4e7d92e3c6805432d5edc7b855e3e8e1eedaaf6efb7c/pybase64-1.4.3-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:153fe29be038948d9372c3e77ae7d1cab44e4ba7d9aaf6f064dbeea36e45b092", size = 38601, upload-time = "2025-12-06T13:25:04.222Z" }, - { url = "https://files.pythonhosted.org/packages/ea/e5/10d2b3a4ad3a4850be2704a2f70cd9c0cf55725c8885679872d3bc846c67/pybase64-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f7fe3decaa7c4a9e162327ec7bd81ce183d2b16f23c6d53b606649c6e0203e9e", size = 32078, upload-time = "2025-12-06T13:25:05.362Z" }, - { url = "https://files.pythonhosted.org/packages/43/04/8b15c34d3c2282f1c1b0850f1113a249401b618a382646a895170bc9b5e7/pybase64-1.4.3-cp314-cp314t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:a5ae04ea114c86eb1da1f6e18d75f19e3b5ae39cb1d8d3cd87c29751a6a22780", size = 72474, upload-time = "2025-12-06T13:25:06.434Z" }, - { url = 
"https://files.pythonhosted.org/packages/42/00/f34b4d11278f8fdc68bc38f694a91492aa318f7c6f1bd7396197ac0f8b12/pybase64-1.4.3-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1755b3dce3a2a5c7d17ff6d4115e8bee4a1d5aeae74469db02e47c8f477147da", size = 75706, upload-time = "2025-12-06T13:25:07.636Z" }, - { url = "https://files.pythonhosted.org/packages/bb/5d/71747d4ad7fe16df4c4c852bdbdeb1f2cf35677b48d7c34d3011a7a6ad3a/pybase64-1.4.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb852f900e27ffc4ec1896817535a0fa19610ef8875a096b59f21d0aa42ff172", size = 65589, upload-time = "2025-12-06T13:25:08.809Z" }, - { url = "https://files.pythonhosted.org/packages/49/b1/d1e82bd58805bb5a3a662864800bab83a83a36ba56e7e3b1706c708002a5/pybase64-1.4.3-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:9cf21ea8c70c61eddab3421fbfce061fac4f2fb21f7031383005a1efdb13d0b9", size = 60670, upload-time = "2025-12-06T13:25:10.04Z" }, - { url = "https://files.pythonhosted.org/packages/15/67/16c609b7a13d1d9fc87eca12ba2dce5e67f949eeaab61a41bddff843cbb0/pybase64-1.4.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:afff11b331fdc27692fc75e85ae083340a35105cea1a3c4552139e2f0e0d174f", size = 64194, upload-time = "2025-12-06T13:25:11.48Z" }, - { url = "https://files.pythonhosted.org/packages/3c/11/37bc724e42960f0106c2d33dc957dcec8f760c91a908cc6c0df7718bc1a8/pybase64-1.4.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9a5143df542c1ce5c1f423874b948c4d689b3f05ec571f8792286197a39ba02", size = 64984, upload-time = "2025-12-06T13:25:12.645Z" }, - { url = "https://files.pythonhosted.org/packages/6e/66/b2b962a6a480dd5dae3029becf03ea1a650d326e39bf1c44ea3db78bb010/pybase64-1.4.3-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:d62e9861019ad63624b4a7914dff155af1cc5d6d79df3be14edcaedb5fdad6f9", size = 58750, upload-time = 
"2025-12-06T13:25:13.848Z" }, - { url = "https://files.pythonhosted.org/packages/2b/15/9b6d711035e29b18b2e1c03d47f41396d803d06ef15b6c97f45b75f73f04/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:84cfd4d92668ef5766cc42a9c9474b88960ac2b860767e6e7be255c6fddbd34a", size = 63816, upload-time = "2025-12-06T13:25:15.356Z" }, - { url = "https://files.pythonhosted.org/packages/b4/21/e2901381ed0df62e2308380f30d9c4d87d6b74e33a84faed3478d33a7197/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:60fc025437f9a7c2cc45e0c19ed68ed08ba672be2c5575fd9d98bdd8f01dd61f", size = 56348, upload-time = "2025-12-06T13:25:16.559Z" }, - { url = "https://files.pythonhosted.org/packages/c4/16/3d788388a178a0407aa814b976fe61bfa4af6760d9aac566e59da6e4a8b4/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:edc8446196f04b71d3af76c0bd1fe0a45066ac5bffecca88adb9626ee28c266f", size = 72842, upload-time = "2025-12-06T13:25:18.055Z" }, - { url = "https://files.pythonhosted.org/packages/a6/63/c15b1f8bd47ea48a5a2d52a4ec61f037062932ea6434ab916107b58e861e/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:e99f6fa6509c037794da57f906ade271f52276c956d00f748e5b118462021d48", size = 62651, upload-time = "2025-12-06T13:25:19.191Z" }, - { url = "https://files.pythonhosted.org/packages/bd/b8/f544a2e37c778d59208966d4ef19742a0be37c12fc8149ff34483c176616/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:d94020ef09f624d841aa9a3a6029df8cf65d60d7a6d5c8687579fa68bd679b65", size = 58295, upload-time = "2025-12-06T13:25:20.822Z" }, - { url = "https://files.pythonhosted.org/packages/03/99/1fae8a3b7ac181e36f6e7864a62d42d5b1f4fa7edf408c6711e28fba6b4d/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:f64ce70d89942a23602dee910dec9b48e5edf94351e1b378186b74fcc00d7f66", size = 60960, upload-time = "2025-12-06T13:25:22.099Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/9e/cd4c727742345ad8384569a4466f1a1428f4e5cc94d9c2ab2f53d30be3fe/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8ea99f56e45c469818b9781903be86ba4153769f007ba0655fa3b46dc332803d", size = 74863, upload-time = "2025-12-06T13:25:23.442Z" }, - { url = "https://files.pythonhosted.org/packages/28/86/a236ecfc5b494e1e922da149689f690abc84248c7c1358f5605b8c9fdd60/pybase64-1.4.3-cp314-cp314t-win32.whl", hash = "sha256:343b1901103cc72362fd1f842524e3bb24978e31aea7ff11e033af7f373f66ab", size = 34513, upload-time = "2025-12-06T13:25:24.592Z" }, - { url = "https://files.pythonhosted.org/packages/56/ce/ca8675f8d1352e245eb012bfc75429ee9cf1f21c3256b98d9a329d44bf0f/pybase64-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:57aff6f7f9dea6705afac9d706432049642de5b01080d3718acc23af87c5af76", size = 36702, upload-time = "2025-12-06T13:25:25.72Z" }, - { url = "https://files.pythonhosted.org/packages/3b/30/4a675864877397179b09b720ee5fcb1cf772cf7bebc831989aff0a5f79c1/pybase64-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:e906aa08d4331e799400829e0f5e4177e76a3281e8a4bc82ba114c6b30e405c9", size = 31904, upload-time = "2025-12-06T13:25:26.826Z" }, - { url = "https://files.pythonhosted.org/packages/b2/7c/545fd4935a0e1ddd7147f557bf8157c73eecec9cffd523382fa7af2557de/pybase64-1.4.3-graalpy311-graalpy242_311_native-macosx_10_9_x86_64.whl", hash = "sha256:d27c1dfdb0c59a5e758e7a98bd78eaca5983c22f4a811a36f4f980d245df4611", size = 38393, upload-time = "2025-12-06T13:26:19.535Z" }, - { url = "https://files.pythonhosted.org/packages/c3/ca/ae7a96be9ddc96030d4e9dffc43635d4e136b12058b387fd47eb8301b60f/pybase64-1.4.3-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:0f1a0c51d6f159511e3431b73c25db31095ee36c394e26a4349e067c62f434e5", size = 32109, upload-time = "2025-12-06T13:26:20.72Z" }, - { url = 
"https://files.pythonhosted.org/packages/bf/44/d4b7adc7bf4fd5b52d8d099121760c450a52c390223806b873f0b6a2d551/pybase64-1.4.3-graalpy311-graalpy242_311_native-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a492518f3078a4e3faaef310697d21df9c6bc71908cebc8c2f6fbfa16d7d6b1f", size = 43227, upload-time = "2025-12-06T13:26:21.845Z" }, - { url = "https://files.pythonhosted.org/packages/08/86/2ba2d8734ef7939debeb52cf9952e457ba7aa226cae5c0e6dd631f9b851f/pybase64-1.4.3-graalpy311-graalpy242_311_native-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae1a0f47784fd16df90d8acc32011c8d5fcdd9ab392c9ec49543e5f6a9c43a4", size = 35804, upload-time = "2025-12-06T13:26:23.149Z" }, - { url = "https://files.pythonhosted.org/packages/4f/5b/19c725dc3aaa6281f2ce3ea4c1628d154a40dd99657d1381995f8096768b/pybase64-1.4.3-graalpy311-graalpy242_311_native-win_amd64.whl", hash = "sha256:03cea70676ffbd39a1ab7930a2d24c625b416cacc9d401599b1d29415a43ab6a", size = 35880, upload-time = "2025-12-06T13:26:24.663Z" }, - { url = "https://files.pythonhosted.org/packages/17/45/92322aec1b6979e789b5710f73c59f2172bc37c8ce835305434796824b7b/pybase64-1.4.3-graalpy312-graalpy250_312_native-macosx_10_13_x86_64.whl", hash = "sha256:2baaa092f3475f3a9c87ac5198023918ea8b6c125f4c930752ab2cbe3cd1d520", size = 38746, upload-time = "2025-12-06T13:26:25.869Z" }, - { url = "https://files.pythonhosted.org/packages/11/94/f1a07402870388fdfc2ecec0c718111189732f7d0f2d7fe1386e19e8fad0/pybase64-1.4.3-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:cde13c0764b1af07a631729f26df019070dad759981d6975527b7e8ecb465b6c", size = 32573, upload-time = "2025-12-06T13:26:27.792Z" }, - { url = "https://files.pythonhosted.org/packages/fa/8f/43c3bb11ca9bacf81cb0b7a71500bb65b2eda6d5fe07433c09b543de97f3/pybase64-1.4.3-graalpy312-graalpy250_312_native-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:5c29a582b0ea3936d02bd6fe9bf674ab6059e6e45ab71c78404ab2c913224414", size = 43461, upload-time = "2025-12-06T13:26:28.906Z" }, - { url = "https://files.pythonhosted.org/packages/2d/4c/2a5258329200be57497d3972b5308558c6de42e3749c6cc2aa1cbe34b25a/pybase64-1.4.3-graalpy312-graalpy250_312_native-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b6b664758c804fa919b4f1257aa8cf68e95db76fc331de5f70bfc3a34655afe1", size = 36058, upload-time = "2025-12-06T13:26:30.092Z" }, - { url = "https://files.pythonhosted.org/packages/ea/6d/41faa414cde66ec023b0ca8402a8f11cb61731c3dc27c082909cbbd1f929/pybase64-1.4.3-graalpy312-graalpy250_312_native-win_amd64.whl", hash = "sha256:f7537fa22ae56a0bf51e4b0ffc075926ad91c618e1416330939f7ef366b58e3b", size = 36231, upload-time = "2025-12-06T13:26:31.656Z" }, - { url = "https://files.pythonhosted.org/packages/2a/cf/6e712491bd665ea8633efb0b484121893ea838d8e830e06f39f2aae37e58/pybase64-1.4.3-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94cf50c36bb2f8618982ee5a978c4beed9db97d35944fa96e8586dd953c7994a", size = 38007, upload-time = "2025-12-06T13:26:32.804Z" }, - { url = "https://files.pythonhosted.org/packages/38/c0/9272cae1c49176337dcdbd97511e2843faae1aaf5a5fb48569093c6cd4ce/pybase64-1.4.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:01bc3ff5ca1341685c6d2d945b035f442f7b9c3b068a5c6ee8408a41fda5754e", size = 31538, upload-time = "2025-12-06T13:26:34.001Z" }, - { url = "https://files.pythonhosted.org/packages/20/f2/17546f97befe429c73f622bbd869ceebb518c40fdb0dec4c4f98312e80a5/pybase64-1.4.3-pp310-pypy310_pp73-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:03d0aa3761a99034960496280c02aa063f856a3cc9b33771bc4eab0e4e72b5c2", size = 40682, upload-time = "2025-12-06T13:26:35.168Z" }, - { url = 
"https://files.pythonhosted.org/packages/92/a0/464b36d5dfb61f3da17858afaeaa876a9342d58e9f17803ce7f28b5de9e8/pybase64-1.4.3-pp310-pypy310_pp73-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7ca5b1ce768520acd6440280cdab35235b27ad2faacfcec064bc9c3377066ef1", size = 41306, upload-time = "2025-12-06T13:26:36.351Z" }, - { url = "https://files.pythonhosted.org/packages/07/c9/a748dfc0969a8d960ecf1e82c8a2a16046ffec22f8e7ece582aa3b1c6cf9/pybase64-1.4.3-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3caa1e2ddad1c50553ffaaa1c86b74b3f9fbd505bea9970326ab88fc68c4c184", size = 35452, upload-time = "2025-12-06T13:26:37.772Z" }, - { url = "https://files.pythonhosted.org/packages/95/b7/4d37bd3577d1aa6c732dc099087fe027c48873e223de3784b095e5653f8b/pybase64-1.4.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:bd47076f736b27a8b0f9b30d93b6bb4f5af01b0dc8971f883ed3b75934f39a99", size = 36125, upload-time = "2025-12-06T13:26:39.78Z" }, - { url = "https://files.pythonhosted.org/packages/b2/76/160dded493c00d3376d4ad0f38a2119c5345de4a6693419ad39c3565959b/pybase64-1.4.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:277de6e03cc9090fb359365c686a2a3036d23aee6cd20d45d22b8c89d1247f17", size = 37939, upload-time = "2025-12-06T13:26:41.014Z" }, - { url = "https://files.pythonhosted.org/packages/b7/b8/a0f10be8d648d6f8f26e560d6e6955efa7df0ff1e009155717454d76f601/pybase64-1.4.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ab1dd8b1ed2d1d750260ed58ab40defaa5ba83f76a30e18b9ebd5646f6247ae5", size = 31466, upload-time = "2025-12-06T13:26:42.539Z" }, - { url = "https://files.pythonhosted.org/packages/d3/22/832a2f9e76cdf39b52e01e40d8feeb6a04cf105494f2c3e3126d0149717f/pybase64-1.4.3-pp311-pypy311_pp73-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:bd4d2293de9fd212e294c136cec85892460b17d24e8c18a6ba18750928037750", size = 40681, upload-time = 
"2025-12-06T13:26:43.782Z" }, - { url = "https://files.pythonhosted.org/packages/12/d7/6610f34a8972415fab3bb4704c174a1cc477bffbc3c36e526428d0f3957d/pybase64-1.4.3-pp311-pypy311_pp73-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2af6d0d3a691911cc4c9a625f3ddcd3af720738c21be3d5c72de05629139d393", size = 41294, upload-time = "2025-12-06T13:26:44.936Z" }, - { url = "https://files.pythonhosted.org/packages/64/25/ed24400948a6c974ab1374a233cb7e8af0a5373cea0dd8a944627d17c34a/pybase64-1.4.3-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5cfc8c49a28322d82242088378f8542ce97459866ba73150b062a7073e82629d", size = 35447, upload-time = "2025-12-06T13:26:46.098Z" }, - { url = "https://files.pythonhosted.org/packages/ee/2b/e18ee7c5ee508a82897f021c1981533eca2940b5f072fc6ed0906c03a7a7/pybase64-1.4.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:debf737e09b8bf832ba86f5ecc3d3dbd0e3021d6cd86ba4abe962d6a5a77adb3", size = 36134, upload-time = "2025-12-06T13:26:47.35Z" }, -] - [[package]] name = "pycparser" version = "3.0" @@ -4555,21 +3171,6 @@ crypto = [ { name = "cryptography" }, ] -[[package]] -name = "pymupdf" -version = "1.26.7" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/48/d6/09b28f027b510838559f7748807192149c419b30cb90e6d5f0cf916dc9dc/pymupdf-1.26.7.tar.gz", hash = "sha256:71add8bdc8eb1aaa207c69a13400693f06ad9b927bea976f5d5ab9df0bb489c3", size = 84327033, upload-time = "2025-12-11T21:48:50.694Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/94/35/cd74cea1787b2247702ef8522186bdef32e9cb30a099e6bb864627ef6045/pymupdf-1.26.7-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:07085718dfdae5ab83b05eb5eb397f863bcc538fe05135318a01ea353e7a1353", size = 23179369, upload-time = "2025-12-11T21:47:21.587Z" }, - { url = 
"https://files.pythonhosted.org/packages/72/74/448b6172927c829c6a3fba80078d7b0a016ebbe2c9ee528821f5ea21677a/pymupdf-1.26.7-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:31aa9c8377ea1eea02934b92f4dcf79fb2abba0bf41f8a46d64c3e31546a3c02", size = 22470101, upload-time = "2025-12-11T21:47:37.105Z" }, - { url = "https://files.pythonhosted.org/packages/65/e7/47af26f3ac76be7ac3dd4d6cc7ee105948a8355d774e5ca39857bf91c11c/pymupdf-1.26.7-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:e419b609996434a14a80fa060adec72c434a1cca6a511ec54db9841bc5d51b3c", size = 23502486, upload-time = "2025-12-12T09:51:25.824Z" }, - { url = "https://files.pythonhosted.org/packages/2a/6b/3de1714d734ff949be1e90a22375d0598d3540b22ae73eb85c2d7d1f36a9/pymupdf-1.26.7-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:69dfc78f206a96e5b3ac22741263ebab945fdf51f0dbe7c5757c3511b23d9d72", size = 24115727, upload-time = "2025-12-11T21:47:51.274Z" }, - { url = "https://files.pythonhosted.org/packages/62/9b/f86224847949577a523be2207315ae0fd3155b5d909cd66c274d095349a3/pymupdf-1.26.7-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1d5106f46e1ca0d64d46bd51892372a4f82076bdc14a9678d33d630702abca36", size = 24324386, upload-time = "2025-12-12T14:58:45.483Z" }, - { url = "https://files.pythonhosted.org/packages/85/8e/a117d39092ca645fde8b903f4a941d9aa75b370a67b4f1f435f56393dc5a/pymupdf-1.26.7-cp310-abi3-win32.whl", hash = "sha256:7c9645b6f5452629c747690190350213d3e5bbdb6b2eca227d82702b327f6eee", size = 17203888, upload-time = "2025-12-12T13:59:57.613Z" }, - { url = "https://files.pythonhosted.org/packages/dd/c3/d0047678146c294469c33bae167c8ace337deafb736b0bf97b9bc481aa65/pymupdf-1.26.7-cp310-abi3-win_amd64.whl", hash = "sha256:425b1befe40d41b72eb0fe211711c7ae334db5eb60307e9dd09066ed060cceba", size = 18405952, upload-time = "2025-12-11T21:48:02.947Z" }, -] - [[package]] name = "pynacl" version = "1.6.2" @@ -4626,56 +3227,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, ] -[[package]] -name = "pypdfium2" -version = "5.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b0/98/6b44bf82ddb3c7a3e0249203772aad8981b4491d6227f182685f310faeff/pypdfium2-5.9.0.tar.gz", hash = "sha256:db1274bd27844db6fda17ef1dbcd0026c47d357437058d838e98060c0da9e92e", size = 272455, upload-time = "2026-06-01T15:43:38.08Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/d9/59630cb40e5f37e7712e6ea65e9cac633f4195e8b737bb3a46054aa63340/pypdfium2-5.9.0-py3-none-android_23_arm64_v8a.whl", hash = "sha256:91914837c4a4285b3e0724a84eca8079363db7475acbcab405933d1807785664", size = 3407817, upload-time = "2026-06-01T15:42:58.426Z" }, - { url = "https://files.pythonhosted.org/packages/0f/3d/e205708835a3730d5242652b6577ac06ad4721e6fcef77cc7c9d3541c686/pypdfium2-5.9.0-py3-none-android_23_armeabi_v7a.whl", hash = "sha256:90610d352f050b065b703f3a46602a852fce7dd8787300c8c7a472485b644d8f", size = 2862706, upload-time = "2026-06-01T15:43:00.581Z" }, - { url = "https://files.pythonhosted.org/packages/01/47/e843fb895a891438b3f8c6d834fdc9c19183cd60980fc9325429d5c01505/pypdfium2-5.9.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:6c4fbe3a7190b329c526358fb2855d797f7b74b5ecfc61d19657ef20bcebc108", size = 3489945, upload-time = "2026-06-01T15:43:02.542Z" }, - { url = "https://files.pythonhosted.org/packages/35/bd/f5e6afd556f97fcaa2bec4cb04669664c166028fc2a059bd65447c852b43/pypdfium2-5.9.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:e93f0cf440169a3e445e6fbd06c803877e7418f3e13254287875cb67f208bb5a", size = 3674186, upload-time = "2026-06-01T15:43:04.496Z" }, - { url = 
"https://files.pythonhosted.org/packages/6d/4d/5286812216a292d51dfba8e7bff276da198f126508f8c2afa3630bf701dc/pypdfium2-5.9.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d902e03dff5efd51d93cd23d3e55bde53802fa6207bcd0e455239518859a069", size = 3669571, upload-time = "2026-06-01T15:43:06.571Z" }, - { url = "https://files.pythonhosted.org/packages/ac/c8/822db2c89baa13e6cee321d587fcd42df463a1fc2f7520b3f6814768bc71/pypdfium2-5.9.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6cf38d7ad3575947b82384869f2ab69ba345eb21d83118d25db3e83f967b0421", size = 3400412, upload-time = "2026-06-01T15:43:08.35Z" }, - { url = "https://files.pythonhosted.org/packages/1a/dd/7d09d8cdc28383df13f739a97ac4f1215a704a97a29506dee2bf89d8a350/pypdfium2-5.9.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:77f7479a28b43aa658735e3ce79cfd1fccd5d42db035c21bb4c26e8bd7e280e5", size = 3803326, upload-time = "2026-06-01T15:43:10.054Z" }, - { url = "https://files.pythonhosted.org/packages/99/58/3f4e04ffe1ae62b437de07a96da672091cef62b619d0dc78207c1af442e6/pypdfium2-5.9.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:07e6ba170d577eabf60dbba701d051c64318dd029d38ca5907d83ae1a66fe779", size = 4216890, upload-time = "2026-06-01T15:43:11.701Z" }, - { url = "https://files.pythonhosted.org/packages/1d/f6/2dde4656750c4a6da99e1f070ca09d2b5a9d68186b42e711a1a3e5b1cb32/pypdfium2-5.9.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ce3a3dd23ec0adaa079d8be54565ba2aa2f6060e76a4989cd42dabc163d74ee", size = 3728830, upload-time = "2026-06-01T15:43:13.329Z" }, - { url = "https://files.pythonhosted.org/packages/d0/ca/f2ff8b9200c7dfc5aee85126edc856eb93c7056085da2454a75ef1e4dbc4/pypdfium2-5.9.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae177938f5cf95a275db25a4f8553e2ebd954ecda2f9bc84848ba4b027ce438f", size = 4063322, upload-time = "2026-06-01T15:43:15.158Z" }, - { url 
= "https://files.pythonhosted.org/packages/64/88/0b587de03c873c28adc59f6ac959de4032d3f3bc946094523b14a192d9c3/pypdfium2-5.9.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ffe49edde2ac86f28ca7e58f565255a442f38a7508fff31b79a55f508f25a31e", size = 4039738, upload-time = "2026-06-01T15:43:16.975Z" }, - { url = "https://files.pythonhosted.org/packages/83/4c/fa627f00a954e66465e929077cf43bd012595091fff82758d989486e7bdc/pypdfium2-5.9.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b7b760bc2957ecf73c274af6ed8b168a2dcb328ac0a0f7ed6123cd92f6e7c9c9", size = 4997259, upload-time = "2026-06-01T15:43:18.915Z" }, - { url = "https://files.pythonhosted.org/packages/32/f0/1736d80c5d12d931f74ca6b4213b006ee016ec33c6325fad870234cc240c/pypdfium2-5.9.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:7cdc8e5d2f8d82add1e4f70a4fbe5f3b33c17f301ebde38c669fd7f78a7d032c", size = 4537061, upload-time = "2026-06-01T15:43:20.879Z" }, - { url = "https://files.pythonhosted.org/packages/01/00/aa8890dfd385b2e7365034231987029cff15cc7eb4f06e8380da5608738a/pypdfium2-5.9.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:38a058dbd4929acaf0ab9171179eb86c24d8c6655a6836006796105a9f200890", size = 5232786, upload-time = "2026-06-01T15:43:23.73Z" }, - { url = "https://files.pythonhosted.org/packages/65/12/8f45ea698781a0bed96ac4fbde440060790863273943461f0f160a993d52/pypdfium2-5.9.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:1894511a0e862e7ec5679f3a6dc43ac72c4ef92c7ca438357203913e8634a643", size = 5170121, upload-time = "2026-06-01T15:43:25.858Z" }, - { url = "https://files.pythonhosted.org/packages/25/bd/9bb6ba375796e1de1d6c1af8d8303dd1781190346871c81a94d4e09eddfd/pypdfium2-5.9.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:040f5513b808db705d4878f57e2bf0b9dc6e6a0ad8d765c36cf62febf3933b28", size = 4663540, upload-time = "2026-06-01T15:43:27.677Z" }, - { url = 
"https://files.pythonhosted.org/packages/d2/4a/fd103bac197f22038bf70be1f7507ced7519f1214ea0dae137f37803ab8a/pypdfium2-5.9.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:f4991ae39bcea757552579bba4aebfaedb71c96dd35c2292f957b8ac9132f1ff", size = 5090619, upload-time = "2026-06-01T15:43:29.522Z" }, - { url = "https://files.pythonhosted.org/packages/22/89/9531fa1e6e004fe522cdca0cd945cd6a9d7338e7125e6b0734d632d31fa6/pypdfium2-5.9.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:25ff1a5abd08ff9e87f62e5dac114ea95647c257fbbdbe029be8db71a6d7650b", size = 5050806, upload-time = "2026-06-01T15:43:31.322Z" }, - { url = "https://files.pythonhosted.org/packages/fc/d0/e53c68555ff128b2470e4a468762b320d9c6ae2c914decea3487d923982f/pypdfium2-5.9.0-py3-none-win32.whl", hash = "sha256:b0057dc8c2033584dc3e61afb5f23a135dab52b081695b435e27f9b7b074c605", size = 3670966, upload-time = "2026-06-01T15:43:32.991Z" }, - { url = "https://files.pythonhosted.org/packages/da/0c/22e5fc035ad1594b44f265bc0a59ae34d377bc2ea74a92793e7a674bf96d/pypdfium2-5.9.0-py3-none-win_amd64.whl", hash = "sha256:06508c33b9772cf3878e48364c6e14c70cefc18a3abd6983ac9f338da9305275", size = 3800959, upload-time = "2026-06-01T15:43:34.536Z" }, - { url = "https://files.pythonhosted.org/packages/11/e3/cf1711add7add22a17f7c7633cd795edc92f17ab7bdf1930493ae0f56680/pypdfium2-5.9.0-py3-none-win_arm64.whl", hash = "sha256:565ddfc98795fd2f6054b544ee9791d7b9032f9cf77a57891b6e501fafd0ef3f", size = 3585718, upload-time = "2026-06-01T15:43:36.521Z" }, -] - -[[package]] -name = "pypika" -version = "0.51.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f8/78/cbaebba88e05e2dcda13ca203131b38d3640219f20ebb49676d26714861b/pypika-0.51.1.tar.gz", hash = "sha256:c30c7c1048fbf056fd3920c5a2b88b0c29dd190a9b2bee971fd17e4abe4d0ebe", size = 80919, upload-time = "2026-02-04T11:27:48.304Z" 
} -wheels = [ - { url = "https://files.pythonhosted.org/packages/57/83/c77dfeed04022e8930b08eedca2b6e5efed256ab3321396fde90066efb65/pypika-0.51.1-py2.py3-none-any.whl", hash = "sha256:77985b4d7ce71b9905255bf12468cf598349e98837c037541cfc240e528aec46", size = 60585, upload-time = "2026-02-04T11:27:46.251Z" }, -] - -[[package]] -name = "pyproject-hooks" -version = "1.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/82/28175b2414effca1cdac8dc99f76d660e7a4fb0ceefa4b4ab8f5f6742925/pyproject_hooks-1.2.0.tar.gz", hash = "sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8", size = 19228, upload-time = "2024-09-29T09:24:13.293Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl", hash = "sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913", size = 10216, upload-time = "2024-09-29T09:24:11.978Z" }, -] - [[package]] name = "pytest" version = "9.0.3" @@ -4771,19 +3322,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] -[[package]] -name = "python-docx" -version = "1.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "lxml" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a9/f7/eddfe33871520adab45aaa1a71f0402a2252050c14c7e3009446c8f4701c/python_docx-1.2.0.tar.gz", hash = "sha256:7bc9d7b7d8a69c9c02ca09216118c86552704edc23bac179283f2e38f86220ce", size = 5723256, upload-time = "2025-06-16T20:46:27.921Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/d0/00/1e03a4989fa5795da308cd774f05b704ace555a70f9bf9d3be057b680bcf/python_docx-1.2.0-py3-none-any.whl", hash = "sha256:3fd478f3250fbbbfd3b94fe1e985955737c145627498896a8a6bf81f4baf66c7", size = 252987, upload-time = "2025-06-16T20:46:22.506Z" }, -] - [[package]] name = "python-dotenv" version = "1.2.2" @@ -4829,15 +3367,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/04/e8135ebd1ad02c56ec633277529b2602ff99ff634be76cdba5744cf554fd/python_multipart-0.0.32-py3-none-any.whl", hash = "sha256:ff6d3f776f16878c894e52e107296ffc890e913c611b1a4ec6c44e2821fe2e23", size = 30042, upload-time = "2026-06-04T16:18:57.319Z" }, ] -[[package]] -name = "pytube" -version = "15.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/e7/16fec46c8d255c4bbc4b185d89c91dc92cdb802836570d8004d0db169c91/pytube-15.0.0.tar.gz", hash = "sha256:076052efe76f390dfa24b1194ff821d4e86c17d41cb5562f3a276a8bcbfc9d1d", size = 67229, upload-time = "2023-05-07T19:39:01.903Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/51/64/bcf8632ed2b7a36bbf84a0544885ffa1d0b4bcf25cc0903dba66ec5fdad9/pytube-15.0.0-py3-none-any.whl", hash = "sha256:07b9904749e213485780d7eb606e5e5b8e4341aa4dccf699160876da00e12d78", size = 57594, upload-time = "2023-05-07T19:38:59.191Z" }, -] - [[package]] name = "pywin32" version = "312" @@ -5078,19 +3607,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/f4/c67b0b3f1b9245e8d266f0f112c500d50e5b4e83cb6f3b71b6528104182a/requests-2.34.2-py3-none-any.whl", hash = "sha256:2a0d60c172f83ac6ab31e4554906c0f3b3588d37b5cb939b1c061f4907e278e0", size = 73075, upload-time = "2026-05-14T19:25:26.443Z" }, ] -[[package]] -name = "requests-oauthlib" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "oauthlib" }, - { name = "requests" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650, upload-time = "2024-03-22T20:32:29.939Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179, upload-time = "2024-03-22T20:32:28.055Z" }, -] - [[package]] name = "requests-toolbelt" version = "1.0.0" @@ -5786,15 +4302,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/87/3e/b0fbd7621e6430d33d234ad5e2dc9b0d4df575518f4790a2af934e1029ea/sortedcontainers_stubs-2.4.3-py3-none-any.whl", hash = "sha256:4496109dfa6645e4b675f57fbc7e42ec4d1bed2c74aab7fa379e0795e49fe406", size = 8816, upload-time = "2025-04-23T07:41:58.625Z" }, ] -[[package]] -name = "soupsieve" -version = "2.8.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/47/2c/0a5f6f8ee0d5589e48c7640213ed5175d52cf540a06725b628cc1a45d6ce/soupsieve-2.8.4.tar.gz", hash = "sha256:e121fd02e975c695e4e9e8774a5ee35d74714b59307868dcc5319ad2d9e3328e", size = 121110, upload-time = "2026-05-24T13:55:57.154Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5e/f5/0c41cb68dcae6b7de4fac4188a3a9589e21fb31df21ea3a2e888db95e6c9/soupsieve-2.8.4-py3-none-any.whl", hash = "sha256:e7e6b0769c8f51ed59acab6e994b00621096cfb1c640a7509295987388fbaf65", size = 37304, upload-time = "2026-05-24T13:55:55.406Z" }, -] - [[package]] name = "sqlite-vec" version = "0.1.9" @@ -5877,23 +4384,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl", hash = 
"sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687", size = 28165, upload-time = "2024-07-05T07:25:29.591Z" }, ] -[[package]] -name = "textual" -version = "8.2.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markdown-it-py", extra = ["linkify"] }, - { name = "mdit-py-plugins" }, - { name = "platformdirs" }, - { name = "pygments" }, - { name = "rich" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9b/7a/c519db0aba5024f86e71e9631810bfdd6866ed2c8695bd7fa34b90e7ef59/textual-8.2.7.tar.gz", hash = "sha256:658f568ff81e30ed43890c3e07520390e5cf1b4763822006e060656b0a88f105", size = 1859249, upload-time = "2026-05-19T10:52:49.531Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/f5/c1e18bc0707300a0e90204343abbf7d7acd6fb7ebe03a6d4893b99a234b8/textual-8.2.7-py3-none-any.whl", hash = "sha256:4caaa13a90bc4cf9c6c862c067ccd34fe84e9c161710a2a907a8026313b6bd73", size = 731129, upload-time = "2026-05-19T10:52:51.773Z" }, -] - [[package]] name = "threadpoolctl" version = "3.6.0" @@ -5903,67 +4393,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, ] -[[package]] -name = "tiktoken" -version = "0.12.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "regex" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/89/b3/2cb7c17b6c4cf8ca983204255d3f1d95eda7213e247e6947a0ee2c747a2c/tiktoken-0.12.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3de02f5a491cfd179aec916eddb70331814bd6bf764075d39e21d5862e533970", size = 1051991, upload-time = "2025-10-06T20:21:34.098Z" }, - { url = "https://files.pythonhosted.org/packages/27/0f/df139f1df5f6167194ee5ab24634582ba9a1b62c6b996472b0277ec80f66/tiktoken-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6cfb6d9b7b54d20af21a912bfe63a2727d9cfa8fbda642fd8322c70340aad16", size = 995798, upload-time = "2025-10-06T20:21:35.579Z" }, - { url = "https://files.pythonhosted.org/packages/ef/5d/26a691f28ab220d5edc09b9b787399b130f24327ef824de15e5d85ef21aa/tiktoken-0.12.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:cde24cdb1b8a08368f709124f15b36ab5524aac5fa830cc3fdce9c03d4fb8030", size = 1129865, upload-time = "2025-10-06T20:21:36.675Z" }, - { url = "https://files.pythonhosted.org/packages/b2/94/443fab3d4e5ebecac895712abd3849b8da93b7b7dec61c7db5c9c7ebe40c/tiktoken-0.12.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:6de0da39f605992649b9cfa6f84071e3f9ef2cec458d08c5feb1b6f0ff62e134", size = 1152856, upload-time = "2025-10-06T20:21:37.873Z" }, - { url = "https://files.pythonhosted.org/packages/54/35/388f941251b2521c70dd4c5958e598ea6d2c88e28445d2fb8189eecc1dfc/tiktoken-0.12.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6faa0534e0eefbcafaccb75927a4a380463a2eaa7e26000f0173b920e98b720a", size = 1195308, upload-time = "2025-10-06T20:21:39.577Z" }, - { url = "https://files.pythonhosted.org/packages/f8/00/c6681c7f833dd410576183715a530437a9873fa910265817081f65f9105f/tiktoken-0.12.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:82991e04fc860afb933efb63957affc7ad54f83e2216fe7d319007dab1ba5892", size = 1255697, upload-time = "2025-10-06T20:21:41.154Z" }, - { url = 
"https://files.pythonhosted.org/packages/5f/d2/82e795a6a9bafa034bf26a58e68fe9a89eeaaa610d51dbeb22106ba04f0a/tiktoken-0.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:6fb2995b487c2e31acf0a9e17647e3b242235a20832642bb7a9d1a181c0c1bb1", size = 879375, upload-time = "2025-10-06T20:21:43.201Z" }, - { url = "https://files.pythonhosted.org/packages/de/46/21ea696b21f1d6d1efec8639c204bdf20fde8bafb351e1355c72c5d7de52/tiktoken-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb", size = 1051565, upload-time = "2025-10-06T20:21:44.566Z" }, - { url = "https://files.pythonhosted.org/packages/c9/d9/35c5d2d9e22bb2a5f74ba48266fb56c63d76ae6f66e02feb628671c0283e/tiktoken-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa", size = 995284, upload-time = "2025-10-06T20:21:45.622Z" }, - { url = "https://files.pythonhosted.org/packages/01/84/961106c37b8e49b9fdcf33fe007bb3a8fdcc380c528b20cc7fbba80578b8/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc", size = 1129201, upload-time = "2025-10-06T20:21:47.074Z" }, - { url = "https://files.pythonhosted.org/packages/6a/d0/3d9275198e067f8b65076a68894bb52fd253875f3644f0a321a720277b8a/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:47a5bc270b8c3db00bb46ece01ef34ad050e364b51d406b6f9730b64ac28eded", size = 1152444, upload-time = "2025-10-06T20:21:48.139Z" }, - { url = "https://files.pythonhosted.org/packages/78/db/a58e09687c1698a7c592e1038e01c206569b86a0377828d51635561f8ebf/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:508fa71810c0efdcd1b898fda574889ee62852989f7c1667414736bcb2b9a4bd", size = 1195080, upload-time = "2025-10-06T20:21:49.246Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/1b/a9e4d2bf91d515c0f74afc526fd773a812232dd6cda33ebea7f531202325/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1af81a6c44f008cba48494089dd98cccb8b313f55e961a52f5b222d1e507967", size = 1255240, upload-time = "2025-10-06T20:21:50.274Z" }, - { url = "https://files.pythonhosted.org/packages/9d/15/963819345f1b1fb0809070a79e9dd96938d4ca41297367d471733e79c76c/tiktoken-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:3e68e3e593637b53e56f7237be560f7a394451cb8c11079755e80ae64b9e6def", size = 879422, upload-time = "2025-10-06T20:21:51.734Z" }, - { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" }, - { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" }, - { url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" }, - { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" }, - { url = 
"https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" }, - { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" }, - { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" }, - { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" }, - { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" }, - { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" }, - { url = 
"https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" }, - { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" }, - { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" }, - { url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload-time = "2025-10-06T20:22:08.418Z" }, - { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" }, - { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" }, - { url = 
"https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" }, - { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" }, - { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" }, - { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" }, - { url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" }, - { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" }, - { url = 
"https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" }, - { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" }, - { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" }, - { url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" }, - { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" }, - { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload-time = "2025-10-06T20:22:26.753Z" }, - { url = 
"https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" }, - { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" }, - { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" }, - { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" }, - { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" }, - { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" }, - { url = 
"https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" }, -] - [[package]] name = "tokenizers" version = "0.22.2" @@ -6003,15 +4432,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cf/db/ce8eda256fa131af12e0a76d481711abe4681b6923c27efb9a255c9e4594/tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38", size = 13237, upload-time = "2024-10-02T10:46:11.806Z" }, ] -[[package]] -name = "tomli-w" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d4/19/b65f1a088ee23e37cdea415b357843eca8b1422a7b11a9eee6e35d4ec273/tomli_w-1.1.0.tar.gz", hash = "sha256:49e847a3a304d516a169a601184932ef0f6b61623fe680f836a2aa7128ed0d33", size = 6929, upload-time = "2024-10-08T11:13:29.279Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c4/ac/ce90573ba446a9bbe65838ded066a805234d159b4446ae9f8ec5bbd36cbd/tomli_w-1.1.0-py3-none-any.whl", hash = "sha256:1403179c78193e3184bfaade390ddbd071cba48a32a2e62ba11aae47490c63f7", size = 6440, upload-time = "2024-10-08T11:13:27.897Z" }, -] - [[package]] name = "tomlkit" version = "0.15.0" @@ -6185,15 +4605,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, ] -[[package]] -name = "uc-micro-py" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/78/67/9a363818028526e2d4579334460df777115bdec1bb77c08f9db88f6389f2/uc_micro_py-2.0.0.tar.gz", hash = "sha256:c53691e495c8db60e16ffc4861a35469b0ba0821fe409a8a7a0a71864d33a811", size = 6611, upload-time = "2026-03-01T06:31:27.526Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/61/73/d21edf5b204d1467e06500080a50f79d49ef2b997c79123a536d4a17d97c/uc_micro_py-2.0.0-py3-none-any.whl", hash = "sha256:3603a3859af53e5a39bc7677713c78ea6589ff188d70f4fee165db88e22b242c", size = 6383, upload-time = "2026-03-01T06:31:26.257Z" }, -] - [[package]] name = "uritemplate" version = "4.2.0" @@ -6325,32 +4736,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/44/e7/3937b9a9d6745b94dbe7b86531e098db8c53b77c8d07df7daa9577a47b8e/uuid_utils-0.16.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:680799a9ade01d69c53cb9d41392ced24919d4f600bfab5060b61fca37510097", size = 178508, upload-time = "2026-05-19T07:43:43.774Z" }, ] -[[package]] -name = "uv" -version = "0.11.19" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/67/f0/6254502aebfdc0a9df6069269a126dd58252ac29d2d6cdf4777cea3e90b5/uv-0.11.19.tar.gz", hash = "sha256:f56f5bf853626a30423052d7ee00bf5cc940a08347d6ee7ede96862d084054a5", size = 4213580, upload-time = "2026-06-03T22:37:15.976Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/73/be32c2f6ba30fa9d8b3baceb478107cc23722d4aaab87145a332e4985185/uv-0.11.19-py3-none-linux_armv6l.whl", hash = "sha256:c729f56ffef9b945053412c839695e8a0b13758aa15b7763e95a7dd539a6f522", size = 23620003, upload-time = "2026-06-03T22:37:53.017Z" }, - { url = "https://files.pythonhosted.org/packages/fd/ed/3aefe4a4ca4ac9204c6745670dbe12f4add69194d40f5abd1c7bd45ba9af/uv-0.11.19-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a98495b9dd67287d8c1a0786f98cb037a50f0ee6c3d648572edaa7137aabc277", size = 23183211, upload-time = "2026-06-03T22:37:20.699Z" }, - { url = 
"https://files.pythonhosted.org/packages/5b/eb/5d1469f9e709d56066f292978711fbf1f805b7fb46f901d3c1f260fd9908/uv-0.11.19-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7fdd881cd6d80782afcf8c1d446dd15a42985167fd812b763d38ba1e4a8d944d", size = 21754003, upload-time = "2026-06-03T22:37:05.027Z" }, - { url = "https://files.pythonhosted.org/packages/7b/93/109b5ee6678f54492f94fdef74149643eaa1f2f4716906a2a10816b31247/uv-0.11.19-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:7222f45b5541551057bfc2e3021f113800704f665c119fdf3ea700c6c4859b21", size = 23518832, upload-time = "2026-06-03T22:37:28.794Z" }, - { url = "https://files.pythonhosted.org/packages/08/0c/8c59bbcf78e94ca9994256920efa99d1c4dc9d0b966eb62ebba075585a16/uv-0.11.19-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:2e0e0b8ad59ec56f1440d6e4313b64a1d8119275dcec73d19eef33c43f99428c", size = 23163128, upload-time = "2026-06-03T22:37:23.226Z" }, - { url = "https://files.pythonhosted.org/packages/89/d6/69caf9e6f11c84b5fb92df190b46fbecb7dc6645ae891c6ed66d7aaaa310/uv-0.11.19-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f4aa17ffd719daf37b7a6265efd3ee4922a8ddaabaf0406d2b28c7e5ce2f20ff", size = 23164395, upload-time = "2026-06-03T22:37:18.11Z" }, - { url = "https://files.pythonhosted.org/packages/d6/83/0c2242b77c51ac33a0ddd8b06790429a0b8b9623974c9594ab2b0070ec47/uv-0.11.19-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32d7988c0dfb6f90941f201c871a4478e96e4f2a32bdb2256d62a78ee20593fc", size = 24541708, upload-time = "2026-06-03T22:37:08.093Z" }, - { url = "https://files.pythonhosted.org/packages/54/10/b1404fc52c0eddc3655f57a8b76e79dcf8dd02568382272f17e2fa68c4bb/uv-0.11.19-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2d663bacb97e2e8412d1c26eace28c7ebbde9d6f5d7d78760fafd114d693817f", size = 25575501, upload-time = "2026-06-03T22:37:47.526Z" }, - { url = 
"https://files.pythonhosted.org/packages/7c/17/4cda5994195ba9ce1f6971d40d5f2ceec58e2a79030d9052b3bf322557b1/uv-0.11.19-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:574f5dd4f31666661ea6386d3b91c5f0e8b84a8cae98ebba447c4674f2e6a4c7", size = 24827200, upload-time = "2026-06-03T22:37:34.039Z" }, - { url = "https://files.pythonhosted.org/packages/5a/74/2bd8b51e1d76210fd424ae55ec3f34ded5a10eeff3dd38aeb03c816a0af2/uv-0.11.19-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:731d9fab8db5d41590af64236d03f8069c8da665fd0f9493b85985f19c86cd90", size = 24872664, upload-time = "2026-06-03T22:37:11.301Z" }, - { url = "https://files.pythonhosted.org/packages/06/b1/44b0764f656bbdd0728118610a63f2feddd9cbe450f974d80c5bb56aad34/uv-0.11.19-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:301fd78309fc545c2cec2bfcc61a6bbdde876856c6d2041502737cf44085c178", size = 23617890, upload-time = "2026-06-03T22:37:44.796Z" }, - { url = "https://files.pythonhosted.org/packages/d2/25/312fa33cd4c34e7618f86cad0c9fdb312d8fef2e7fc61944c1a2f1bf1256/uv-0.11.19-py3-none-manylinux_2_31_riscv64.musllinux_1_1_riscv64.whl", hash = "sha256:62b0b35a51d3034ff30ecd0f381e9bbc20d5b335754f54b098da29424d551ceb", size = 24267220, upload-time = "2026-06-03T22:37:39.425Z" }, - { url = "https://files.pythonhosted.org/packages/8d/25/13856aeff9e14c98ee3e1ceae4d209301cbdeabde93abcd758433601dc82/uv-0.11.19-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:65e932720daed1af1f720a0ff5f9b33ee5f7ad97488dcceceb85154fc1323b82", size = 24376177, upload-time = "2026-06-03T22:37:50.276Z" }, - { url = "https://files.pythonhosted.org/packages/45/7d/590b3ab420e03504cf658d2981e1fcb4af60f3858d42da1d4d8740141dd9/uv-0.11.19-py3-none-musllinux_1_1_i686.whl", hash = "sha256:8f90b6687a480d154595aa619fb836a9a20d00ce37293db8099aad924f2b18f9", size = 23808336, upload-time = "2026-06-03T22:37:26.086Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/8e/40acebd4ea419c870930580623e8367e23d810a0ecb8cc2f44d852a27293/uv-0.11.19-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:28b0d612a766eb25756dbaa315433b726e93affa467d29a2682cc317547952ba", size = 25080747, upload-time = "2026-06-03T22:37:13.886Z" }, - { url = "https://files.pythonhosted.org/packages/9c/d3/4037b2acb2bb73b1a3ee47a1d23864ecc503f5840387afd29f621d4fd2ec/uv-0.11.19-py3-none-win32.whl", hash = "sha256:aa6a7e8d07b33ad22f4732848ebb1d9486503973c248d6e632c06ce4339fe347", size = 22459533, upload-time = "2026-06-03T22:37:36.741Z" }, - { url = "https://files.pythonhosted.org/packages/d4/43/f374fad7ad94e4a8c47cf09f00d803c76c6cc7f225668c41f4e2fb5de000/uv-0.11.19-py3-none-win_amd64.whl", hash = "sha256:480fc34a8d0967af6a90b3f99a6e5687cd5c6e29528de96bec04d6e305a59363", size = 25143888, upload-time = "2026-06-03T22:37:42.169Z" }, - { url = "https://files.pythonhosted.org/packages/18/98/d2db53ae036528b0a9407529ef175ee200b01f626c9c160978784c8af870/uv-0.11.19-py3-none-win_arm64.whl", hash = "sha256:50e4d4796ca1a6da359a4f723a0fea86640c381d3ff4fa759a41badd7cb52dee", size = 23601290, upload-time = "2026-06-03T22:37:31.393Z" }, -] - [[package]] name = "uvicorn" version = "0.49.0" @@ -6365,61 +4750,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/fa/e1388bbcf24ef3274f45c0c1c7b501fd14971037c1b6ee23610553307497/uvicorn-0.49.0-py3-none-any.whl", hash = "sha256:ba3d14c3ee7e41c6c654c46c9eb489d33213cdd30aa1696eab1374337c13f68f", size = 71376, upload-time = "2026-06-03T22:01:29.037Z" }, ] -[package.optional-dependencies] -standard = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "httptools" }, - { name = "python-dotenv" }, - { name = "pyyaml" }, - { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" }, - { name = "watchfiles" }, - { name = "websockets" }, -] - -[[package]] -name = "uvloop" -version = "0.22.1" 
-source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/14/ecceb239b65adaaf7fde510aa8bd534075695d1e5f8dadfa32b5723d9cfb/uvloop-0.22.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ef6f0d4cc8a9fa1f6a910230cd53545d9a14479311e87e3cb225495952eb672c", size = 1343335, upload-time = "2025-10-16T22:16:11.43Z" }, - { url = "https://files.pythonhosted.org/packages/ba/ae/6f6f9af7f590b319c94532b9567409ba11f4fa71af1148cab1bf48a07048/uvloop-0.22.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7cd375a12b71d33d46af85a3343b35d98e8116134ba404bd657b3b1d15988792", size = 742903, upload-time = "2025-10-16T22:16:12.979Z" }, - { url = "https://files.pythonhosted.org/packages/09/bd/3667151ad0702282a1f4d5d29288fce8a13c8b6858bf0978c219cd52b231/uvloop-0.22.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac33ed96229b7790eb729702751c0e93ac5bc3bcf52ae9eccbff30da09194b86", size = 3648499, upload-time = "2025-10-16T22:16:14.451Z" }, - { url = "https://files.pythonhosted.org/packages/b3/f6/21657bb3beb5f8c57ce8be3b83f653dd7933c2fd00545ed1b092d464799a/uvloop-0.22.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:481c990a7abe2c6f4fc3d98781cc9426ebd7f03a9aaa7eb03d3bfc68ac2a46bd", size = 3700133, upload-time = "2025-10-16T22:16:16.272Z" }, - { url = "https://files.pythonhosted.org/packages/09/e0/604f61d004ded805f24974c87ddd8374ef675644f476f01f1df90e4cdf72/uvloop-0.22.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a592b043a47ad17911add5fbd087c76716d7c9ccc1d64ec9249ceafd735f03c2", size = 3512681, upload-time = 
"2025-10-16T22:16:18.07Z" }, - { url = "https://files.pythonhosted.org/packages/bb/ce/8491fd370b0230deb5eac69c7aae35b3be527e25a911c0acdffb922dc1cd/uvloop-0.22.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1489cf791aa7b6e8c8be1c5a080bae3a672791fcb4e9e12249b05862a2ca9cec", size = 3615261, upload-time = "2025-10-16T22:16:19.596Z" }, - { url = "https://files.pythonhosted.org/packages/c7/d5/69900f7883235562f1f50d8184bb7dd84a2fb61e9ec63f3782546fdbd057/uvloop-0.22.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c60ebcd36f7b240b30788554b6f0782454826a0ed765d8430652621b5de674b9", size = 1352420, upload-time = "2025-10-16T22:16:21.187Z" }, - { url = "https://files.pythonhosted.org/packages/a8/73/c4e271b3bce59724e291465cc936c37758886a4868787da0278b3b56b905/uvloop-0.22.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b7f102bf3cb1995cfeaee9321105e8f5da76fdb104cdad8986f85461a1b7b77", size = 748677, upload-time = "2025-10-16T22:16:22.558Z" }, - { url = "https://files.pythonhosted.org/packages/86/94/9fb7fad2f824d25f8ecac0d70b94d0d48107ad5ece03769a9c543444f78a/uvloop-0.22.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53c85520781d84a4b8b230e24a5af5b0778efdb39142b424990ff1ef7c48ba21", size = 3753819, upload-time = "2025-10-16T22:16:23.903Z" }, - { url = "https://files.pythonhosted.org/packages/74/4f/256aca690709e9b008b7108bc85fba619a2bc37c6d80743d18abad16ee09/uvloop-0.22.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56a2d1fae65fd82197cb8c53c367310b3eabe1bbb9fb5a04d28e3e3520e4f702", size = 3804529, upload-time = "2025-10-16T22:16:25.246Z" }, - { url = "https://files.pythonhosted.org/packages/7f/74/03c05ae4737e871923d21a76fe28b6aad57f5c03b6e6bfcfa5ad616013e4/uvloop-0.22.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:40631b049d5972c6755b06d0bfe8233b1bd9a8a6392d9d1c45c10b6f9e9b2733", size = 3621267, upload-time = "2025-10-16T22:16:26.819Z" }, - 
{ url = "https://files.pythonhosted.org/packages/75/be/f8e590fe61d18b4a92070905497aec4c0e64ae1761498cad09023f3f4b3e/uvloop-0.22.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:535cc37b3a04f6cd2c1ef65fa1d370c9a35b6695df735fcff5427323f2cd5473", size = 3723105, upload-time = "2025-10-16T22:16:28.252Z" }, - { url = "https://files.pythonhosted.org/packages/3d/ff/7f72e8170be527b4977b033239a83a68d5c881cc4775fca255c677f7ac5d/uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42", size = 1359936, upload-time = "2025-10-16T22:16:29.436Z" }, - { url = "https://files.pythonhosted.org/packages/c3/c6/e5d433f88fd54d81ef4be58b2b7b0cea13c442454a1db703a1eea0db1a59/uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6", size = 752769, upload-time = "2025-10-16T22:16:30.493Z" }, - { url = "https://files.pythonhosted.org/packages/24/68/a6ac446820273e71aa762fa21cdcc09861edd3536ff47c5cd3b7afb10eeb/uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370", size = 4317413, upload-time = "2025-10-16T22:16:31.644Z" }, - { url = "https://files.pythonhosted.org/packages/5f/6f/e62b4dfc7ad6518e7eff2516f680d02a0f6eb62c0c212e152ca708a0085e/uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4", size = 4426307, upload-time = "2025-10-16T22:16:32.917Z" }, - { url = "https://files.pythonhosted.org/packages/90/60/97362554ac21e20e81bcef1150cb2a7e4ffdaf8ea1e5b2e8bf7a053caa18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2", size = 4131970, upload-time = "2025-10-16T22:16:34.015Z" }, - { url = 
"https://files.pythonhosted.org/packages/99/39/6b3f7d234ba3964c428a6e40006340f53ba37993f46ed6e111c6e9141d18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0", size = 4296343, upload-time = "2025-10-16T22:16:35.149Z" }, - { url = "https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" }, - { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" }, - { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" }, - { url = "https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" }, - { url = "https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" }, - { url = 
"https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" }, - { url = "https://files.pythonhosted.org/packages/90/cd/b62bdeaa429758aee8de8b00ac0dd26593a9de93d302bff3d21439e9791d/uvloop-0.22.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3879b88423ec7e97cd4eba2a443aa26ed4e59b45e6b76aabf13fe2f27023a142", size = 1362067, upload-time = "2025-10-16T22:16:44.503Z" }, - { url = "https://files.pythonhosted.org/packages/0d/f8/a132124dfda0777e489ca86732e85e69afcd1ff7686647000050ba670689/uvloop-0.22.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4baa86acedf1d62115c1dc6ad1e17134476688f08c6efd8a2ab076e815665c74", size = 752423, upload-time = "2025-10-16T22:16:45.968Z" }, - { url = "https://files.pythonhosted.org/packages/a3/94/94af78c156f88da4b3a733773ad5ba0b164393e357cc4bd0ab2e2677a7d6/uvloop-0.22.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:297c27d8003520596236bdb2335e6b3f649480bd09e00d1e3a99144b691d2a35", size = 4272437, upload-time = "2025-10-16T22:16:47.451Z" }, - { url = "https://files.pythonhosted.org/packages/b5/35/60249e9fd07b32c665192cec7af29e06c7cd96fa1d08b84f012a56a0b38e/uvloop-0.22.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1955d5a1dd43198244d47664a5858082a3239766a839b2102a269aaff7a4e25", size = 4292101, upload-time = "2025-10-16T22:16:49.318Z" }, - { url = "https://files.pythonhosted.org/packages/02/62/67d382dfcb25d0a98ce73c11ed1a6fba5037a1a1d533dcbb7cab033a2636/uvloop-0.22.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b31dc2fccbd42adc73bc4e7cdbae4fc5086cf378979e53ca5d0301838c5682c6", size = 4114158, upload-time = "2025-10-16T22:16:50.517Z" }, - { url = 
"https://files.pythonhosted.org/packages/f0/7a/f1171b4a882a5d13c8b7576f348acfe6074d72eaf52cccef752f748d4a9f/uvloop-0.22.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:93f617675b2d03af4e72a5333ef89450dfaa5321303ede6e67ba9c9d26878079", size = 4177360, upload-time = "2025-10-16T22:16:52.646Z" }, - { url = "https://files.pythonhosted.org/packages/79/7b/b01414f31546caf0919da80ad57cbfe24c56b151d12af68cee1b04922ca8/uvloop-0.22.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:37554f70528f60cad66945b885eb01f1bb514f132d92b6eeed1c90fd54ed6289", size = 1454790, upload-time = "2025-10-16T22:16:54.355Z" }, - { url = "https://files.pythonhosted.org/packages/d4/31/0bb232318dd838cad3fa8fb0c68c8b40e1145b32025581975e18b11fab40/uvloop-0.22.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b76324e2dc033a0b2f435f33eb88ff9913c156ef78e153fb210e03c13da746b3", size = 796783, upload-time = "2025-10-16T22:16:55.906Z" }, - { url = "https://files.pythonhosted.org/packages/42/38/c9b09f3271a7a723a5de69f8e237ab8e7803183131bc57c890db0b6bb872/uvloop-0.22.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:badb4d8e58ee08dad957002027830d5c3b06aea446a6a3744483c2b3b745345c", size = 4647548, upload-time = "2025-10-16T22:16:57.008Z" }, - { url = "https://files.pythonhosted.org/packages/c1/37/945b4ca0ac27e3dc4952642d4c900edd030b3da6c9634875af6e13ae80e5/uvloop-0.22.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b91328c72635f6f9e0282e4a57da7470c7350ab1c9f48546c0f2866205349d21", size = 4467065, upload-time = "2025-10-16T22:16:58.206Z" }, - { url = "https://files.pythonhosted.org/packages/97/cc/48d232f33d60e2e2e0b42f4e73455b146b76ebe216487e862700457fbf3c/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:daf620c2995d193449393d6c62131b3fbd40a63bf7b307a1527856ace637fe88", size = 4328384, upload-time = "2025-10-16T22:16:59.36Z" }, - { url = 
"https://files.pythonhosted.org/packages/e4/16/c1fd27e9549f3c4baf1dc9c20c456cd2f822dbf8de9f463824b0c0357e06/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6cde23eeda1a25c75b2e07d39970f3374105d5eafbaab2a4482be82f272d5a5e", size = 4296730, upload-time = "2025-10-16T22:17:00.744Z" }, -] - [[package]] name = "validators" version = "0.35.0" @@ -6461,132 +4791,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" }, ] -[[package]] -name = "watchfiles" -version = "1.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/cd/41/5e1a4bb12aac5f1493fa1bdc11154eca3b258ca4eba65d39c473fe19d8e9/watchfiles-1.2.0.tar.gz", hash = "sha256:c995fba777f1ea992f090f9236e9284cf7a5d1a0130dd5a3d82c598cacd76838", size = 108252, upload-time = "2026-05-18T04:32:04.251Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/5a/2bf22ecb24916983bf1cc0095e7dea2741d14d6553b0d6a2ac8bc96eca93/watchfiles-1.2.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:bb68bf4df85abebe5efddc53cf2075520f243a59868d9b3973278b23e76962a9", size = 400471, upload-time = "2026-05-18T04:31:08.908Z" }, - { url = "https://files.pythonhosted.org/packages/55/70/dea1f6a0e76607841a60fb51af150e70124864673f61704abb62b90cdcc7/watchfiles-1.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c16cb06dd17d43b9d185094268459eac92c9538356f050e55b54e82cf700e1d4", size = 394599, upload-time = "2026-05-18T04:30:19.845Z" }, - { url = "https://files.pythonhosted.org/packages/18/52/752dcc7dc817baef5e89518732925795ce52e36a683a9a3c9fb68b21504e/watchfiles-1.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:77a0feab9af4c021c581f695258c642b3d10c5fd4c676e33a0d8606425d82631", size = 455458, upload-time = "2026-05-18T04:30:29.126Z" }, - { url = "https://files.pythonhosted.org/packages/12/48/366ebbb22fcc504c2f72b45f0b7e72f40a18795cc01752c16066d597b67a/watchfiles-1.2.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a16ffe19bf5cf9f5edaa1ad1dd830c5a816e8feec430c522302ab55483a4b994", size = 460513, upload-time = "2026-05-18T04:31:40.85Z" }, - { url = "https://files.pythonhosted.org/packages/ad/44/1f9e1b15e7a729062e0d0c3d0d7225ea4ab98b2267ef87287153be2495fc/watchfiles-1.2.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:204f299afcbd65918ab78dbc52626b0ae45e9d8cef403fdbf33ecf9e40eac66e", size = 493616, upload-time = "2026-05-18T04:30:58.47Z" }, - { url = "https://files.pythonhosted.org/packages/7e/55/8b1086dcc8a1d6a697a62767bd7ea368e74c61c6fd171683cfe24a3fe5d2/watchfiles-1.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:11743adfa510bfffebe97659fb280182b5c9b238708f667e866f308c3430dc19", size = 573154, upload-time = "2026-05-18T04:30:37.903Z" }, - { url = "https://files.pythonhosted.org/packages/14/7a/242f400cc77fafa7b18d53d19d9cb64fc6a6f61f28c55913bae7c674d92a/watchfiles-1.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb72919d93e3a16fc451d3aa3d4b1698423daca1b382d3d959c9ac51297c12a8", size = 467046, upload-time = "2026-05-18T04:30:41.869Z" }, - { url = "https://files.pythonhosted.org/packages/02/c8/79eee650c62d2c186598489814468e389b5def0ebe755399ff645b35b1b2/watchfiles-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62f042afde2dde21ec1d2c1a74361e804673df86f51e418a999c9acfe671b07", size = 457100, upload-time = "2026-05-18T04:31:13.064Z" }, - { url = "https://files.pythonhosted.org/packages/81/36/519f6dbb7a95e4fe7c1513ed25b1520295ef9905a27f1f2226a73892bfb7/watchfiles-1.2.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = 
"sha256:027ae72bfdfd254862065d8b3e2a815c6ab9b1853ce41e6648ece84afd34a551", size = 467038, upload-time = "2026-05-18T04:30:32.915Z" }, - { url = "https://files.pythonhosted.org/packages/2f/12/951af6b9f89097e02511122258402cb3578443021930b70cf968d6310dc0/watchfiles-1.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e1cfd51e97e13ff3bd047c140764d277fc9b95b7cb5da59e46a47d167adab310", size = 632563, upload-time = "2026-05-18T04:30:11.539Z" }, - { url = "https://files.pythonhosted.org/packages/28/cc/0cba1f0a6117b7ec117271bdc3cb3a5a252005959755a2c09a745e0942cc/watchfiles-1.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:24b2405c0a46738dd9e1cf7135aa5dbdb9d42d024628651b3b13d5117e99f8df", size = 660851, upload-time = "2026-05-18T04:31:53.186Z" }, - { url = "https://files.pythonhosted.org/packages/d0/f2/26347558cc8bf6877845e66b315f644d03c173906aa09e233a3f4fd23928/watchfiles-1.2.0-cp310-cp310-win32.whl", hash = "sha256:8c520725602756229f045b032a1ff33d7ef0f7404189d62f6c2438cb6d8ef6a1", size = 277023, upload-time = "2026-05-18T04:30:18.825Z" }, - { url = "https://files.pythonhosted.org/packages/6d/68/a5e67b6b68e94f4c1511d61c46c55eba0737583620b6febf194c7b9cc23f/watchfiles-1.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:03b14855c6f35539e2d95c442ae9530a75762f1e26567152b9ed05f96534a74d", size = 290107, upload-time = "2026-05-18T04:32:09.677Z" }, - { url = "https://files.pythonhosted.org/packages/fc/3d/8024c801df84d1587740d0359e7fdd80afeae3d159011f3d5376dd82f18e/watchfiles-1.2.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:704fd259e332e01f9b9c178f4bce9e49027e5587cc2600eeeaf8e76e1c846201", size = 400242, upload-time = "2026-05-18T04:31:19.014Z" }, - { url = "https://files.pythonhosted.org/packages/87/5b/f4dfd45323e949984a3a7f9dc31d1cbb049921e7d98253488dda72ccdaa9/watchfiles-1.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6543cf55d170003296d185c0af981f3e1311564907e1f4e08671fc7693a890a5", size = 394562, upload-time = "2026-05-18T04:30:08.46Z" }, - { 
url = "https://files.pythonhosted.org/packages/98/d8/19483ef075d601c409bce8bcbb5c0f81a10876fff870400568f08ce484a1/watchfiles-1.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89d8c2394a065ca86f5d2910ff263ae67c127e1376ccc4f9fc35c71db879f80a", size = 456611, upload-time = "2026-05-18T04:30:45.723Z" }, - { url = "https://files.pythonhosted.org/packages/b1/6a/cc81fbe7ee42f2f22e661a6e12def7807e01b14b2f39e0ff83fd373fd307/watchfiles-1.2.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:772b80df316480d894a0e3165fdd19cf77f5d17f9a787f94029465ad0e3529d1", size = 461379, upload-time = "2026-05-18T04:31:29.292Z" }, - { url = "https://files.pythonhosted.org/packages/b1/57/7e669002082c0a0f4fb5113bb70125f7110124b846b0a11bc5ae8e90eac1/watchfiles-1.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d158cd89df6053823533e06fb1d73c549133bff5f0396170c0e53d9559340717", size = 493556, upload-time = "2026-05-18T04:30:05.44Z" }, - { url = "https://files.pythonhosted.org/packages/45/7d/f60a2b19807b21fe8281f3a8da4f59eef0d5f96825ac4680ba2d4f2ebf91/watchfiles-1.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d516b3283a758e087841aedb8031549fb41ced08f3db10aa6d2bf32dc042525b", size = 575255, upload-time = "2026-05-18T04:30:40.568Z" }, - { url = "https://files.pythonhosted.org/packages/bd/49/77f5b5e6efbcd57482f74948ebb1b97e5c0046d6b61475042d830c84b3ff/watchfiles-1.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:53b2290c92e0506d102cd448fbc610d87079553f86caa39d67440856a8b8bba5", size = 467052, upload-time = "2026-05-18T04:31:17.942Z" }, - { url = "https://files.pythonhosted.org/packages/ee/5a/73e2959af1b97fd5d556f9a8bdba017be23ceeef731869d5eaa0a753d5a3/watchfiles-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a711b51aec4370d0dcda5b6c09463206f133a5759341d7744b953a7b62e1100e", size = 456858, upload-time = 
"2026-05-18T04:30:30.182Z" }, - { url = "https://files.pythonhosted.org/packages/50/57/1bc8c27fad7e6c19bddee15d276dbb6ab72480ec01c127afff1673aee417/watchfiles-1.2.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:e2ca07fa7d89195ec0865d3d285666286740bfa83d83e5cee204043a31ecc165", size = 467579, upload-time = "2026-05-18T04:32:15.897Z" }, - { url = "https://files.pythonhosted.org/packages/09/6c/3c2e44edba3553c5e3c3b8c8a2a6dee6b9e12ae2cf4bd2378bebf9dc3038/watchfiles-1.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e0618518f282c4ebff60f5e5b1247b6d91bb8b9f4476947563a1e74acc66f3c6", size = 633253, upload-time = "2026-05-18T04:31:37.123Z" }, - { url = "https://files.pythonhosted.org/packages/30/c2/d8c84a882ab39bbefcc4915ab3e91830b7a7e990c5570b0b69075aba3faf/watchfiles-1.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0d191c054d0715c3c95c99df9b8dbf6fd096d8c1e021e8f212e1bd8bc444ccb5", size = 660713, upload-time = "2026-05-18T04:31:24.62Z" }, - { url = "https://files.pythonhosted.org/packages/a9/07/f97736a5fc605364fe67b25e9fa4a6965dfd4840d50c406ada507e9d735f/watchfiles-1.2.0-cp311-cp311-win32.whl", hash = "sha256:9342472aff9b093c5acd4f6d8f70ae0937964ab56542502bcf5579782da69ae8", size = 277222, upload-time = "2026-05-18T04:31:21.131Z" }, - { url = "https://files.pythonhosted.org/packages/cf/99/2b04981977fc2608afd60360d928c6aecf6b950292ca221d98f4005f6694/watchfiles-1.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:dbd6c97045dad81227c8d040173da044c1de08de64a5ea8b555da4aee1d5fa22", size = 290274, upload-time = "2026-05-18T04:31:45.966Z" }, - { url = "https://files.pythonhosted.org/packages/3c/74/f7f58a7075ee9cf612b0cfcddb78b8cd8234f0742d6f0075cf0da2dde1c6/watchfiles-1.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:57a2d9fa4fb4c2ecae57b13dfff2c7ab53e21a2ba674fe9f05506680fcdcc0d7", size = 283460, upload-time = "2026-05-18T04:31:39.126Z" }, - { url = 
"https://files.pythonhosted.org/packages/b8/2f/e42c992d2afda3108ea1c02acecc991b9f31d05c14adc2a7cee9ee211fc4/watchfiles-1.2.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:bc13eb17538be00c874699dc0abe4ee2bc8d50bb1166a6b9e175ef3fd7eb8f26", size = 400115, upload-time = "2026-05-18T04:32:02.06Z" }, - { url = "https://files.pythonhosted.org/packages/5f/8f/6af2ea19065c91d8b0ea3516fdfc8c0d349f407e8e9fbf4e5a17360de8ad/watchfiles-1.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d95ddc1eb6914154253d239089900813f6a767e174b8e6a50e7fdacb7e4236c", size = 393659, upload-time = "2026-05-18T04:30:50.951Z" }, - { url = "https://files.pythonhosted.org/packages/13/01/b32a967c56fb3e3e5be3db52c3d3b87fa4513aa367d8ed1ad96d42952e5f/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f70d8b291ef6e88d19b1f297a6905ddb978888d9272b0d05e6f53309856bcfc", size = 453207, upload-time = "2026-05-18T04:31:04.231Z" }, - { url = "https://files.pythonhosted.org/packages/04/98/97557a812180338cb1abd32e1cffcc4588f59b5f23e0cb006b2ba95ba64a/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:56d8641cf834c2836922899105bd3ce3d0dfc69291d52edf0b4d0436829b34c0", size = 459273, upload-time = "2026-05-18T04:31:50.377Z" }, - { url = "https://files.pythonhosted.org/packages/e8/a8/b4b08dcb7653b8087c6586f7ce649505900e866bbcfe40dc9587af02e686/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2581a94056e55d7d0a31a823ea92bf73749c489ca2285bfdc0fbe6b2bb49d50c", size = 489927, upload-time = "2026-05-18T04:31:42.485Z" }, - { url = "https://files.pythonhosted.org/packages/50/94/3dceea03545d2e5ddfd839f0ddd5e1cecbf1697b5a428d5ba11cef6af95d/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41bc1199f7523b3f82843c88cbb979180c949caef0342cf90968f178e5d49b01", size = 570476, upload-time = "2026-05-18T04:31:03.071Z" }, - { url = 
"https://files.pythonhosted.org/packages/cc/f2/d39a5450c3532092b91f81d274360e613c2371bc874a89c7a1a3c5e8d138/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7571e4464cb6e434958f867f7f730b8ab0b75e3f8e5eac0499168486ab3c33a8", size = 465650, upload-time = "2026-05-18T04:30:12.701Z" }, - { url = "https://files.pythonhosted.org/packages/22/24/ed72f68cbc1333ca9b9f2200aa048bb6658ae41709bc1caad4310f4bdffd/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e53a384f76b631c3ae5334ce6a52f0baa3a911eb94a4eac7f160079868b716d5", size = 456398, upload-time = "2026-05-18T04:30:13.784Z" }, - { url = "https://files.pythonhosted.org/packages/0d/64/982ef4a4e5bab5b6e5b6becc8cd5e732f6130a78b855f0abec6439a9a135/watchfiles-1.2.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:d20029a60a71a052a24c4db7673bc4de39ab89adbaccbfb5d67987c5d73f424d", size = 465140, upload-time = "2026-05-18T04:31:52.111Z" }, - { url = "https://files.pythonhosted.org/packages/a0/0c/95282abf4ed680b6096010bcfc30c5fa7a041fc5aa5a2ad17a2cc6c75bba/watchfiles-1.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2cb93af48550faf1cea04c303107c8b75833de7013e57ce27d3b8d21d8d0f58c", size = 630259, upload-time = "2026-05-18T04:31:25.676Z" }, - { url = "https://files.pythonhosted.org/packages/30/45/607c1de1530c4bdcf2cf1d1ecc2505ddba5d96bd43ba9f2b0e79876f850f/watchfiles-1.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2995c176de7692b86a2e4c58d9ec718f753150a979cb4a754e2b4ffa38e70906", size = 659859, upload-time = "2026-05-18T04:30:24.333Z" }, - { url = "https://files.pythonhosted.org/packages/fa/08/d9e2e0f9e8e6791d33aefc694ad7eefa7f901f63caff84a81ded38692f9c/watchfiles-1.2.0-cp312-cp312-win32.whl", hash = "sha256:7a2cffd17d27d2ecbb310c2b1d8174f222a5495b1a721894afa88ec11e25b898", size = 275480, upload-time = "2026-05-18T04:30:31.307Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/e6/9d42569c0102645cc8cea5d8c7d8a1e9d4ada2cb7f05f75e554b8aa2202a/watchfiles-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:f155b3a1b2a5fc89cdc70d47ee5d54e3b75e88efa34982028a35daef9ba00379", size = 288718, upload-time = "2026-05-18T04:32:10.745Z" }, - { url = "https://files.pythonhosted.org/packages/0a/26/88e0dc6ee3898169d7fa22bb6a69cabf2502d2ee25cb8c876d1262d204f8/watchfiles-1.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:8fa585ede612ee9f9e91b18bebf9ba11b9ae29a4e3a0d0cf6fca3e382133f0d5", size = 281026, upload-time = "2026-05-18T04:30:22.23Z" }, - { url = "https://files.pythonhosted.org/packages/d1/4d/70a7feced9f87e2ff26dba42667290f41694fc64646c67261fbb8cab5d5c/watchfiles-1.2.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:01ea8d66f0693b9b60a6541c8d10263091ca9a9060d242f3c1f3143f9aad2c98", size = 399730, upload-time = "2026-05-18T04:31:38.162Z" }, - { url = "https://files.pythonhosted.org/packages/31/3a/0da302f2307aee316922806ebd5726c542cbd787c938271cf14a074c7daf/watchfiles-1.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ba0480b9a74af058f43b337e937a451e109295c420916d68ad24e3dc02f5e44", size = 392842, upload-time = "2026-05-18T04:30:27.051Z" }, - { url = "https://files.pythonhosted.org/packages/db/ef/d5bdb705c224dbc256aa0c1ec47bf4e61ec52558f2afb44a71a1fe4d7015/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f34e26a19f91f710c08e0183429f0d1d15df734e6bc78c31e77b9ea9c433658", size = 452989, upload-time = "2026-05-18T04:31:11.945Z" }, - { url = "https://files.pythonhosted.org/packages/71/29/5495f2c1661949ef7a35e4d71111d129cfe7606414a26887a919d0a55406/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b4e77f6a55f858504069abd35d336a637555c09bca453dde1ee1e5ada8a6a1fb", size = 458978, upload-time = "2026-05-18T04:30:52.606Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/8c/7f9c07c433811c2fffd93e13fdfb7135de9aab5f2ae41be08960fa0047dc/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0cb4d80e212f116474a545c21c912b445f16bb0cef9e6a73a498164223e14e2f", size = 490248, upload-time = "2026-05-18T04:31:36.003Z" }, - { url = "https://files.pythonhosted.org/packages/3c/11/d93632febc52fbc21be90231bb7c17fd5387f46c9076fd40a5f9c2ae6910/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b974946a10af379d425e2eef5b62f5c6ebeaccf91d45eaad6f5b27ecd4f91aa0", size = 571847, upload-time = "2026-05-18T04:31:10.862Z" }, - { url = "https://files.pythonhosted.org/packages/55/b4/383173e73aabb07ad1d9c7aa859d95437ac46a6d6a1e11005facda0c9d19/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86bc13c25a8d1fcd70b51d0ce7c9b65e90de5666fcbfd3e34957cc73ee19aeb5", size = 465974, upload-time = "2026-05-18T04:30:17.006Z" }, - { url = "https://files.pythonhosted.org/packages/a7/6c/89b1a230a78f57c52dd8893adb1f92f94411721b6ec12596c56d98c74356/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca148d73dea36c9763aaa351e4d7a51780ec1584217c45276f4fe8239c768b71", size = 454782, upload-time = "2026-05-18T04:30:35.656Z" }, - { url = "https://files.pythonhosted.org/packages/24/62/1732118367cfff0a9fce3bf62ff4bfded09ef5df21d9d446b858b3f70a96/watchfiles-1.2.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:c525543d91961c6955b2636b308569e84a1d1c5f5f2932041ab9ef46422f43e3", size = 465182, upload-time = "2026-05-18T04:30:20.846Z" }, - { url = "https://files.pythonhosted.org/packages/28/96/716f7e5f51339bf22963f3345f9f27d7f3b30e2eadc597e257c881dd3c53/watchfiles-1.2.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:a204794696ffb8f9b10fba6f7cb5216d42f3b2b71860ccac6b6e42f5f10973b0", size = 629841, upload-time = "2026-05-18T04:31:05.397Z" }, - { url = 
"https://files.pythonhosted.org/packages/4c/fe/c40783950fd771ccf66ab3ec2722d188a9af1c7f96c6e811f36e40c6e03f/watchfiles-1.2.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:10d86db20695afe7997ac9e1717637d6714a8d0220458c33f3d2061f54cec427", size = 658028, upload-time = "2026-05-18T04:31:48.22Z" }, - { url = "https://files.pythonhosted.org/packages/71/72/4508db1856d1d87fcbb3b63f4839bab1b5682cb0e8d224d122263c09654a/watchfiles-1.2.0-cp313-cp313-win32.whl", hash = "sha256:eb283ee99e21ad6443c8cdb06ac5b34b1308c329cbdf03fa02b445363714c799", size = 275183, upload-time = "2026-05-18T04:30:59.57Z" }, - { url = "https://files.pythonhosted.org/packages/f9/36/14b76ca57652e5cc5fd1c11f32a261292c08a0d19a00351013c2549cbfb2/watchfiles-1.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:a0f27f01bee51861392bb6b7c4fdb290b27d1eb194e9e28788d68102a0e898d9", size = 288059, upload-time = "2026-05-18T04:32:07.937Z" }, - { url = "https://files.pythonhosted.org/packages/1b/8d/0a85e395398d8d20fadfe5c5d32c726eee17a519e78fb356f2cf7531bffe/watchfiles-1.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:3651aa7058595e9cfb75d35dd5ada2bf9f48a5b8a0f3562821d3e210c507e077", size = 280186, upload-time = "2026-05-18T04:31:54.484Z" }, - { url = "https://files.pythonhosted.org/packages/37/68/36db056f1fdcc5f07302f56e631774d6835bcd6fa3ace402304621d5f9e5/watchfiles-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:faea288b6f0ab1902ef08f4ca6de005dccf856c4e0c4f21b8c5fce02d90a1b08", size = 399031, upload-time = "2026-05-18T04:30:44.576Z" }, - { url = "https://files.pythonhosted.org/packages/c1/64/01a9d6f66a82a5c101ce939274106cc72759d62427e153f01edd2b9f87c2/watchfiles-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:01859b11fd9fbca670f4d5da00fbac282cfea9bd67a2125d8b2833a3b5617ea9", size = 391205, upload-time = "2026-05-18T04:30:25.413Z" }, - { url = 
"https://files.pythonhosted.org/packages/84/2c/0a44fe058cb4bb7b8ede6b6670698bbb7c0400740e378d00022189b7b31d/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fff610d7bb2256a317bb1e96f0d7862c7aa8076733ee5df0fd41bbe76a24a4f4", size = 451892, upload-time = "2026-05-18T04:32:14.005Z" }, - { url = "https://files.pythonhosted.org/packages/67/a1/351e0d56cd35e6488b5c8b4fb11a809a5bc923e8fe8fed9faf8920be0c89/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b141a4891c995a039cd89e9a49e62df1dc8a559a5d1a6e4c7106d16c12777a55", size = 458867, upload-time = "2026-05-18T04:31:22.279Z" }, - { url = "https://files.pythonhosted.org/packages/d5/7d/9d09605187f1b838998624049fcf8bf47b73c1a3b76901fcac1782f62277/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f22943b7770483f6ea0721c6b11d022947a98eb0acae14694de034f4d0d38925", size = 490217, upload-time = "2026-05-18T04:31:43.657Z" }, - { url = "https://files.pythonhosted.org/packages/60/5d/a17a16eccb182f04188cd308ec24b1a71a9b5c4e7098269cf35d9fa56d02/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1bc6195825b7dcd217968bb1f801a60fd4c16e8eeab5bedc7fe917d7d5995ab4", size = 571458, upload-time = "2026-05-18T04:32:11.875Z" }, - { url = "https://files.pythonhosted.org/packages/d3/3d/4dd457062083ab1938e5dfd45032eb425cee2ac817287ca8ff4356183e5d/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4a4b147f5dca2a5d325a06a832fb43f345751adfbc63204aec30e0d9ca965a2", size = 464707, upload-time = "2026-05-18T04:30:43.492Z" }, - { url = "https://files.pythonhosted.org/packages/c6/71/ea8c57b128f5383de74d0c7d2d9c57ad7c9a65a930c451bd25d524b295b7/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4543579a9bdb0c9560039b4ffddbdb39545707659fbc430ce4c10f3f68d557f9", size = 454663, upload-time = 
"2026-05-18T04:30:16.061Z" }, - { url = "https://files.pythonhosted.org/packages/53/fd/2e812bf938406d7db351f0703ddd3fc6c061cf30d96153a77bc79a943a44/watchfiles-1.2.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:20aa0e708b920bde876a4aa82dc7dd6ebea228a63a67cda6632c2fc87b787efa", size = 463537, upload-time = "2026-05-18T04:31:44.9Z" }, - { url = "https://files.pythonhosted.org/packages/86/56/d17a7f1dd1bc3035f1072694a551301272f1739c2d8e319c927cb9e29b38/watchfiles-1.2.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:d413349d565dab74297f2a63e84a097936be69bf8f3b3801f27f380e32040f44", size = 629194, upload-time = "2026-05-18T04:31:14.141Z" }, - { url = "https://files.pythonhosted.org/packages/be/06/f1ff66bf5cae50aa4062779a0ecd0bbaf15e466195719074078947d9a17d/watchfiles-1.2.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f28b2725eb8cce327b9b3ab02415c853011dc55c95832fe90de6bc56f5315f72", size = 656194, upload-time = "2026-05-18T04:31:47.14Z" }, - { url = "https://files.pythonhosted.org/packages/e7/54/a9c7ea9a82a4ac65e7004c0a03920b5cdd2f9c3b678757d9cd425aa51d53/watchfiles-1.2.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:b8c8358484d5fa12ef34f05b7f4168eaf1932f408725ff6d023c33ec17bd79d4", size = 400205, upload-time = "2026-05-18T04:32:05.153Z" }, - { url = "https://files.pythonhosted.org/packages/aa/5d/c9ab3534374a4a67450696905d6ef16a04405448b8dc52bd752ae50423d4/watchfiles-1.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9f04b092229ad2c50126dd3c922c8822e51e605993764a33058d4a791ab42281", size = 392508, upload-time = "2026-05-18T04:30:54.849Z" }, - { url = "https://files.pythonhosted.org/packages/26/ca/1ad30103535cf0cecd7b993e8d50edc5351b1820e38f2d22e3df58962feb/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a7ce236284f002a156f70add88efe5c70879cccbb658be0822c54b1306fc09d", size = 452448, upload-time = "2026-05-18T04:30:53.727Z" }, - { url = 
"https://files.pythonhosted.org/packages/37/a1/ceee2cdf2afbd715fa07758d39c9859513eae411b23196f7fd039e5feedd/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b9909cc2b48468b575eefa944919e1fe8a36c5849d5c7c168f80a8c1db69398e", size = 459605, upload-time = "2026-05-18T04:30:23.312Z" }, - { url = "https://files.pythonhosted.org/packages/e8/f6/421e30fd1cb3907a84ed92ab3f1983e37ba2dca015e9a894a048418417a2/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a37faaed405c67e28e6be45a1fa4f206ef5a2860f27c237db9fa30704c38242", size = 490757, upload-time = "2026-05-18T04:30:47.358Z" }, - { url = "https://files.pythonhosted.org/packages/41/b0/55ed1b97ed08be7bba6f9a541cac15f2a858e1d74d2b07b6da70a82aab00/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9649193aa27bd9ff2e80ff29bfaa93085496c7a3a377592823cc58b77ee88add", size = 568672, upload-time = "2026-05-18T04:30:38.915Z" }, - { url = "https://files.pythonhosted.org/packages/d1/cf/d8ae8a80dd7bafab395ea7681c10237311bbf34d37704a8c744e7cf31fc7/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e4ff8e37f99cf1da89e255e07c9c4b37c214038c4283707bdec308cb1b0ea1f", size = 464197, upload-time = "2026-05-18T04:30:09.914Z" }, - { url = "https://files.pythonhosted.org/packages/7c/8a/3076c496ca8dafe0e8cd03fcebdfc47be4b1174b4e5b24ff6e396e6b3af2/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:054dc20fd2e3132b4c3883b4a00d72fd6e1f56fdaf89fccd12e8057d74cd74d7", size = 453181, upload-time = "2026-05-18T04:30:14.829Z" }, - { url = "https://files.pythonhosted.org/packages/e5/10/9745e17c98e7b8a86454df0a3c7b5686bd650383f1e9f26e4ebcbd6cc0c0/watchfiles-1.2.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:e140ed30ebde76796b686e67c182cff10ea2fbab186fafd1560f74bb5a473a6e", size = 465109, upload-time = "2026-05-18T04:30:28.123Z" }, - { url = 
"https://files.pythonhosted.org/packages/8f/95/8ef4a95481d3e0cb52d62a06fa6e972e81424be2d9698b91a2fecca9904c/watchfiles-1.2.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:bb7e52ecf68ba46d22df23467b87cffeb2146908aa523ebfe803019618cfda06", size = 630653, upload-time = "2026-05-18T04:31:49.304Z" }, - { url = "https://files.pythonhosted.org/packages/fd/e4/3b3bf36b0f829b50c6ebcb8d031583863c59f923d6a6af3d485e470d0fac/watchfiles-1.2.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:23282a321c8baf9b3a3c4afff673f9fe65eb7fdc2338d765ccad9d3d1916a5ba", size = 657838, upload-time = "2026-05-18T04:31:06.497Z" }, - { url = "https://files.pythonhosted.org/packages/21/b1/6cbbb50c1f3002ab568777d44aa21206dfb8807a840990c4037523b51812/watchfiles-1.2.0-cp314-cp314-win32.whl", hash = "sha256:c0db965c5f79aa49fe672d297cf1febc5ad149b658594944f49a54a2b96270a7", size = 275108, upload-time = "2026-05-18T04:30:06.891Z" }, - { url = "https://files.pythonhosted.org/packages/92/45/190ce6db8dcb4536682cf75d3889ff1a27182a58cb519d343cb6d9ea63d8/watchfiles-1.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:71283b39fd17e5408eb123bd37aeecfd9d54c81fc184421943208aadb879d103", size = 288441, upload-time = "2026-05-18T04:32:12.901Z" }, - { url = "https://files.pythonhosted.org/packages/74/0d/3eae1c2313ab08378431d907c3f8095ecca00f3eda33111cf4f0f2591799/watchfiles-1.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:c5c19526f4e54a00f2666a6c0e9e40d582c09e865055ea7378bf0009aab857b3", size = 280684, upload-time = "2026-05-18T04:31:26.902Z" }, - { url = "https://files.pythonhosted.org/packages/b1/75/fb64e6c25d6b5ca636d03df34ffb1c6e9873303e76d27967e045f8df088f/watchfiles-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:d73a585accffa5ae39c17264c36ec3166d2fad7000c780f5ef83b2722afb9dd2", size = 398857, upload-time = "2026-05-18T04:32:17.108Z" }, - { url = 
"https://files.pythonhosted.org/packages/73/4e/9f7adf01754cbf81843722ccfec169d8f26c69778281a302855cecd2ee08/watchfiles-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ae99b14c5f21e026e0e9d96f40e07d8570ebee6cafd9d8fc318354606daa7a28", size = 392413, upload-time = "2026-05-18T04:31:07.911Z" }, - { url = "https://files.pythonhosted.org/packages/47/c8/bec626bcc2d69f44b9acb24ce7d60ed7b16b73628eea747fcbd169d8edda/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4429f3b105524a10b72c3a819b091c495d2811d419c1e1e8df773a5a5974f831", size = 452409, upload-time = "2026-05-18T04:31:20.142Z" }, - { url = "https://files.pythonhosted.org/packages/00/b7/b6362068e81e7c556d155a34c35d40ac3ef42d747b06d7f6e5bf58e359c2/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:43d818978d06062d9b22c4fab2ebe44cf5213d42dc8e62bda8c2760cfa2eeb33", size = 458827, upload-time = "2026-05-18T04:32:06.219Z" }, - { url = "https://files.pythonhosted.org/packages/67/f8/9a813fa42afb1e0b4625e75f0479826644d3ee8dc287e093799bc01f390c/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b9f732dc58b2dbe69e464ccf8fff7a03b0dd0be439da4c0720d3558527d3d6b4", size = 490104, upload-time = "2026-05-18T04:31:56.034Z" }, - { url = "https://files.pythonhosted.org/packages/2f/bf/27dfb6094ca4c9aad21298b5525b6c53cb36121ee454331d05161e58d130/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8f200104103feb097de4cab8fe4f5dd18a2026934c7dea98c55a2f5fd6d5a33b", size = 571360, upload-time = "2026-05-18T04:31:57.133Z" }, - { url = "https://files.pythonhosted.org/packages/fb/39/44a096d67270ea93df91d33877dbe91fbda3aa4f8ec2edf799d93eda8736/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:63ac26eefbf4af1741247d6fb68b11c49a25b2f7413fbd318a83a12aaa9cf666", size = 464644, upload-time = "2026-05-18T04:30:57.33Z" }, - { url = 
"https://files.pythonhosted.org/packages/0e/80/c7472203bad6268e3ef1ad260739704847898938ad7ea8b63a5131f46b50/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c4997d4e4a55f0d02b6cde327322daf3a0400e5df6c6b15948994bf72497925", size = 454771, upload-time = "2026-05-18T04:30:48.736Z" }, - { url = "https://files.pythonhosted.org/packages/51/cf/3b10b268b4b7f0fc26e9debb5eef1998b515887840f444cd3ec80c688755/watchfiles-1.2.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:4c887eba18b7945ac73067a8b4a66f21cd46c2539b2bc68588f7be6c7eb6d26b", size = 463494, upload-time = "2026-05-18T04:31:33.826Z" }, - { url = "https://files.pythonhosted.org/packages/3d/3e/a4302545cd589262a0dc7d140e86f7688eba3f9c72776c27f7e23b8864c4/watchfiles-1.2.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:3416ff151bb6b5a8d8d11664974fbef4d9305b9b2957839ab5a270468fd8df30", size = 629383, upload-time = "2026-05-18T04:31:15.596Z" }, - { url = "https://files.pythonhosted.org/packages/db/99/d5649df0a9a410d45b7c882304d0b790903ac9b6e8f2cfd12114e0c6b9f2/watchfiles-1.2.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:0e831a271c035d89789cffc386b6aa1375f39f1cd25eb7ca0997e4970d152fc5", size = 656093, upload-time = "2026-05-18T04:31:58.707Z" }, - { url = "https://files.pythonhosted.org/packages/92/b9/362702539275019a54dd2e94511b31a9b89c5f9e6a21966de7eb692549fc/watchfiles-1.2.0-cp315-cp315-macosx_10_12_x86_64.whl", hash = "sha256:37a6721cdf3f65dbb13aa9503510ccb4451603ac837e44d265d7992a597e1374", size = 400109, upload-time = "2026-05-18T04:31:16.879Z" }, - { url = "https://files.pythonhosted.org/packages/8f/75/71d5ba62db781e5587bded1d944c675374bc4aa37ff33d5018d98e8b6538/watchfiles-1.2.0-cp315-cp315-macosx_11_0_arm64.whl", hash = "sha256:2b37d10b5a63bd4d87e18472d80fa525bd670586fae62e5dd580452764879b65", size = 392167, upload-time = "2026-05-18T04:31:28.058Z" }, - { url = 
"https://files.pythonhosted.org/packages/3c/01/c66dd95d0423fe30d31820e2d1d5bda773764131bbb6ac0cb1cf303ac328/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a105bc2283f67e8fbec74253ec2d94925de92ed72c0393f1206bf326b7b7b69", size = 452372, upload-time = "2026-05-18T04:31:00.836Z" }, - { url = "https://files.pythonhosted.org/packages/91/15/2fe99557e72f85627c6a8eed50d889e8d101623e060a22ad75b875cb932d/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5327989a465505f05cfe06f04fa9d0c2fd5432bb243e10e6f012b1bdca3c8579", size = 459596, upload-time = "2026-05-18T04:31:34.96Z" }, - { url = "https://files.pythonhosted.org/packages/ed/23/d4acfa0023367428ed48351b3b9b267893037b6cadae55620c61c24bcfd4/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ecb47f183a8025b2aa18b546725c3657e542112ae9c0613a2af79b4fa8d04ad7", size = 490869, upload-time = "2026-05-18T04:31:59.923Z" }, - { url = "https://files.pythonhosted.org/packages/a4/5f/3164cbdce06c9fb95c4f7b9e2f9760b5e2797af43a9ecc317ef42a23a278/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8520a4ab0e37f770afc34459c4f8f7019e153f9124dc101c15538365875d1ab2", size = 571641, upload-time = "2026-05-18T04:32:00.948Z" }, - { url = "https://files.pythonhosted.org/packages/41/e6/85d3731c55e65cd7690f3f803d24c139588aaf863e4bf2148fe7a7fa1a19/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:71cd71740ed2c15211ebb237ced4e39a1cdf6f80566e5fe95428da1626f4fde6", size = 464444, upload-time = "2026-05-18T04:30:34.298Z" }, - { url = "https://files.pythonhosted.org/packages/f4/7d/562641012b8b09872742c3b8adf9629ec479fd78f8d68ae4a0c13da8add6/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f88af53d6ddaf72179ef613ddc905e6f4785f712b49b80b3bef9f3525e6194b4", size = 453593, upload-time = 
"2026-05-18T04:31:23.464Z" }, - { url = "https://files.pythonhosted.org/packages/56/fe/cb8ef3d6f929d14158fdaaad9925985b7310abc9384dcd4d82dd0016fb59/watchfiles-1.2.0-cp315-cp315-manylinux_2_31_riscv64.whl", hash = "sha256:cee9d5efd929efdac5f7e58f72b3376f676b64050a91c5b99a7094c5b2317488", size = 465096, upload-time = "2026-05-18T04:31:30.384Z" }, - { url = "https://files.pythonhosted.org/packages/25/91/80908e835e100527a9267147b08c0eee1fa6ab0ffec15edc04d1d44885f7/watchfiles-1.2.0-cp315-cp315-musllinux_1_1_aarch64.whl", hash = "sha256:b718bf356bbc15e559bd8ef41782b573b8ae0e3f177ab244b440568d7ea02cfb", size = 630638, upload-time = "2026-05-18T04:30:49.89Z" }, - { url = "https://files.pythonhosted.org/packages/46/4b/95ab2f256bb4af3cb2eb23b9317bda984ee6e0f11733a5c004a6c95b06e3/watchfiles-1.2.0-cp315-cp315-musllinux_1_1_x86_64.whl", hash = "sha256:922c0e019fe68b3ae392965a766b02a71ba1168c932cebc3733cd52c5fe5b377", size = 657684, upload-time = "2026-05-18T04:31:32.027Z" }, - { url = "https://files.pythonhosted.org/packages/23/f4/7513ef1e85fc4c6331b59479d6d72661fc391fbe543678052ac72c8b6c19/watchfiles-1.2.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:4674d49eb94706dfe666c069fc0a1b646ffcf920473492e209f6d5f60d3f0cc2", size = 403050, upload-time = "2026-05-18T04:30:36.753Z" }, - { url = "https://files.pythonhosted.org/packages/27/0b/a54103cfd732bb703c7a749222011a0483ef3705948dae3b203158601119/watchfiles-1.2.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:094b9b70103d4e963499bdea001ee3c2697b144cd9ae6218a62c0f89ec9e31db", size = 396629, upload-time = "2026-05-18T04:32:03.268Z" }, - { url = "https://files.pythonhosted.org/packages/5e/2c/73f31a3b893886206c3f54d73e8ad8dee58cdb2f69ad2622e0a8a9e07f4e/watchfiles-1.2.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0ef001f8c25ad0fa9529f914c1600647ecd0f542d11c19b7894768c67b6acb7", size = 457318, upload-time = "2026-05-18T04:31:01.932Z" }, - { url = 
"https://files.pythonhosted.org/packages/e9/f9/45d021e4a5cc7b9dd567f7cbb06d3b75f751a690063fb6cc7ec60f4e46b7/watchfiles-1.2.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a88fc94e647bc4eec523f1caa540258eb71d14278b9daf72fa1e2658a98df0f0", size = 457771, upload-time = "2026-05-18T04:30:56.331Z" }, -] - -[[package]] -name = "websocket-client" -version = "1.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2c/41/aa4bf9664e4cda14c3b39865b12251e8e7d239f4cd0e3cc1b6c2ccde25c1/websocket_client-1.9.0.tar.gz", hash = "sha256:9e813624b6eb619999a97dc7958469217c3176312b3a16a4bd1bc7e08a46ec98", size = 70576, upload-time = "2025-10-07T21:16:36.495Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/34/db/b10e48aa8fff7407e67470363eac595018441cf32d5e1001567a7aeba5d2/websocket_client-1.9.0-py3-none-any.whl", hash = "sha256:af248a825037ef591efbf6ed20cc5faa03d3b47b9e5a2230a529eeee1c1fc3ef", size = 82616, upload-time = "2025-10-07T21:16:34.951Z" }, -] - [[package]] name = "websockets" version = "16.0" @@ -6997,19 +5201,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/4d/4b880086bd0d3e034d25647be1d830afc3e3f610e98c4ab3490af6b1b6d5/yarl-1.24.2-py3-none-any.whl", hash = "sha256:2783d9226db8797636cd6896e4de81feed252d1db72265686c9558d97a4d94b9", size = 53576, upload-time = "2026-05-19T21:31:03.909Z" }, ] -[[package]] -name = "youtube-transcript-api" -version = "1.2.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "defusedxml" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/60/43/4104185a2eaa839daa693b30e15c37e7e58795e8e09ec414f22b3db54bec/youtube_transcript_api-1.2.4.tar.gz", hash = "sha256:b72d0e96a335df599d67cee51d49e143cff4f45b84bcafc202ff51291603ddcd", size = 469839, upload-time = "2026-01-29T09:09:17.088Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/be/95/129ea37efd6cd6ed00f62baae6543345c677810b8a3bf0026756e1d3cf3c/youtube_transcript_api-1.2.4-py3-none-any.whl", hash = "sha256:03878759356da5caf5edac77431780b91448fb3d8c21d4496015bdc8a7bc43ff", size = 485227, upload-time = "2026-01-29T09:09:15.427Z" }, -] - [[package]] name = "zipp" version = "4.1.0" From acecde49003e7e580cf8673c86f5f8a3bb8742ca Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:41:23 -0500 Subject: [PATCH 021/287] fix: address remote review findings --- src/extended_data/connectors/registry.py | 4 --- src/extended_data/containers/mappings.py | 40 ++++++++++++++++++++++-- tests/connectors/test_cli.py | 1 + tests/core/test_containers.py | 8 +++++ 4 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 2fd9fbb..43f441e 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -70,7 +70,6 @@ class ConnectorInfo: class_name: str | None module: str | None base_url: str | None - api_key_env: str | None description: str | None error: str | None @@ -87,7 +86,6 @@ def as_dict(self) -> dict[str, Any]: "class": self.class_name, "module": self.module, "base_url": self.base_url, - "api_key_env": self.api_key_env, "description": self.description, "error": self.error, } @@ -297,7 +295,6 @@ def _available_connector_info(name: str, cls: builtins.type[VendorConnectorBase] class_name=cls.__name__, module=cls.__module__, base_url=getattr(cls, "BASE_URL", None), - api_key_env=getattr(cls, "API_KEY_ENV", None), description=_get_description(cls), error=None, ) @@ -318,7 +315,6 @@ def _missing_builtin_connector_info(name: str, error: ImportError | None) -> Con class_name=spec.class_name, module=spec.module_path, base_url=None, - api_key_env=None, description=None, error=str(error) if error else "Connector class could not be loaded.", ) diff --git 
a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py index 3683898..614be6a 100644 --- a/src/extended_data/containers/mappings.py +++ b/src/extended_data/containers/mappings.py @@ -3,8 +3,12 @@ from __future__ import annotations from collections import UserDict -from collections.abc import Mapping -from typing import Any +from collections.abc import Iterable, Mapping +from typing import TYPE_CHECKING, Any, overload + + +if TYPE_CHECKING: + from _typeshed import SupportsKeysAndGetItem from extended_data.primitives.mappings import ( all_values_from_map, @@ -24,7 +28,7 @@ class ExtendedDict(UserDict[str, Any]): def __init__(self, initialdata: Mapping[str, Any] | None = None, **kwargs: Any) -> None: """Initialize the extended dictionary.""" super().__init__() - self.update(dict(initialdata or {}, **kwargs)) + self.update(initialdata or {}, **kwargs) def __setitem__(self, key: str, item: Any) -> None: """Set a value while preserving extended nested containers.""" @@ -32,6 +36,36 @@ def __setitem__(self, key: str, item: Any) -> None: self.data[key] = extend_data(item) + @overload + def update(self, other: SupportsKeysAndGetItem[str, Any], /) -> None: ... + + @overload + def update(self, other: SupportsKeysAndGetItem[str, Any], /, **kwargs: Any) -> None: ... + + @overload + def update(self, other: Iterable[tuple[str, Any]], /) -> None: ... + + @overload + def update(self, other: Iterable[tuple[str, Any]], /, **kwargs: Any) -> None: ... + + @overload + def update(self, **kwargs: Any) -> None: ... 
+ + def update(self, *args: Any, **kwargs: Any) -> None: # type: ignore[misc] + """Update values while preserving extended nested containers.""" + if len(args) > 1: + msg = f"update expected at most 1 argument, got {len(args)}" + raise TypeError(msg) + + if args: + other = args[0] + items = other.items() if hasattr(other, "items") else other + for key, value in items: + self[key] = value + + for key, value in kwargs.items(): + self[key] = value + def deep_merge(self, *mappings: Mapping[str, Any]) -> ExtendedDict: """Return a deeply merged copy.""" from extended_data.containers.factory import extend_data, to_builtin diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index 811cea5..61f53f7 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -34,6 +34,7 @@ def test_cli_list_json(): output = mock_write.call_args.args[0] assert '"name": "github"' in output assert '"available":' in output + assert "api_key_env" not in output def test_cli_info(): diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 9bc5805..7df1259 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -41,12 +41,20 @@ def test_extended_dict_promotes_nested_values_on_mutation() -> None: value["owner"] = "platform" value.update({"ports": [8080, "9090"]}) + value.update([("metadata", {"tier": "prod"})], runtime={"python": "3.13"}) + value.update(other={"literal": "key"}) assert isinstance(value["service"], ExtendedDict) assert isinstance(value["service"]["name"], ExtendedString) assert isinstance(value["owner"], ExtendedString) assert isinstance(value["ports"], ExtendedList) assert isinstance(value["ports"][1], ExtendedString) + assert isinstance(value["metadata"], ExtendedDict) + assert isinstance(value["metadata"]["tier"], ExtendedString) + assert isinstance(value["runtime"], ExtendedDict) + assert isinstance(value["runtime"]["python"], ExtendedString) + assert isinstance(value["other"], ExtendedDict) 
+ assert isinstance(value["other"]["literal"], ExtendedString) assert value["service"]["name"].upper_first() == "Api" From b77fe64fb7434df9725f3629d16a5d07e8fcb6fd Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:51:39 -0500 Subject: [PATCH 022/287] fix: harden input provider diagnostics --- docs/package-surface.md | 11 +++-- src/extended_data/inputs/__main__.py | 44 ++++++++++++------- tests/inputs/test_main.py | 63 +++++++++++++++++++++++++++- 3 files changed, 97 insertions(+), 21 deletions(-) diff --git a/docs/package-surface.md b/docs/package-surface.md index 408aed1..014ce52 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -62,10 +62,13 @@ to lower extended containers back to standard Python data. `InputProvider` loads input data from explicit mappings, environment variables, and stdin, then decodes or coerces values through the primitive layer. Its `decode_input(..., as_extended=True)` path gives input-driven workflows the same -container bridge as file and Base64 decoding. `Logging` provides structured -lifecycle logging for applications and connector workflows. `ConnectorFabric` -caches and coordinates vendor connectors while sharing input loading, logging, -data normalization, retry behavior, and serialization. +container bridge as file and Base64 decoding. Requested input coercions are +strict, and diagnostics identify the input key and failed operation without +echoing raw values from environment variables, stdin, JSON, YAML, or Base64 +payloads. `Logging` provides structured lifecycle logging for applications and +connector workflows. `ConnectorFabric` caches and coordinates vendor connectors +while sharing input loading, logging, data normalization, retry behavior, and +serialization. 
## Connector Fabric diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index 0a08372..220d9a9 100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -126,7 +126,7 @@ def _load_from_stdin() -> dict[str, Any]: decoded_stdin: dict[str, Any] = json.loads(inputs_from_stdin) return decoded_stdin except json.JSONDecodeError as exc: - message = f"Failed to decode stdin:\n{inputs_from_stdin}" + message = f"Failed to decode stdin as JSON ({len(inputs_from_stdin)} characters)." raise RuntimeError(message) from exc @staticmethod @@ -138,11 +138,20 @@ def _coerce_text(value: Any) -> Any: try: return value.decode("utf-8") except UnicodeDecodeError as exc: - message = f"Failed to decode bytes to string: {value!r}" + message = f"Failed to decode {type(value).__name__} input as UTF-8 text." raise RuntimeError(message) from exc return value + @staticmethod + def _format_available_keys(inputs: Mapping[str, Any]) -> str: + """Format available input keys without exposing their values.""" + if not inputs: + return "none" + + keys = sorted(str(key) for key in inputs) + return ", ".join(keys[:20]) + (f", ... ({len(keys)} total)" if len(keys) > 20 else "") + def get_input( self, k: str, @@ -179,38 +188,43 @@ def get_input( inp = default if is_bool and not isinstance(inp, bool): - inp = strtobool(inp) + try: + inp = strtobool(inp, raise_on_error=True) + except (TypeError, ValueError) as exc: + message = f"Input {k} cannot be converted to boolean." + raise RuntimeError(message) from exc if is_integer and inp is not None and not isinstance(inp, int): try: - inp = strtoint(inp) + inp = strtoint(inp, raise_on_error=True) except (TypeError, ValueError) as exc: - message = f"Input {k} cannot be converted to integer: {inp!r}" + message = f"Input {k} cannot be converted to integer." 
raise RuntimeError(message) from exc if is_float and inp is not None and not isinstance(inp, float): try: - inp = strtofloat(str(inp)) + inp = strtofloat(str(inp), raise_on_error=True) except (TypeError, ValueError) as exc: - message = f"Input {k} cannot be converted to float: {inp!r}" + message = f"Input {k} cannot be converted to float." raise RuntimeError(message) from exc if is_path and inp is not None: try: - inp = strtopath(str(inp)) + inp = strtopath(str(inp), raise_on_error=True) except (TypeError, ValueError) as exc: - message = f"Input {k} cannot be converted to Path: {inp!r}" + message = f"Input {k} cannot be converted to Path." raise RuntimeError(message) from exc if is_datetime and inp is not None: try: - inp = strtodatetime(str(inp)) + inp = strtodatetime(str(inp), raise_on_error=True) except (TypeError, ValueError) as exc: - message = f"Input {k} cannot be converted to datetime: {inp!r}" + message = f"Input {k} cannot be converted to datetime." raise RuntimeError(message) from exc if is_nothing(inp) and required: - message = f"Required input {k} not passed from inputs:\n{self.inputs}" + available = self._format_available_keys(self.inputs) + message = f"Required input {k} not passed. Available input keys: {available}." raise RuntimeError(message) return inp @@ -260,7 +274,7 @@ def decode_input( encoding="json" if decode_from_json else "yaml", ) except binascii.Error as exc: - message = f"Failed to decode {conf} from base64" + message = f"Failed to decode input {k} from Base64." raise RuntimeError(message) from exc if not isinstance(conf, str): @@ -272,13 +286,13 @@ def decode_input( try: conf = decode_yaml(conf) except YAMLError as exc: - message = f"Failed to decode {conf} from YAML" + message = f"Failed to decode input {k} from YAML." 
raise RuntimeError(message) from exc elif decode_from_json: try: conf = decode_json(conf) except json.JSONDecodeError as exc: - message = f"Failed to decode {conf} from JSON" + message = f"Failed to decode input {k} from JSON." raise RuntimeError(message) from exc if conf is None and not allow_none: diff --git a/tests/inputs/test_main.py b/tests/inputs/test_main.py index 762e9ce..399df58 100644 --- a/tests/inputs/test_main.py +++ b/tests/inputs/test_main.py @@ -113,10 +113,26 @@ def test_get_input_required(): This test verifies that the InputProvider raises an error if a required input is not provided. """ - dic = InputProvider(inputs={"key1": "value1"}) - with pytest.raises(RuntimeError, match="Required input key2 not passed"): + dic = InputProvider(inputs={"key1": "value1", "API_TOKEN": "super-secret"}, from_environment=False) + with pytest.raises(RuntimeError, match="Required input key2 not passed") as exc_info: dic.get_input("key2", required=True) + message = str(exc_info.value) + assert "key1" in message + assert "API_TOKEN" in message + assert "value1" not in message + assert "super-secret" not in message + + +def test_init_with_invalid_stdin_does_not_echo_payload(monkeypatch): + """Invalid stdin diagnostics do not expose raw stdin content.""" + monkeypatch.setattr("sys.stdin.read", lambda: '{"API_TOKEN": "super-secret"') + + with pytest.raises(RuntimeError, match="Failed to decode stdin as JSON") as exc_info: + InputProvider(from_stdin=True) + + assert "super-secret" not in str(exc_info.value) + def test_get_input_boolean(): """Test retrieving and converting a boolean input. 
@@ -134,6 +150,16 @@ def test_get_input_boolean_existing_bool(): assert dic.get_input("bool_key", is_bool=True) is False +def test_get_input_boolean_conversion_errors_do_not_echo_values(): + """Boolean conversion diagnostics identify the key without exposing the value.""" + dic = InputProvider(inputs={"bool_key": "super-secret"}) + + with pytest.raises(RuntimeError, match="Input bool_key cannot be converted to boolean") as exc_info: + dic.get_input("bool_key", is_bool=True) + + assert "super-secret" not in str(exc_info.value) + + def test_get_input_integer(): """Test retrieving and converting an integer input. @@ -145,6 +171,16 @@ def test_get_input_integer(): assert dic.get_input("int_key", is_integer=True) == integer_test_value +def test_get_input_conversion_errors_do_not_echo_values(): + """Type conversion diagnostics identify the key without exposing the value.""" + dic = InputProvider(inputs={"int_key": "super-secret"}) + + with pytest.raises(RuntimeError, match="Input int_key cannot be converted to integer") as exc_info: + dic.get_input("int_key", is_integer=True) + + assert "super-secret" not in str(exc_info.value) + + def test_decode_input_json(): """Test decoding an input from JSON format. 
@@ -198,6 +234,29 @@ def test_decode_input_json_can_return_extended_containers(): assert decoded["name"].upper_first() == "Test" +def test_decode_input_errors_do_not_echo_values(): + """Decode diagnostics identify the input key without exposing raw values.""" + dic = InputProvider( + inputs={ + "json_key": '{"token": "super-secret"', + "yaml_key": "token: [super-secret", + "base64_key": "not valid base64!", + } + ) + + with pytest.raises(RuntimeError, match="Failed to decode input json_key from JSON") as json_exc: + dic.decode_input("json_key", decode_from_json=True) + with pytest.raises(RuntimeError, match="Failed to decode input yaml_key from YAML") as yaml_exc: + dic.decode_input("yaml_key", decode_from_yaml=True) + with pytest.raises(RuntimeError, match="Failed to decode input base64_key from Base64") as base64_exc: + dic.decode_input("base64_key", decode_from_base64=True) + + for exc_info in (json_exc, yaml_exc, base64_exc): + message = str(exc_info.value) + assert "super-secret" not in message + assert "not valid base64" not in message + + def test_decode_input_base64_external_json_can_return_extended_containers(): """Externally produced Base64 JSON should decode once and then be extended.""" encoded_value = base64.b64encode(b'{"name": "test"}').decode("utf-8") From e036bec43c9f9216e962457a65f02e4e683ae5f5 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 01:57:53 -0500 Subject: [PATCH 023/287] fix: centralize crewai adapter import guidance --- docs/package-surface.md | 3 ++ src/extended_data/connectors/_optional.py | 16 +++++++ .../connectors/anthropic/tools.py | 8 ++-- src/extended_data/connectors/aws/tools.py | 8 ++-- src/extended_data/connectors/cursor/tools.py | 8 ++-- src/extended_data/connectors/github/tools.py | 8 ++-- src/extended_data/connectors/google/tools.py | 8 ++-- src/extended_data/connectors/meshy/tools.py | 8 ++-- src/extended_data/connectors/secrets/tools.py | 8 ++-- src/extended_data/connectors/slack/tools.py | 8 ++-- 
src/extended_data/connectors/vault/tools.py | 8 ++-- src/extended_data/connectors/zoom/tools.py | 8 ++-- .../connectors/test_optional_dependencies.py | 46 +++++++++++++++++++ 13 files changed, 95 insertions(+), 50 deletions(-) create mode 100644 tests/connectors/test_optional_dependencies.py diff --git a/docs/package-surface.md b/docs/package-surface.md index 014ce52..6b6c526 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -125,6 +125,9 @@ pip install "extended-data[meshy,mcp]" CrewAI tool adapters are still importable when users install `crewai` directly, but `extended-data` does not expose a CrewAI extra while current CrewAI dependency trees pull vulnerable `chromadb` releases. +All built-in CrewAI tool adapters use +`extended_data.connectors._optional.get_crewai_tool_decorator()` so missing or +incompatible CrewAI installs fail with the same user-managed install guidance. Optional dependency checks live in `extended_data.connectors._optional`; there are no old package compatibility shims in the public API. When a known built-in diff --git a/src/extended_data/connectors/_optional.py b/src/extended_data/connectors/_optional.py index 8104c0c..7da4b40 100644 --- a/src/extended_data/connectors/_optional.py +++ b/src/extended_data/connectors/_optional.py @@ -58,6 +58,8 @@ ), } +CREWAI_TOOLS_IMPORT_ERROR = f"crewai is required for CrewAI tools.\n{PACKAGE_INSTALL_HINTS['crewai']}" + def is_available(package: str) -> bool: """Check if a package is available for import. 
@@ -117,6 +119,20 @@ def require_extra(package: str, extra: str | None = None) -> Any: ) from e +def get_crewai_tool_decorator() -> Any: + """Import the CrewAI tool decorator with extended-data's install guidance.""" + try: + module = importlib.import_module("crewai.tools") + except ImportError as e: + raise ImportError(CREWAI_TOOLS_IMPORT_ERROR) from e + + try: + return module.tool + except AttributeError as e: + msg = "crewai.tools.tool is required for CrewAI tools, but the installed CrewAI package does not expose it." + raise ImportError(msg) from e + + def require_any(*packages: str, extra: str) -> Any: """Import the first available package from a list. diff --git a/src/extended_data/connectors/anthropic/tools.py b/src/extended_data/connectors/anthropic/tools.py index 1c10df1..1c21099 100644 --- a/src/extended_data/connectors/anthropic/tools.py +++ b/src/extended_data/connectors/anthropic/tools.py @@ -113,11 +113,9 @@ def get_langchain_tools() -> list[Any]: def get_crewai_tools() -> list[Any]: """Get all Anthropic tools as CrewAI tools.""" - try: - from crewai.tools import tool as crewai_tool - except ImportError as e: - msg = "crewai is required for CrewAI tools." - raise ImportError(msg) from e + from extended_data.connectors._optional import get_crewai_tool_decorator + + crewai_tool = get_crewai_tool_decorator() tools = [] for defn in TOOL_DEFINITIONS: diff --git a/src/extended_data/connectors/aws/tools.py b/src/extended_data/connectors/aws/tools.py index 234d2dd..d1c809b 100644 --- a/src/extended_data/connectors/aws/tools.py +++ b/src/extended_data/connectors/aws/tools.py @@ -346,11 +346,9 @@ def get_langchain_tools() -> list[Any]: def get_crewai_tools() -> list[Any]: """Get all AWS tools as CrewAI tools.""" - try: - from crewai.tools import tool as crewai_tool - except ImportError as e: - msg = "crewai is required for CrewAI tools. Install CrewAI separately; extended-data does not install it." 
- raise ImportError(msg) from e + from extended_data.connectors._optional import get_crewai_tool_decorator + + crewai_tool = get_crewai_tool_decorator() tools = [] for defn in TOOL_DEFINITIONS: diff --git a/src/extended_data/connectors/cursor/tools.py b/src/extended_data/connectors/cursor/tools.py index d29dc0d..4cf43ad 100644 --- a/src/extended_data/connectors/cursor/tools.py +++ b/src/extended_data/connectors/cursor/tools.py @@ -119,11 +119,9 @@ def get_langchain_tools() -> list[Any]: def get_crewai_tools() -> list[Any]: """Get all Cursor tools as CrewAI tools.""" - try: - from crewai.tools import tool as crewai_tool - except ImportError as e: - msg = "crewai is required for CrewAI tools." - raise ImportError(msg) from e + from extended_data.connectors._optional import get_crewai_tool_decorator + + crewai_tool = get_crewai_tool_decorator() tools = [] for defn in TOOL_DEFINITIONS: diff --git a/src/extended_data/connectors/github/tools.py b/src/extended_data/connectors/github/tools.py index 553252e..7dc51e9 100644 --- a/src/extended_data/connectors/github/tools.py +++ b/src/extended_data/connectors/github/tools.py @@ -325,11 +325,9 @@ def get_langchain_tools() -> list[Any]: def get_crewai_tools() -> list[Any]: """Get all GitHub tools as CrewAI tools.""" - try: - from crewai.tools import tool as crewai_tool - except ImportError as e: - msg = "crewai is required for CrewAI tools." - raise ImportError(msg) from e + from extended_data.connectors._optional import get_crewai_tool_decorator + + crewai_tool = get_crewai_tool_decorator() tools = [] for defn in TOOL_DEFINITIONS: diff --git a/src/extended_data/connectors/google/tools.py b/src/extended_data/connectors/google/tools.py index a7f2ee7..8c2b154 100644 --- a/src/extended_data/connectors/google/tools.py +++ b/src/extended_data/connectors/google/tools.py @@ -377,11 +377,9 @@ def get_crewai_tools() -> list[Any]: Raises: ImportError: If crewai is not installed. 
""" - try: - from crewai.tools import tool as crewai_tool - except ImportError as e: - msg = "crewai is required for CrewAI tools. Install CrewAI separately; extended-data does not install it." - raise ImportError(msg) from e + from extended_data.connectors._optional import get_crewai_tool_decorator + + crewai_tool = get_crewai_tool_decorator() tools = [] for defn in TOOL_DEFINITIONS: diff --git a/src/extended_data/connectors/meshy/tools.py b/src/extended_data/connectors/meshy/tools.py index 134727c..ac9511b 100644 --- a/src/extended_data/connectors/meshy/tools.py +++ b/src/extended_data/connectors/meshy/tools.py @@ -551,11 +551,9 @@ def get_crewai_tools() -> list[Any]: Raises: ImportError: If crewai is not installed. """ - try: - from crewai.tools import tool as crewai_tool - except ImportError as e: - msg = "crewai is required for CrewAI tools. Install CrewAI separately; extended-data does not install it." - raise ImportError(msg) from e + from extended_data.connectors._optional import get_crewai_tool_decorator + + crewai_tool = get_crewai_tool_decorator() tools = [] for defn in TOOL_DEFINITIONS: diff --git a/src/extended_data/connectors/secrets/tools.py b/src/extended_data/connectors/secrets/tools.py index 6822524..ecd699e 100644 --- a/src/extended_data/connectors/secrets/tools.py +++ b/src/extended_data/connectors/secrets/tools.py @@ -305,11 +305,9 @@ def get_langchain_tools() -> list[Any]: def get_crewai_tools() -> list[Any]: """Get all secrets sync tools as CrewAI tools.""" - try: - from crewai.tools import tool as crewai_tool - except ImportError as e: - msg = "crewai is required for CrewAI tools." 
- raise ImportError(msg) from e + from extended_data.connectors._optional import get_crewai_tool_decorator + + crewai_tool = get_crewai_tool_decorator() tools = [] for defn in TOOL_DEFINITIONS: diff --git a/src/extended_data/connectors/slack/tools.py b/src/extended_data/connectors/slack/tools.py index 222c0ad..e42074c 100644 --- a/src/extended_data/connectors/slack/tools.py +++ b/src/extended_data/connectors/slack/tools.py @@ -327,11 +327,9 @@ def get_crewai_tools() -> list[Any]: Raises: ImportError: If crewai is not installed. """ - try: - from crewai.tools import tool as crewai_tool - except ImportError as e: - msg = "crewai is required for CrewAI tools. Install CrewAI separately; extended-data does not install it." - raise ImportError(msg) from e + from extended_data.connectors._optional import get_crewai_tool_decorator + + crewai_tool = get_crewai_tool_decorator() tools = [] for defn in TOOL_DEFINITIONS: diff --git a/src/extended_data/connectors/vault/tools.py b/src/extended_data/connectors/vault/tools.py index d30f3a6..7984d8e 100644 --- a/src/extended_data/connectors/vault/tools.py +++ b/src/extended_data/connectors/vault/tools.py @@ -141,11 +141,9 @@ def get_langchain_tools() -> list[Any]: def get_crewai_tools() -> list[Any]: """Get all Vault tools as CrewAI tools.""" - try: - from crewai.tools import tool as crewai_tool - except ImportError as e: - msg = "crewai is required for CrewAI tools." 
- raise ImportError(msg) from e + from extended_data.connectors._optional import get_crewai_tool_decorator + + crewai_tool = get_crewai_tool_decorator() tools = [] for defn in TOOL_DEFINITIONS: diff --git a/src/extended_data/connectors/zoom/tools.py b/src/extended_data/connectors/zoom/tools.py index d7914c7..04f0f82 100644 --- a/src/extended_data/connectors/zoom/tools.py +++ b/src/extended_data/connectors/zoom/tools.py @@ -177,11 +177,9 @@ def get_langchain_tools() -> list[Any]: def get_crewai_tools() -> list[Any]: """Get all Zoom tools as CrewAI tools.""" - try: - from crewai.tools import tool as crewai_tool - except ImportError as e: - msg = "crewai is required for CrewAI tools." - raise ImportError(msg) from e + from extended_data.connectors._optional import get_crewai_tool_decorator + + crewai_tool = get_crewai_tool_decorator() tools = [] for defn in TOOL_DEFINITIONS: diff --git a/tests/connectors/test_optional_dependencies.py b/tests/connectors/test_optional_dependencies.py new file mode 100644 index 0000000..9437b3a --- /dev/null +++ b/tests/connectors/test_optional_dependencies.py @@ -0,0 +1,46 @@ +"""Tests for connector optional dependency helpers.""" + +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + +from extended_data.connectors import _optional + + +def test_get_crewai_tool_decorator_explains_user_managed_install(monkeypatch) -> None: + """Missing CrewAI reports the deliberate no-extra install policy.""" + + def fake_import_module(name: str) -> object: + if name == "crewai.tools": + raise ImportError("No module named 'crewai'") + pytest.fail(f"unexpected import: {name}") + + monkeypatch.setattr(_optional.importlib, "import_module", fake_import_module) + + with pytest.raises(ImportError) as exc_info: + _optional.get_crewai_tool_decorator() + + message = str(exc_info.value) + assert "crewai is required for CrewAI tools" in message + assert "extended-data does not publish a CrewAI extra" in message + assert 
"chromadb" in message + assert "extended-data[crewai]" not in message + + +def test_get_crewai_tool_decorator_returns_tool_decorator(monkeypatch) -> None: + """Installed CrewAI tool support is returned directly.""" + sentinel = object() + + monkeypatch.setattr(_optional.importlib, "import_module", lambda name: SimpleNamespace(tool=sentinel)) + + assert _optional.get_crewai_tool_decorator() is sentinel + + +def test_get_crewai_tool_decorator_rejects_incompatible_crewai(monkeypatch) -> None: + """A CrewAI install without crewai.tools.tool is treated as unsupported.""" + monkeypatch.setattr(_optional.importlib, "import_module", lambda name: SimpleNamespace()) + + with pytest.raises(ImportError, match="does not expose it"): + _optional.get_crewai_tool_decorator() From a464987a1e3563eb01b4afc956df716b41296569 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 02:04:18 -0500 Subject: [PATCH 024/287] feat: sanitize format decode failures --- docs/package-surface.md | 5 ++ src/extended_data/__init__.py | 2 + src/extended_data/inputs/__main__.py | 8 +-- .../primitives/formats/__init__.py | 2 + .../primitives/formats/errors.py | 72 +++++++++++++++++++ src/extended_data/primitives/formats/hcl.py | 10 ++- src/extended_data/primitives/formats/json.py | 7 +- src/extended_data/primitives/formats/toml.py | 10 +-- .../primitives/formats/yaml/utils.py | 8 ++- src/extended_data/primitives/types.py | 6 +- tests/core/test_hcl2_utils.py | 14 ++-- tests/core/test_json_utils.py | 12 ++++ tests/core/test_package_surface.py | 1 + tests/core/test_toml_utils.py | 25 +++---- tests/core/test_yaml_utils.py | 17 ++++- 15 files changed, 159 insertions(+), 40 deletions(-) create mode 100644 src/extended_data/primitives/formats/errors.py diff --git a/docs/package-surface.md b/docs/package-surface.md index 6b6c526..73c4320 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -6,6 +6,7 @@ namespace. 
The root package exposes the primitives users need most often: ```python from extended_data import ( ConnectorFabric, + DataDecodeError, ExtendedDict, ExtendedList, ExtendedSet, @@ -31,6 +32,10 @@ from extended_data import ( - Tier 3 processors use the first two tiers to handle files, imports, exports, inputs, API data, vendor integrations, and workflows. +Direct JSON, YAML, TOML, and HCL decode failures raise `DataDecodeError` with +format and position context while preserving the parser exception as the cause; +the public error message does not echo the raw payload. + ```python name = ExtendedString("API Response Value").to_snake_case() payload = ExtendedDict({"outer": {"inner": 1}}).flatten() diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index a587e20..c9ac23c 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -35,6 +35,7 @@ write_file, ) from extended_data.io.importers import unwrap_raw_data_from_import +from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.hcl import decode_hcl2, encode_hcl2 from extended_data.primitives.formats.json import decode_json, encode_json from extended_data.primitives.formats.toml import decode_toml, encode_toml @@ -160,6 +161,7 @@ def __getattr__(name: str) -> Any: __all__ = [ "ConnectorFabric", "ConnectorInfo", + "DataDecodeError", "ExitRunError", "ExtendedDict", "ExtendedList", diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index 220d9a9..c1a5668 100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -18,10 +18,10 @@ from case_insensitive_dict import CaseInsensitiveDict from deepmerge import Merger # type: ignore[attr-defined] -from yaml import YAMLError from extended_data.containers.factory import extend_data from extended_data.io.base64 import base64_decode +from extended_data.primitives.formats.errors import DataDecodeError from 
extended_data.primitives.formats.json import decode_json from extended_data.primitives.formats.yaml import decode_yaml from extended_data.primitives.state import is_nothing @@ -273,7 +273,7 @@ def decode_input( unwrap_raw_data=decode_from_json or decode_from_yaml, encoding="json" if decode_from_json else "yaml", ) - except binascii.Error as exc: + except (binascii.Error, DataDecodeError) as exc: message = f"Failed to decode input {k} from Base64." raise RuntimeError(message) from exc @@ -285,13 +285,13 @@ def decode_input( if decode_from_yaml: try: conf = decode_yaml(conf) - except YAMLError as exc: + except DataDecodeError as exc: message = f"Failed to decode input {k} from YAML." raise RuntimeError(message) from exc elif decode_from_json: try: conf = decode_json(conf) - except json.JSONDecodeError as exc: + except DataDecodeError as exc: message = f"Failed to decode input {k} from JSON." raise RuntimeError(message) from exc diff --git a/src/extended_data/primitives/formats/__init__.py b/src/extended_data/primitives/formats/__init__.py index 2206c55..29a0df8 100644 --- a/src/extended_data/primitives/formats/__init__.py +++ b/src/extended_data/primitives/formats/__init__.py @@ -1,5 +1,6 @@ """Tier 1 serialization codecs.""" +from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.hcl import decode_hcl2, encode_hcl2 from extended_data.primitives.formats.json import decode_json, encode_json from extended_data.primitives.formats.toml import decode_toml, encode_toml @@ -22,6 +23,7 @@ __all__ = [ + "DataDecodeError", "LiteralScalarString", "PureDumper", "PureLoader", diff --git a/src/extended_data/primitives/formats/errors.py b/src/extended_data/primitives/formats/errors.py new file mode 100644 index 0000000..4c33cdd --- /dev/null +++ b/src/extended_data/primitives/formats/errors.py @@ -0,0 +1,72 @@ +"""Shared exceptions for Tier 1 format decoders.""" + +from __future__ import annotations + +from typing import Any + + 
+class DataDecodeError(ValueError): + """Raised when a supported data format cannot be decoded safely.""" + + def __init__( + self, + format_name: str, + *, + reason: str | None = None, + line: int | None = None, + column: int | None = None, + ) -> None: + """Initialize a sanitized decode error.""" + self.format_name = format_name + self.reason = reason + self.line = line + self.column = column + + message = f"Failed to decode {format_name} data" + if reason: + message = f"{message}: {reason}" + if line is not None: + message = f"{message} at line {line}" + if column is not None: + message = f"{message}, column {column}" + super().__init__(f"{message}.") + + @classmethod + def from_exception(cls, format_name: str, exc: BaseException) -> DataDecodeError: + """Build a sanitized decode error from a parser exception.""" + line, column = _get_error_position(exc) + return cls(format_name, reason=_get_error_reason(exc), line=line, column=column) + + +def invalid_utf8_error(format_name: str) -> DataDecodeError: + """Return a decode error for invalid UTF-8 input bytes.""" + return DataDecodeError(format_name, reason="input bytes are not valid UTF-8") + + +def _get_error_reason(exc: BaseException) -> str: + """Extract a parser reason without including source snippets.""" + for attr in ("msg", "problem"): + value = getattr(exc, attr, None) + if isinstance(value, str) and value: + return value.strip().replace("\n", " ") + return type(exc).__name__ + + +def _get_error_position(exc: BaseException) -> tuple[int | None, int | None]: + """Extract one-based line and column data when the parser exposes it.""" + line = _as_int(getattr(exc, "lineno", None) or getattr(exc, "line", None)) + column = _as_int(getattr(exc, "colno", None) or getattr(exc, "col", None) or getattr(exc, "column", None)) + + mark = getattr(exc, "problem_mark", None) + if mark is not None: + line = _as_int(getattr(mark, "line", None), offset=1) + column = _as_int(getattr(mark, "column", None), offset=1) + + return 
line, column + + +def _as_int(value: Any, *, offset: int = 0) -> int | None: + """Return an integer value plus offset, or None when unavailable.""" + if isinstance(value, int): + return value + offset + return None diff --git a/src/extended_data/primitives/formats/hcl.py b/src/extended_data/primitives/formats/hcl.py index 0167ad1..89b64c8 100644 --- a/src/extended_data/primitives/formats/hcl.py +++ b/src/extended_data/primitives/formats/hcl.py @@ -11,8 +11,9 @@ import hcl2 -from lark.exceptions import ParseError +from lark.exceptions import LarkError +from extended_data.primitives.formats.errors import DataDecodeError, invalid_utf8_error from extended_data.primitives.strings import bytestostr from extended_data.primitives.types import convert_special_types @@ -235,10 +236,13 @@ def decode_hcl2(hcl2_data: str | memoryview | bytes | bytearray) -> Any: try: hcl2_data = bytestostr(hcl2_data) except UnicodeDecodeError as exc: - raise ParseError(f"Failed to decode bytes to string: {hcl2_data!r}") from exc + raise invalid_utf8_error("HCL2") from exc hcl2_data_stream = StringIO(hcl2_data) - return _normalize_hcl_value(hcl2.load(hcl2_data_stream)) + try: + return _normalize_hcl_value(hcl2.load(hcl2_data_stream)) + except LarkError as exc: + raise DataDecodeError.from_exception("HCL2", exc) from exc def encode_hcl2(data: Any) -> str: diff --git a/src/extended_data/primitives/formats/json.py b/src/extended_data/primitives/formats/json.py index 815be5e..fd0fc03 100644 --- a/src/extended_data/primitives/formats/json.py +++ b/src/extended_data/primitives/formats/json.py @@ -10,6 +10,8 @@ import orjson +from extended_data.primitives.formats.errors import DataDecodeError + def decode_json(json_data: str | memoryview | bytes | bytearray) -> Any: """Decodes a JSON string or bytes into a Python object using orjson. @@ -20,7 +22,10 @@ def decode_json(json_data: str | memoryview | bytes | bytearray) -> Any: Returns: Any: The decoded Python object. 
""" - return orjson.loads(json_data) + try: + return orjson.loads(json_data) + except orjson.JSONDecodeError as exc: + raise DataDecodeError.from_exception("JSON", exc) from exc def encode_json( diff --git a/src/extended_data/primitives/formats/toml.py b/src/extended_data/primitives/formats/toml.py index fbe51d3..5980add 100644 --- a/src/extended_data/primitives/formats/toml.py +++ b/src/extended_data/primitives/formats/toml.py @@ -9,8 +9,7 @@ import tomlkit -from tomlkit.exceptions import TOMLKitError - +from extended_data.primitives.formats.errors import DataDecodeError, invalid_utf8_error from extended_data.primitives.strings import bytestostr from extended_data.primitives.types import convert_special_types @@ -27,8 +26,11 @@ def decode_toml(toml_data: str | memoryview | bytes | bytearray) -> Any: try: toml_data = bytestostr(toml_data) except UnicodeDecodeError as exc: - raise TOMLKitError(f"Failed to decode bytes to string: {toml_data!r}") from exc - return tomlkit.parse(toml_data) + raise invalid_utf8_error("TOML") from exc + try: + return tomlkit.parse(toml_data) + except tomlkit.exceptions.TOMLKitError as exc: + raise DataDecodeError.from_exception("TOML", exc) from exc def encode_toml(raw_data: Any) -> str: diff --git a/src/extended_data/primitives/formats/yaml/utils.py b/src/extended_data/primitives/formats/yaml/utils.py index f7d4dee..2214d60 100644 --- a/src/extended_data/primitives/formats/yaml/utils.py +++ b/src/extended_data/primitives/formats/yaml/utils.py @@ -10,6 +10,7 @@ import yaml +from extended_data.primitives.formats.errors import DataDecodeError, invalid_utf8_error from extended_data.primitives.formats.yaml.dumpers import PureDumper from extended_data.primitives.formats.yaml.loaders import PureLoader from extended_data.primitives.formats.yaml.tag_classes import YamlPairs, YamlTagged @@ -28,8 +29,11 @@ def decode_yaml(yaml_data: str | memoryview | bytes | bytearray) -> Any: try: yaml_data = bytestostr(yaml_data) except UnicodeDecodeError as 
exc: - raise yaml.YAMLError(f"Failed to decode bytes to string: {yaml_data!r}") from exc - return yaml.load(yaml_data, Loader=PureLoader) # noqa: S506 + raise invalid_utf8_error("YAML") from exc + try: + return yaml.load(yaml_data, Loader=PureLoader) # noqa: S506 + except yaml.YAMLError as exc: + raise DataDecodeError.from_exception("YAML", exc) from exc def encode_yaml(raw_data: Any) -> str: diff --git a/src/extended_data/primitives/types.py b/src/extended_data/primitives/types.py index ab0e246..617197b 100644 --- a/src/extended_data/primitives/types.py +++ b/src/extended_data/primitives/types.py @@ -41,9 +41,7 @@ from pathlib import Path from typing import Any -from orjson import JSONDecodeError -from yaml.error import YAMLError - +from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.json import decode_json from extended_data.primitives.formats.yaml import YamlPairs, YamlTagged, decode_yaml from extended_data.primitives.strings import removesuffix @@ -438,7 +436,7 @@ def reconstruct_special_type(converted_obj: str, fail_silently: bool = False) -> if is_potential_json(converted_obj): return decode_json(converted_obj) - except (ValueError, TypeError, YAMLError, JSONDecodeError) as exc: + except (ValueError, TypeError, DataDecodeError) as exc: if not fail_silently: raise ConversionError(type(converted_obj), converted_obj) from exc return converted_obj diff --git a/tests/core/test_hcl2_utils.py b/tests/core/test_hcl2_utils.py index a3a3e63..96a2b70 100644 --- a/tests/core/test_hcl2_utils.py +++ b/tests/core/test_hcl2_utils.py @@ -4,9 +4,8 @@ import pytest -from lark.exceptions import ParseError, UnexpectedToken - from extended_data.primitives.formats import hcl as hcl2_utils +from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.hcl import decode_hcl2, encode_hcl2 @@ -56,13 +55,18 @@ def test_decode_hcl2_empty() -> None: def test_decode_hcl2_invalid() -> None: 
"""Reject invalid HCL input.""" - with pytest.raises(UnexpectedToken): - decode_hcl2("invalid hcl2 data") + with pytest.raises(DataDecodeError) as exc_info: + decode_hcl2('locals { token = "super-secret" ') + + message = str(exc_info.value) + assert "Failed to decode HCL2 data" in message + assert "line 1" in message + assert "super-secret" not in message def test_decode_hcl2_invalid_bytes() -> None: """Reject byte input that cannot be decoded as UTF-8.""" - with pytest.raises(ParseError, match="Failed to decode bytes to string"): + with pytest.raises(DataDecodeError, match="input bytes are not valid UTF-8"): decode_hcl2(b"\x80") diff --git a/tests/core/test_json_utils.py b/tests/core/test_json_utils.py index ee785a2..fd12f36 100644 --- a/tests/core/test_json_utils.py +++ b/tests/core/test_json_utils.py @@ -15,6 +15,7 @@ import pytest +from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.json import decode_json, encode_json @@ -67,6 +68,17 @@ def test_decode_json(simple_json: str, simple_dict: dict) -> None: assert result == simple_dict +def test_decode_json_invalid_input_raises_sanitized_decode_error() -> None: + """Invalid JSON raises a package-owned decode error without echoing values.""" + with pytest.raises(DataDecodeError) as exc_info: + decode_json('{"token": "super-secret"') + + message = str(exc_info.value) + assert "Failed to decode JSON data" in message + assert "line 1" in message + assert "super-secret" not in message + + def test_encode_json(simple_dict: dict, simple_json: str) -> None: """Tests encoding of a dictionary to JSON format. 
diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index ce18619..3d4f897 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -33,6 +33,7 @@ def test_clean_major_version_public_names() -> None: def test_root_exports_first_class_integrated_primitives() -> None: """Inputs, logging, and connector fabric are available from the root package.""" + assert extended_data.DataDecodeError.__name__ == "DataDecodeError" assert extended_data.InputProvider is InputProvider assert extended_data.Logging is Logging assert extended_data.ConnectorFabric is ConnectorFabric diff --git a/tests/core/test_toml_utils.py b/tests/core/test_toml_utils.py index 37a1b6f..cb71185 100644 --- a/tests/core/test_toml_utils.py +++ b/tests/core/test_toml_utils.py @@ -17,24 +17,21 @@ import pytest import tomlkit +from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.toml import decode_toml, encode_toml def test_decode_toml_invalid_format() -> None: - """Tests the `decode_toml` function with an invalid TOML format. - - This test checks whether the `decode_toml` function raises a `ParseError` - when provided with a malformed TOML string, specifically one that contains - an unclosed quote. - - Asserts: - The function raises `tomlkit.exceptions.ParseError` when decoding - the invalid TOML string. 
- """ - invalid_toml = "title = 'Unclosed quote" - with pytest.raises(tomlkit.exceptions.ParseError): + """Reject malformed TOML through a sanitized package error.""" + invalid_toml = "token = 'super-secret" + with pytest.raises(DataDecodeError) as exc_info: decode_toml(invalid_toml) + message = str(exc_info.value) + assert "Failed to decode TOML data" in message + assert "line 1" in message + assert "super-secret" not in message + def test_decode_toml_bytes_success() -> None: """Decode TOML from bytes.""" @@ -43,8 +40,8 @@ def test_decode_toml_bytes_success() -> None: def test_decode_toml_invalid_bytes() -> None: - """Raise a TOMLKitError when bytes cannot be decoded.""" - with pytest.raises(tomlkit.exceptions.TOMLKitError, match="Failed to decode bytes to string"): + """Raise a sanitized decode error when bytes cannot be decoded.""" + with pytest.raises(DataDecodeError, match="input bytes are not valid UTF-8"): decode_toml(b"\x80") diff --git a/tests/core/test_yaml_utils.py b/tests/core/test_yaml_utils.py index 9663988..6e2bffc 100644 --- a/tests/core/test_yaml_utils.py +++ b/tests/core/test_yaml_utils.py @@ -19,10 +19,10 @@ from types import SimpleNamespace import pytest -import yaml from yaml import MappingNode, ScalarNode, SequenceNode +from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.yaml import ( LiteralScalarString, YamlPairs, @@ -155,11 +155,22 @@ def test_decode_yaml_bytes_success(simple_yaml_fixture: str) -> None: def test_decode_yaml_invalid_bytes() -> None: - """Raise a YAMLError when bytes cannot be decoded.""" - with pytest.raises(yaml.YAMLError, match="Failed to decode bytes to string"): + """Raise a sanitized decode error when bytes cannot be decoded.""" + with pytest.raises(DataDecodeError, match="input bytes are not valid UTF-8"): decode_yaml(b"\x80") +def test_decode_yaml_invalid_input_does_not_echo_payload() -> None: + """Invalid YAML messages do not include source snippets.""" + with 
pytest.raises(DataDecodeError) as exc_info: + decode_yaml("token: [super-secret") + + message = str(exc_info.value) + assert "Failed to decode YAML data" in message + assert "line 1" in message + assert "super-secret" not in message + + @pytest.mark.parametrize( ("node", "loader_method", "constructed_value"), [ From 4bbeb90a79c25febb7a429d7480389533cfcdbe7 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 02:08:54 -0500 Subject: [PATCH 025/287] feat: honor google project activity threshold --- .../connectors/google/services.py | 99 ++++++++++++++++--- tests/connectors/test_google_activity.py | 71 +++++++++++++ 2 files changed, 158 insertions(+), 12 deletions(-) create mode 100644 tests/connectors/test_google_activity.py diff --git a/src/extended_data/connectors/google/services.py b/src/extended_data/connectors/google/services.py index 23a92a6..761534b 100644 --- a/src/extended_data/connectors/google/services.py +++ b/src/extended_data/connectors/google/services.py @@ -6,11 +6,75 @@ from __future__ import annotations +import datetime as dt + from typing import TYPE_CHECKING, Any from extended_data import unhump_map +_PROJECT_ACTIVITY_TIME_FIELDS = ( + "lastActivityTime", + "lastActiveTime", + "last_activity_time", + "last_active_time", + "updateTime", + "createTime", +) + + +def _has_http_status(exc: BaseException, status: int) -> bool: + """Return whether an exception exposes a Google-style HTTP response status.""" + return getattr(getattr(exc, "resp", None), "status", None) == status + + +def _parse_project_activity_time(value: Any) -> dt.datetime | None: + """Parse a Google-style timestamp into an aware UTC datetime.""" + if not isinstance(value, str) or not value.strip(): + return None + + normalized = value.strip() + if normalized.endswith("Z"): + normalized = f"{normalized[:-1]}+00:00" + + try: + parsed = dt.datetime.fromisoformat(normalized) + except ValueError: + return None + + if parsed.tzinfo is None: + parsed = 
parsed.replace(tzinfo=dt.timezone.utc) + return parsed.astimezone(dt.timezone.utc) + + +def _latest_project_activity_time(project_data: dict[str, Any]) -> dt.datetime | None: + """Return the latest activity timestamp available on project metadata.""" + timestamps = [ + parsed + for field in _PROJECT_ACTIVITY_TIME_FIELDS + if (parsed := _parse_project_activity_time(project_data.get(field))) is not None + ] + return max(timestamps) if timestamps else None + + +def _project_activity_is_stale( + project_data: dict[str, Any], + *, + days_since_activity: int, + now: dt.datetime | None = None, +) -> bool: + """Return whether project metadata indicates activity older than the threshold.""" + activity_time = _latest_project_activity_time(project_data) + if activity_time is None: + return True + + reference_time = now or dt.datetime.now(dt.timezone.utc) + if reference_time.tzinfo is None: + reference_time = reference_time.replace(tzinfo=dt.timezone.utc) + cutoff = reference_time.astimezone(dt.timezone.utc) - dt.timedelta(days=days_since_activity) + return activity_time <= cutoff + + class GoogleServicesMixin: """Mixin providing Google Cloud services discovery operations. @@ -621,8 +685,6 @@ def is_project_empty( """ self.logger.info(f"Checking if project {project_id} is empty") - from googleapiclient.errors import HttpError - try: if check_compute: instances = self.list_compute_instances(project_id) @@ -654,9 +716,9 @@ def is_project_empty( self.logger.info(f"Project {project_id} has {len(topics)} Pub/Sub topics") return False - except HttpError as e: + except Exception as e: # API might not be enabled, treat as empty for that service - if e.resp.status == 403: + if _has_http_status(e, 403): self.logger.debug(f"API access denied, skipping check: {e}") else: raise @@ -742,21 +804,26 @@ def find_inactive_projects( """Find projects that appear to be inactive or dead. A project is considered inactive if: - - It has no resources (compute, GKE, storage, etc.) 
- Its lifecycle state is not ACTIVE + - It has no resources and no recent activity timestamp Args: projects: Pre-fetched projects dict. Fetched if not provided. check_resources: Check if projects have resources. Defaults to True. - days_since_activity: Days threshold for activity (not implemented yet). + days_since_activity: Days threshold for available project activity + timestamps. Empty projects with recent timestamps are not marked + inactive. Empty projects without activity timestamps are treated + as inactive. Returns: List of inactive project dictionaries. """ - from googleapiclient.errors import HttpError - self.logger.info("Finding inactive projects") + if days_since_activity < 0: + msg = "days_since_activity must be greater than or equal to 0." + raise ValueError(msg) + if projects is None: # Get projects from cloud module - requires GoogleCloudMixin if hasattr(self, "list_projects"): @@ -780,11 +847,19 @@ def find_inactive_projects( if check_resources: try: is_empty = self.is_project_empty(project_id) - if is_empty: - project_data["inactive_reason"] = "no_resources" + if is_empty and _project_activity_is_stale( + project_data, + days_since_activity=days_since_activity, + ): + activity_time = _latest_project_activity_time(project_data) + project_data["inactive_reason"] = ( + f"no_resources_since={activity_time.date().isoformat()}" + if activity_time is not None + else "no_resources" + ) inactive.append(project_data) - except HttpError as e: - if e.resp.status == 403: + except Exception as e: + if _has_http_status(e, 403): # Can't check, skip self.logger.debug(f"Cannot check resources for {project_id}: {e}") else: diff --git a/tests/connectors/test_google_activity.py b/tests/connectors/test_google_activity.py new file mode 100644 index 0000000..99aa92f --- /dev/null +++ b/tests/connectors/test_google_activity.py @@ -0,0 +1,71 @@ +"""Tests for Google project activity helpers without Google SDK imports.""" + +from __future__ import annotations + +from typing 
import Any +from unittest.mock import MagicMock + +import pytest + +from extended_data.connectors.google.services import GoogleServicesMixin + + +class DummyGoogleServices(GoogleServicesMixin): + """Small concrete test double for GoogleServicesMixin.""" + + def __init__(self, empty_projects: set[str]) -> None: + self.empty_projects = empty_projects + self.logger = MagicMock() + + def is_project_empty(self, project_id: str) -> bool: + return project_id in self.empty_projects + + +def test_find_inactive_projects_uses_activity_threshold_for_empty_projects() -> None: + """Recently active empty projects are not reported as inactive.""" + connector = DummyGoogleServices({"old", "recent", "unknown"}) + projects: dict[str, dict[str, Any]] = { + "old": { + "projectId": "old", + "lifecycleState": "ACTIVE", + "updateTime": "2000-01-01T00:00:00Z", + }, + "recent": { + "projectId": "recent", + "lifecycleState": "ACTIVE", + "updateTime": "2999-01-01T00:00:00Z", + }, + "unknown": { + "projectId": "unknown", + "lifecycleState": "ACTIVE", + }, + } + + inactive = connector.find_inactive_projects(projects, days_since_activity=90) + + assert {project["projectId"] for project in inactive} == {"old", "unknown"} + assert projects["old"]["inactive_reason"] == "no_resources_since=2000-01-01" + assert projects["unknown"]["inactive_reason"] == "no_resources" + assert "inactive_reason" not in projects["recent"] + + +def test_find_inactive_projects_keeps_non_empty_active_projects() -> None: + """Active projects with resources are not inactive solely because timestamps are old.""" + connector = DummyGoogleServices(set()) + projects = { + "active": { + "projectId": "active", + "lifecycleState": "ACTIVE", + "updateTime": "2000-01-01T00:00:00Z", + } + } + + assert connector.find_inactive_projects(projects, days_since_activity=90) == [] + + +def test_find_inactive_projects_rejects_negative_activity_threshold() -> None: + """Negative activity thresholds fail instead of silently widening the 
query.""" + connector = DummyGoogleServices(set()) + + with pytest.raises(ValueError, match="days_since_activity"): + connector.find_inactive_projects({}, days_since_activity=-1) From 578494b4dbab8eb3beef2a951b3ee14bbbdb50fc Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 02:11:56 -0500 Subject: [PATCH 026/287] fix: avoid github token in clone url --- src/extended_data/io/files.py | 20 ++++++++++++++++++-- tests/core/test_file_data_type.py | 11 +++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/extended_data/io/files.py b/src/extended_data/io/files.py index 2b1311b..188f3e4 100644 --- a/src/extended_data/io/files.py +++ b/src/extended_data/io/files.py @@ -6,6 +6,7 @@ import tempfile import urllib.request +from base64 import b64encode from collections.abc import Mapping from pathlib import Path from typing import Any, TypeAlias @@ -21,6 +22,21 @@ """Type alias for file paths that can be represented as strings or os.PathLike objects.""" +def _github_auth_header_env(github_token: str) -> dict[str, str]: + """Return Git environment config for GitHub token auth without URL credentials.""" + env = os.environ.copy() + try: + config_count = int(env.get("GIT_CONFIG_COUNT", "0")) + except ValueError: + config_count = 0 + + encoded = b64encode(f"x-access-token:{github_token}".encode()).decode("ascii") + env[f"GIT_CONFIG_KEY_{config_count}"] = "http.https://github.com/.extraheader" + env[f"GIT_CONFIG_VALUE_{config_count}"] = f"Authorization: Basic {encoded}" + env["GIT_CONFIG_COUNT"] = str(config_count + 1) + return env + + def get_parent_repository(file_path: FilePath | None = None, search_parent_directories: bool = True) -> Repo | None: """Retrieves the Git repository object for a given path. @@ -74,11 +90,11 @@ def clone_repository_to_temp( Raises: EnvironmentError: If errors occur while trying to clone a Git repository. 
""" - repo_url = f"https://{github_token}:x-oauth-basic@github.com/{repo_owner}/{repo_name}.git" + repo_url = f"https://github.com/{repo_owner}/{repo_name}.git" try: temp_dir = Path(tempfile.mkdtemp()) - repo = Repo.clone_from(repo_url, str(temp_dir), branch=branch or None) + repo = Repo.clone_from(repo_url, str(temp_dir), branch=branch or None, env=_github_auth_header_env(github_token)) return temp_dir, repo except GitCommandError as e: error_message = "Git command error occurred" diff --git a/tests/core/test_file_data_type.py b/tests/core/test_file_data_type.py index 5648648..3232a2b 100644 --- a/tests/core/test_file_data_type.py +++ b/tests/core/test_file_data_type.py @@ -17,6 +17,7 @@ from __future__ import annotations +from base64 import b64encode from pathlib import Path from typing import Any @@ -126,6 +127,7 @@ def test_clone_repository_to_temp(mocker, valid_repo_data: dict) -> None: """ # Mock the Repo.clone_from method to return a mock Repo instance mock_clone_from = mocker.patch("extended_data.io.files.Repo.clone_from") + mocker.patch.dict("extended_data.io.files.os.environ", {}, clear=True) mock_repo_instance = mocker.Mock(spec=Repo) mock_clone_from.return_value = mock_repo_instance @@ -135,6 +137,15 @@ def test_clone_repository_to_temp(mocker, valid_repo_data: dict) -> None: # Assert that temp_dir is a Path instance and repo is the mocked Repo instance assert isinstance(temp_dir, Path) assert repo is mock_repo_instance + clone_url = mock_clone_from.call_args.args[0] + clone_env = mock_clone_from.call_args.kwargs["env"] + expected_header = b64encode(b"x-access-token:token123").decode("ascii") + + assert clone_url == "https://github.com/owner/repo.git" + assert "token123" not in clone_url + assert clone_env["GIT_CONFIG_KEY_0"] == "http.https://github.com/.extraheader" + assert clone_env["GIT_CONFIG_VALUE_0"] == f"Authorization: Basic {expected_header}" + assert clone_env["GIT_CONFIG_COUNT"] == "1" # Test cloning with errors mock_clone_from.side_effect = 
GitCommandError("Error", "git") From 8110a1652d9108c070131edcf8cd8d40bf4081d2 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 02:22:21 -0500 Subject: [PATCH 027/287] test: run safe examples --- tests/examples/test_safe_examples.py | 47 ++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 tests/examples/test_safe_examples.py diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py new file mode 100644 index 0000000..2ef54ef --- /dev/null +++ b/tests/examples/test_safe_examples.py @@ -0,0 +1,47 @@ +"""Smoke tests for examples that do not require live vendor credentials.""" + +from __future__ import annotations + +import os +import subprocess +import sys + +from pathlib import Path + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parents[2] +SAFE_EXAMPLES = [ + "examples/core/basic_usage.py", + "examples/core/composed_workflows.py", + "examples/core/file_operations.py", + "examples/core/serialization.py", + "examples/core/string_transformations.py", + "examples/inputs/basic_usage.py", + "examples/inputs/decorator_api.py", + "examples/inputs/encoding_decoding.py", + "examples/logging/basic_logging.py", + "examples/logging/exit_run_formatting.py", + "examples/logging/markers_and_storage.py", + "examples/logging/verbosity_control.py", +] + + +@pytest.mark.parametrize("example_path", SAFE_EXAMPLES) +def test_safe_example_runs(example_path: str, tmp_path: Path) -> None: + """Keep runnable examples aligned with the installed package surface.""" + env = os.environ.copy() + env.pop("OVERRIDE_STDIN", None) + + result = subprocess.run( + [sys.executable, str(REPO_ROOT / example_path)], + cwd=tmp_path, + env=env, + capture_output=True, + text=True, + timeout=15, + check=False, + ) + + assert result.returncode == 0, f"{example_path} failed\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" From acf2d22910534dfc0c6d8edabe2f9d14f07da8d3 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 
Jun 2026 02:31:39 -0500 Subject: [PATCH 028/287] feat: add first-class data workflows --- README.md | 7 +- docs/package-surface.md | 23 +++ examples/core/composed_workflows.py | 16 +- src/extended_data/__init__.py | 4 + src/extended_data/workflows/__init__.py | 216 +++++++++++++++++++++++- tests/core/test_package_surface.py | 3 + tests/core/test_workflows.py | 111 ++++++++++++ 7 files changed, 369 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 25c5c5f..a764d2d 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ CrewAI releases pull vulnerable `chromadb` versions transitively. ## Usage ```python -from extended_data import ConnectorFabric, ExtendedDict, InputProvider, Logging, decode_file, decode_json, encode_yaml +from extended_data import ConnectorFabric, DataWorkflow, ExtendedDict, InputProvider, Logging, decode_file, decode_json, encode_yaml logger = Logging(logger_name="example") inputs = InputProvider(inputs={"GITHUB_OWNER": "jbcom"}, from_environment=False) @@ -38,9 +38,11 @@ connectors = ConnectorFabric(inputs=inputs.inputs, logger=logger) data = decode_json('{"status": "ok"}') payload = ExtendedDict(data).deep_merge({"source": "example"}) decoded_file = decode_file('{"service": {"name": "api"}}', suffix="json", as_extended=True) +workflow = DataWorkflow.from_value(payload).then(("normalize", lambda data: data.unhump())).result() print(encode_yaml(payload.data)) print(decoded_file["service"]["name"].upper_first()) +print(workflow.as_builtin()) ``` The fabric can also instantiate any registered connector by name: @@ -101,6 +103,9 @@ The package is intentionally tiered: Tier 3 decoders can opt into Tier 2 containers with `as_extended=True`, so decoded files, Base64 payloads, and directed inputs can immediately use `ExtendedDict`, `ExtendedList`, `ExtendedSet`, and `ExtendedString` methods. 
+`DataWorkflow` makes those compositions first-class: read or decode data, +apply named transformations, write an output artifact, and keep the step trail +in a `WorkflowResult`. Missing workflow inputs and empty writes fail loudly. More detail lives in [`docs/package-surface.md`](docs/package-surface.md). diff --git a/docs/package-surface.md b/docs/package-surface.md index 73c4320..f1cab32 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -7,6 +7,7 @@ namespace. The root package exposes the primitives users need most often: from extended_data import ( ConnectorFabric, DataDecodeError, + DataWorkflow, ExtendedDict, ExtendedList, ExtendedSet, @@ -64,6 +65,28 @@ assert payload["service"]["name"].upper_first() == "Api" Use `extend_data(value)` to promote existing plain data and `to_builtin(value)` to lower extended containers back to standard Python data. +`DataWorkflow` is the Tier 3 composition surface for higher-order data +processing. It reads or decodes structured data through the file and format +processors, promotes values into Tier 2 containers by default, applies named +transformation steps, writes output artifacts, and returns a `WorkflowResult` +with the completed value, output path, and step trail. + +```python +from extended_data import DataWorkflow + +env_data = DataWorkflow.from_file("config/dev.yaml").value +result = ( + DataWorkflow.from_file("config/base.yaml") + .then(("merge-env", lambda data: data.deep_merge(env_data))) + .write("build/config.yaml") +) + +assert result.steps == ("read:config/base.yaml", "merge-env", "write:build/config.yaml") +``` + +Missing workflow input files raise `FileNotFoundError`, and empty workflow +writes raise `ValueError` unless `allow_empty=True` is passed. + `InputProvider` loads input data from explicit mappings, environment variables, and stdin, then decodes or coerces values through the primitive layer. 
Its `decode_input(..., as_extended=True)` path gives input-driven workflows the same diff --git a/examples/core/composed_workflows.py b/examples/core/composed_workflows.py index 1dfdc9c..78e25c4 100644 --- a/examples/core/composed_workflows.py +++ b/examples/core/composed_workflows.py @@ -11,6 +11,7 @@ from tempfile import TemporaryDirectory from extended_data import ( + DataWorkflow, ExtendedDict, base64_decode, base64_encode, @@ -44,17 +45,16 @@ def demonstrate_layered_config_workflow() -> None: write_file("config/base.yaml", base_config, tld=tld) write_file("config/dev.yaml", env_config, tld=tld) - base_text = read_file("config/base.yaml", tld=tld) - env_text = read_file("config/dev.yaml", tld=tld) - - base_data = decode_file(base_text, file_path="config/base.yaml", as_extended=True) - env_data = decode_file(env_text, file_path="config/dev.yaml", as_extended=True) - merged = base_data.deep_merge(env_data) - - write_file("build/config.yaml", merged, tld=tld) + env_data = DataWorkflow.from_file("config/dev.yaml", tld=tld).value + result = ( + DataWorkflow.from_file("config/base.yaml", tld=tld) + .then(("merge-env", lambda data: data.deep_merge(env_data))) + .write("build/config.yaml", tld=tld) + ) merged_text = read_file("build/config.yaml", tld=tld) print(merged_text) + print(f"Steps: {', '.join(result.steps)}") def demonstrate_terraform_handoff_workflow() -> None: diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index c9ac23c..feddb36 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -112,6 +112,7 @@ strtotime, typeof, ) +from extended_data.workflows import DataWorkflow, WorkflowResult, WorkflowStep if TYPE_CHECKING: @@ -162,6 +163,7 @@ def __getattr__(name: str) -> Any: "ConnectorFabric", "ConnectorInfo", "DataDecodeError", + "DataWorkflow", "ExitRunError", "ExtendedDict", "ExtendedList", @@ -173,6 +175,8 @@ def __getattr__(name: str) -> Any: "Logging", "SortedDefaultDict", "VendorConnectorBase", + 
"WorkflowResult", + "WorkflowStep", "__version__", "all_non_empty", "all_non_empty_in_dict", diff --git a/src/extended_data/workflows/__init__.py b/src/extended_data/workflows/__init__.py index 63a5e94..3906f6b 100644 --- a/src/extended_data/workflows/__init__.py +++ b/src/extended_data/workflows/__init__.py @@ -1,3 +1,215 @@ -"""Workflow composition helpers for Extended Data.""" +"""Tier 3 workflow composition over Extended Data primitives and containers.""" -__all__: list[str] = [] +from __future__ import annotations + +from collections.abc import Callable, Iterable +from dataclasses import dataclass +from pathlib import Path +from typing import Any, TypeAlias, cast + +from extended_data.containers import extend_data, to_builtin +from extended_data.io.files import FilePath, decode_file, read_file, write_file + + +WorkflowAction: TypeAlias = Callable[[Any], Any] +StepLike: TypeAlias = "WorkflowStep | tuple[str, WorkflowAction] | WorkflowAction" + + +@dataclass(frozen=True, slots=True) +class WorkflowStep: + """A named transformation in a data workflow.""" + + name: str + action: WorkflowAction + + def __call__(self, value: Any) -> Any: + """Apply the step to a workflow value.""" + return self.action(value) + + +@dataclass(frozen=True, slots=True) +class WorkflowResult: + """The completed value and audit trail for a data workflow.""" + + value: Any + steps: tuple[str, ...] 
= () + output_path: Path | None = None + + def as_builtin(self) -> Any: + """Return the workflow value lowered to built-in Python containers.""" + return to_builtin(self.value) + + def as_extended(self) -> Any: + """Return the workflow value promoted to Extended Data containers.""" + return extend_data(self.value) + + +class DataWorkflow: + """Compose file decoding, transformations, and exports as a Tier 3 primitive.""" + + def __init__( + self, + value: Any, + *, + steps: Iterable[str] = (), + as_extended: bool = True, + ) -> None: + """Create a workflow from an existing value.""" + self._value = extend_data(value) if as_extended else value + self._steps = tuple(steps) + self._as_extended = as_extended + + @property + def value(self) -> Any: + """Return the current workflow value.""" + return self._value + + @property + def steps(self) -> tuple[str, ...]: + """Return the names of executed workflow steps.""" + return self._steps + + @classmethod + def from_value(cls, value: Any, *, as_extended: bool = True) -> DataWorkflow: + """Start a workflow from an in-memory value.""" + return cls(value, steps=("value",), as_extended=as_extended) + + @classmethod + def decode( + cls, + file_data: str | memoryview | bytes | bytearray, + *, + file_path: FilePath | None = None, + suffix: str | None = None, + as_extended: bool = True, + ) -> DataWorkflow: + """Start a workflow by decoding structured text or bytes.""" + decoded = decode_file(file_data, file_path=file_path, suffix=suffix, as_extended=as_extended) + return cls(decoded, steps=(_decode_step_name(file_path=file_path, suffix=suffix),), as_extended=as_extended) + + @classmethod + def from_file( + cls, + file_path: FilePath, + *, + suffix: str | None = None, + as_extended: bool = True, + charset: str = "utf-8", + errors: str = "strict", + tld: Path | None = None, + ) -> DataWorkflow: + """Read and decode a local file or URL into a workflow.""" + file_data = read_file(file_path, charset=charset, errors=errors, tld=tld) + if 
file_data is None: + raise FileNotFoundError(str(file_path)) + + decoded = decode_file( + cast(str | memoryview | bytes | bytearray, file_data), + file_path=file_path, + suffix=suffix, + as_extended=as_extended, + ) + return cls(decoded, steps=(f"read:{file_path}",), as_extended=as_extended) + + def then( + self, + step: StepLike, + *, + name: str | None = None, + as_extended: bool | None = None, + ) -> DataWorkflow: + """Apply one transformation and return the next workflow state.""" + workflow_step = _coerce_step(step, name=name) + next_value = workflow_step(self._value) + should_extend = self._as_extended if as_extended is None else as_extended + if should_extend: + next_value = extend_data(next_value) + return DataWorkflow( + next_value, + steps=(*self._steps, workflow_step.name), + as_extended=should_extend, + ) + + def run(self, *steps: StepLike, as_extended: bool | None = None) -> DataWorkflow: + """Apply multiple transformations in order.""" + workflow = self + for step in steps: + workflow = workflow.then(step, as_extended=as_extended) + return workflow + + def as_builtin(self) -> DataWorkflow: + """Return the next workflow state with built-in Python containers.""" + return DataWorkflow(to_builtin(self._value), steps=(*self._steps, "to_builtin"), as_extended=False) + + def as_extended(self) -> DataWorkflow: + """Return the next workflow state with Extended Data containers.""" + return DataWorkflow(extend_data(self._value), steps=(*self._steps, "as_extended"), as_extended=True) + + def result(self) -> WorkflowResult: + """Return a completed workflow result without writing an output artifact.""" + return WorkflowResult(value=self._value, steps=self._steps) + + def write( + self, + file_path: FilePath, + *, + encoding: str | None = None, + charset: str = "utf-8", + allow_empty: bool = False, + tld: Path | None = None, + as_builtin: bool = True, + ) -> WorkflowResult: + """Write the current workflow value and return the completed result.""" + output_value = 
to_builtin(self._value) if as_builtin else self._value + output_path = write_file( + file_path, + output_value, + encoding=encoding, + charset=charset, + allow_empty=allow_empty, + tld=tld, + ) + if output_path is None: + raise ValueError("Workflow output was empty; pass allow_empty=True to write it") + + return WorkflowResult( + value=self._value, + steps=(*self._steps, f"write:{file_path}"), + output_path=output_path, + ) + + +def _coerce_step(step: StepLike, *, name: str | None = None) -> WorkflowStep: + """Normalize supported step declarations to WorkflowStep.""" + if isinstance(step, WorkflowStep): + if name is None: + return step + return WorkflowStep(name=name, action=step.action) + + if isinstance(step, tuple): + step_name, action = step + return WorkflowStep(name=name or step_name, action=action) + + inferred_name = name + if inferred_name is None: + raw_name = getattr(step, "__name__", None) + inferred_name = raw_name if isinstance(raw_name, str) else step.__class__.__name__ + return WorkflowStep(name=inferred_name, action=step) + + +def _decode_step_name(*, file_path: FilePath | None, suffix: str | None) -> str: + """Return a stable step name for decode-only workflows.""" + if file_path is not None: + return f"decode:{file_path}" + if suffix is not None: + return f"decode:{suffix}" + return "decode" + + +__all__ = [ + "DataWorkflow", + "StepLike", + "WorkflowAction", + "WorkflowResult", + "WorkflowStep", +] diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 3d4f897..702719a 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -34,10 +34,13 @@ def test_clean_major_version_public_names() -> None: def test_root_exports_first_class_integrated_primitives() -> None: """Inputs, logging, and connector fabric are available from the root package.""" assert extended_data.DataDecodeError.__name__ == "DataDecodeError" + assert extended_data.DataWorkflow.__name__ == "DataWorkflow" assert 
extended_data.InputProvider is InputProvider assert extended_data.Logging is Logging assert extended_data.ConnectorFabric is ConnectorFabric assert extended_data.ConnectorInfo.__name__ == "ConnectorInfo" + assert extended_data.WorkflowResult.__name__ == "WorkflowResult" + assert extended_data.WorkflowStep.__name__ == "WorkflowStep" assert callable(extended_data.directed_inputs) assert callable(extended_data.get_connector) assert callable(extended_data.list_connector_info) diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index aa536ad..3b71754 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -4,8 +4,13 @@ from pathlib import Path +import pytest + from extended_data import ( + DataWorkflow, ExtendedDict, + WorkflowResult, + WorkflowStep, base64_decode, base64_encode, decode_file, @@ -18,6 +23,112 @@ from extended_data.primitives.formats.yaml import YamlTagged +def test_data_workflow_layered_config_round_trip(tmp_path: Path) -> None: + """DataWorkflow composes Tier 3 file IO with Tier 2 container transforms.""" + base_config = { + "service": {"name": "api", "debug": False}, + "ports": [8080], + "features": {"auth": True}, + } + env_config = { + "service": {"debug": True}, + "ports": [8081], + "features": {"metrics": True}, + } + + write_file("config/base.yaml", base_config, tld=tmp_path) + write_file("config/dev.yaml", env_config, tld=tmp_path) + + env_data = DataWorkflow.from_file("config/dev.yaml", tld=tmp_path).value + result = ( + DataWorkflow.from_file("config/base.yaml", tld=tmp_path) + .then(("merge-env", lambda data: data.deep_merge(env_data))) + .write("build/config.yaml", tld=tmp_path) + ) + + assert isinstance(result, WorkflowResult) + assert result.output_path == tmp_path / "build" / "config.yaml" + assert result.steps == ("read:config/base.yaml", "merge-env", "write:build/config.yaml") + assert result.as_builtin() == { + "service": {"name": "api", "debug": True}, + "ports": [8080, 8081], + "features": 
{"auth": True, "metrics": True}, + } + assert decode_file(read_file(result.output_path), file_path=result.output_path) == result.as_builtin() + + +def test_data_workflow_runs_named_value_transforms() -> None: + """DataWorkflow can normalize in-memory API payloads through named steps.""" + raw_payload = { + "HTTPResponseCode": 200, + "SelectedServices": filter_list(["api", "worker", "db"], denylist=["db"]), + "Tags": ["api", "api", "docs"], + } + + workflow = DataWorkflow.from_value(raw_payload).run( + ("deduplicate", lambda data: data.deduplicate()), + ("unhump", lambda data: data.unhump()), + ) + result = workflow.result() + + assert workflow.steps == ("value", "deduplicate", "unhump") + assert isinstance(workflow.value, ExtendedDict) + assert result.as_builtin() == { + "http_response_code": 200, + "selected_services": ["api", "worker"], + "tags": ["api", "docs"], + } + + +def test_data_workflow_preserves_extended_policy_after_file_decode(tmp_path: Path) -> None: + """Decoded workflows keep promoting plain transform outputs by default.""" + write_file("config/service.json", {"service": {"name": "api"}}, tld=tmp_path) + + result = ( + DataWorkflow.from_file("config/service.json", tld=tmp_path) + .then(("project", lambda _data: {"name": "api"})) + .result() + ) + + assert isinstance(result.value, ExtendedDict) + assert result.value["name"].upper_first() == "Api" + + +def test_workflow_step_can_be_reused() -> None: + """WorkflowStep gives reusable transforms first-class names.""" + select_service_name = WorkflowStep("select-service-name", lambda data: data["service"]["name"].upper_first()) + + result = DataWorkflow.decode('{"service": {"name": "api"}}', suffix="json").then(select_service_name).result() + + assert result.steps == ("decode:json", "select-service-name") + assert result.value == "Api" + + +def test_data_workflow_can_lower_and_promote_values() -> None: + """Workflow states can move between Tier 2 containers and built-ins explicitly.""" + workflow = 
DataWorkflow.from_value({"service": {"name": "api"}}) + builtin = workflow.as_builtin() + extended = builtin.as_extended() + + assert isinstance(workflow.value, ExtendedDict) + assert isinstance(builtin.value, dict) + assert not isinstance(builtin.value, ExtendedDict) + assert isinstance(extended.value, ExtendedDict) + assert extended.value["service"]["name"].upper_first() == "Api" + + +def test_data_workflow_missing_file_fails_loudly(tmp_path: Path) -> None: + """Missing workflow inputs are hard failures, not placeholder results.""" + with pytest.raises(FileNotFoundError): + DataWorkflow.from_file("config/missing.yaml", tld=tmp_path) + + +def test_data_workflow_empty_write_fails_loudly(tmp_path: Path) -> None: + """Empty workflow outputs require an explicit opt-in.""" + with pytest.raises(ValueError, match="Workflow output was empty"): + DataWorkflow.from_value(None).write("build/empty.json", tld=tmp_path) + + def test_layered_config_workflow_round_trip(tmp_path: Path) -> None: """Compose file helpers and deep merging through a layered config workflow.""" base_config = { From 1ddc0764718402e38f3ecc643156b544ad0c306d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 02:36:31 -0500 Subject: [PATCH 029/287] fix: verify meshy webhook signatures --- src/extended_data/connectors/meshy/README.md | 18 +++++ .../connectors/meshy/webhooks/handler.py | 65 ++++++++++++++++-- tests/connectors/meshy/test_webhooks.py | 68 ++++++++++++++++++- 3 files changed, 144 insertions(+), 7 deletions(-) diff --git a/src/extended_data/connectors/meshy/README.md b/src/extended_data/connectors/meshy/README.md index 41527e4..96f59c1 100644 --- a/src/extended_data/connectors/meshy/README.md +++ b/src/extended_data/connectors/meshy/README.md @@ -78,6 +78,24 @@ generator = AssetGenerator() manifest = generator.generate_model(spec, wait=True) ``` +### Webhooks + +`WebhookHandler` can verify raw request bodies before parsing or mutating task +state. 
Configure a shared secret and pass the raw body plus the signature header +value to `handle_signed_webhook()`: + +```python +from extended_data.connectors.meshy.webhooks import WebhookHandler + +handler = WebhookHandler(repository=repo, webhook_secret="shared-secret") +result = handler.handle_signed_webhook(raw_body, request.headers["X-Webhook-Signature"]) +``` + +Signatures are HMAC-SHA256 over the raw payload bytes. Hex, Base64, URL-safe +Base64, and `sha256=`-prefixed values are accepted. If you do not configure a +secret, `verify_signature()` returns `False` instead of accepting unsigned +payloads. + ### Preset Specs Pre-configured specs for common assets: diff --git a/src/extended_data/connectors/meshy/webhooks/handler.py b/src/extended_data/connectors/meshy/webhooks/handler.py index 290286e..f77b1bd 100644 --- a/src/extended_data/connectors/meshy/webhooks/handler.py +++ b/src/extended_data/connectors/meshy/webhooks/handler.py @@ -2,7 +2,9 @@ from __future__ import annotations +import base64 import hashlib +import hmac from datetime import datetime, timezone from typing import Any @@ -30,15 +32,43 @@ def __init__( self, repository: TaskRepository, download_artifacts: bool = True, - ): + webhook_secret: str | bytes | None = None, + ) -> None: """Initialize webhook handler. 
Args: repository: TaskRepository for updating state download_artifacts: Whether to download GLB files on SUCCEEDED + webhook_secret: Shared secret used to verify HMAC-SHA256 signatures """ self.repository = repository self.download_artifacts = download_artifacts + self.webhook_secret = webhook_secret + + def handle_signed_webhook( + self, + payload: bytes, + signature: str, + project: str | None = None, + spec_hash: str | None = None, + ) -> dict[str, Any]: + """Verify a raw webhook payload before parsing and processing it.""" + if not self.verify_signature(payload, signature): + return { + "status": "error", + "message": "Invalid webhook signature", + } + + try: + parsed_payload = MeshyWebhookPayload.model_validate_json(payload) + except ValueError as exc: + return { + "status": "error", + "message": "Invalid webhook payload", + "error": str(exc), + } + + return self.handle_webhook(parsed_payload, project=project, spec_hash=spec_hash) def handle_webhook( self, payload: MeshyWebhookPayload, project: str | None = None, spec_hash: str | None = None @@ -140,6 +170,33 @@ def _download_glb_artifact(self, project: str, spec_hash: str, service: str, glb except Exception: return None - def verify_signature(self, payload: bytes, signature: str) -> bool: - """Verify webhook signature (stubbed for testing).""" - return True # Stub for testing + def verify_signature( + self, + payload: bytes, + signature: str, + *, + secret: str | bytes | None = None, + ) -> bool: + """Verify an HMAC-SHA256 webhook signature for a raw payload.""" + secret_value = self.webhook_secret if secret is None else secret + if secret_value is None or not signature.strip(): + return False + + secret_bytes = secret_value.encode("utf-8") if isinstance(secret_value, str) else secret_value + if not secret_bytes: + return False + + digest = hmac.new(secret_bytes, payload, hashlib.sha256).digest() + expected_hex = digest.hex() + expected_base64 = base64.b64encode(digest).decode("ascii") + 
expected_urlsafe_base64 = base64.urlsafe_b64encode(digest).decode("ascii") + + signature_value = signature.strip() + if signature_value.casefold().startswith("sha256="): + signature_value = signature_value.split("=", 1)[1].strip() + + return ( + hmac.compare_digest(signature_value.casefold(), expected_hex) + or hmac.compare_digest(signature_value, expected_base64) + or hmac.compare_digest(signature_value, expected_urlsafe_base64) + ) diff --git a/tests/connectors/meshy/test_webhooks.py b/tests/connectors/meshy/test_webhooks.py index 3bf9d91..327ed23 100644 --- a/tests/connectors/meshy/test_webhooks.py +++ b/tests/connectors/meshy/test_webhooks.py @@ -2,6 +2,11 @@ from __future__ import annotations +import base64 +import hashlib +import hmac +import json + from datetime import datetime, timezone from unittest.mock import MagicMock, patch @@ -260,9 +265,66 @@ def test_handle_webhook_no_download_when_disabled(self, mock_repository, webhook assert result["artifacts_downloaded"] == 0 mock_base.download.assert_not_called() - def test_verify_signature_stub(self, webhook_handler): - """Test that signature verification stub returns True.""" - assert webhook_handler.verify_signature(b"payload", "signature") is True + def test_verify_signature_requires_secret(self, webhook_handler): + """Unsigned handlers reject signatures instead of accepting placeholders.""" + payload = b'{"id":"task-12345-abcde"}' + signature = hmac.new(b"secret", payload, hashlib.sha256).hexdigest() + + assert webhook_handler.verify_signature(payload, signature) is False + + def test_verify_signature_accepts_hmac_sha256_hex(self, mock_repository): + """Verify raw payloads with HMAC-SHA256 hex signatures.""" + payload = b'{"id":"task-12345-abcde"}' + signature = hmac.new(b"secret", payload, hashlib.sha256).hexdigest() + handler = WebhookHandler(repository=mock_repository, webhook_secret="secret") + + assert handler.verify_signature(payload, signature) is True + assert handler.verify_signature(payload, 
f"sha256={signature.upper()}") is True + assert handler.verify_signature(b'{"id":"tampered"}', signature) is False + assert handler.verify_signature(payload, "not-a-signature") is False + + def test_verify_signature_accepts_hmac_sha256_base64(self, mock_repository): + """Verify raw payloads with HMAC-SHA256 base64 signatures.""" + payload = b'{"id":"task-12345-abcde"}' + digest = hmac.new(b"secret", payload, hashlib.sha256).digest() + signature = base64.b64encode(digest).decode("ascii") + handler = WebhookHandler(repository=mock_repository, webhook_secret=b"secret") + + assert handler.verify_signature(payload, signature) is True + + def test_handle_signed_webhook_rejects_invalid_signature( + self, mock_repository, webhook_payload_succeeded + ): + """Invalid signatures fail before payload parsing or repository mutation.""" + payload = json.dumps(webhook_payload_succeeded, separators=(",", ":")).encode("utf-8") + handler = WebhookHandler(repository=mock_repository, webhook_secret="secret") + + result = handler.handle_signed_webhook(payload, "invalid") + + assert result == { + "status": "error", + "message": "Invalid webhook signature", + } + mock_repository.find_task_by_id.assert_not_called() + mock_repository.record_task_update.assert_not_called() + + def test_handle_signed_webhook_processes_valid_signature( + self, mock_repository, webhook_payload_succeeded + ): + """Valid signed raw payloads are parsed and processed.""" + payload = json.dumps(webhook_payload_succeeded, separators=(",", ":")).encode("utf-8") + signature = hmac.new(b"secret", payload, hashlib.sha256).hexdigest() + handler = WebhookHandler( + repository=mock_repository, + download_artifacts=False, + webhook_secret="secret", + ) + + result = handler.handle_signed_webhook(payload, signature) + + assert result["status"] == "success" + assert result["task_id"] == "task-12345-abcde" + mock_repository.record_task_update.assert_called_once() class TestWebhookHandlerArtifactDownload: From 
f8d1f676a40b2ae84ccc20989dcbe24328b20d3d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 02:40:13 -0500 Subject: [PATCH 030/287] docs: align meshy connector examples --- src/extended_data/connectors/meshy/README.md | 130 ++++++++---------- .../meshy/persistence/vector_store.py | 2 +- tests/connectors/meshy/conftest.py | 2 +- 3 files changed, 61 insertions(+), 73 deletions(-) diff --git a/src/extended_data/connectors/meshy/README.md b/src/extended_data/connectors/meshy/README.md index 96f59c1..95cb259 100644 --- a/src/extended_data/connectors/meshy/README.md +++ b/src/extended_data/connectors/meshy/README.md @@ -1,84 +1,73 @@ -# Meshy SDK +# Meshy Connector -Modular Python package for generating game assets via Meshy API. +Meshy support is part of `extended-data` and lives under +`extended_data.connectors.meshy`. It provides functional API helpers, a +`MeshyConnector` fabric adapter, job orchestration, webhook handling, AI tool +adapters, and an MCP server. -## Features +## Install -- **All Endpoints**: Text-to-3D, Text-to-Texture, Image-to-3D -- **Rate Limiting**: Automatic 429 handling with exponential backoff -- **Type Safety**: Pydantic models for all API types -- **Job Orchestration**: High-level `AssetGenerator` for game asset workflows -- **Auto-Download**: Fetches GLB models, PBR textures, thumbnails -- **Metadata**: JSON manifests for ECS integration - -## Quick Start - -```python -from tools.meshy import AssetGenerator, otter_player_spec - -# Generate player character -generator = AssetGenerator(output_root="client/public") -manifest = generator.generate_model(otter_player_spec(), wait=True) - -print(f"Model: {manifest.model_path}") -print(f"Textures: {manifest.texture_paths}") +```bash +pip install "extended-data[meshy]" ``` -## CLI Usage +Use the `vector` extra only when you need local vector search over generated +asset metadata: ```bash -python3 scripts/generate_assets.py +pip install "extended-data[meshy,vector]" ``` -Generates 6 core 
assets: -- Player otter -- 2 NPC otters -- Bass fish -- Cattail reeds -- Wooden dock - -Assets output to `client/public/models/` with manifests. +## Functional API -## API +```python +from extended_data.connectors.meshy import text3d +from extended_data.connectors.meshy.models import ArtStyle, Text3DRequest + +task_id = text3d.create( + Text3DRequest( + mode="preview", + prompt="game-ready low-poly wooden crate with metal bands", + art_style=ArtStyle.REALISTIC, + target_polycount=5000, + enable_pbr=True, + ) +) -### Client +result = text3d.poll(task_id) +print(result.status) +``` -```python -from tools.meshy import MeshyClient, Text3DRequest, ArtStyle +The package also exposes `image3d`, `rigging`, `animate`, and `retexture` +modules from `extended_data.connectors.meshy`. -client = MeshyClient() # Uses MESHY_API_KEY env var +## Connector Fabric -# Create task -task_id = client.create_text_to_3d(Text3DRequest( - prompt="anthropomorphic otter character", - art_style=ArtStyle.REALISTIC, - target_polycount=15000, - enable_pbr=True -)) +```python +from extended_data import ConnectorFabric -# Poll until complete -result = client.poll_until_complete(task_id, task_type="text-to-3d") -client.download_file(result.model_urls.glb, "output.glb") +fabric = ConnectorFabric(inputs={"MESHY_API_KEY": "..."}, from_environment=False) +meshy = fabric.get_connector("meshy") ``` -### Asset Generator +## Job Orchestration ```python -from tools.meshy import AssetGenerator, GameAssetSpec, AssetIntent, ArtStyle - -spec = GameAssetSpec( - intent=AssetIntent.CREATURE_PREY, - description="realistic marsh frog, green skin, sitting pose", - art_style=ArtStyle.REALISTIC, - target_polycount=5000, - output_path="models/creatures" -) +from extended_data.connectors.meshy.jobs import AssetGenerator, example_character_spec -generator = AssetGenerator() -manifest = generator.generate_model(spec, wait=True) +generator = AssetGenerator(output_root="client/public") +manifest = 
generator.generate_model(example_character_spec(), wait=True) + +print(manifest.model_path) ``` -### Webhooks +Built-in example specs are available as: + +- `example_character_spec()` +- `example_prop_spec()` +- `example_environment_spec()` + +## Webhooks `WebhookHandler` can verify raw request bodies before parsing or mutating task state. Configure a shared secret and pass the raw body plus the signature header @@ -96,19 +85,18 @@ Base64, and `sha256=`-prefixed values are accepted. If you do not configure a secret, `verify_signature()` returns `False` instead of accepting unsigned payloads. -### Preset Specs +## Tools And MCP -Pre-configured specs for common assets: +```python +from extended_data.connectors.meshy.tools import get_langchain_tools, get_strands_tools, get_tools -- `otter_player_spec()` - Player character -- `otter_npc_male_spec()` - Male NPC -- `otter_npc_female_spec()` - Female NPC -- `fish_bass_spec()` - Bass fish -- `cattail_reeds_spec()` - Marsh vegetation -- `wooden_dock_spec()` - Dock structure +tool_definitions = get_tools() +langchain_tools = get_langchain_tools() +strands_tools = get_strands_tools() +``` -## Dependencies +Run the Meshy MCP server with: -- `httpx` - HTTP client -- `tenacity` - Retry logic -- `pydantic` - Type validation +```bash +meshy-mcp +``` diff --git a/src/extended_data/connectors/meshy/persistence/vector_store.py b/src/extended_data/connectors/meshy/persistence/vector_store.py index d018833..8ef9829 100644 --- a/src/extended_data/connectors/meshy/persistence/vector_store.py +++ b/src/extended_data/connectors/meshy/persistence/vector_store.py @@ -27,7 +27,7 @@ existing = store.get_by_spec_hash("abc123") Requirements: - pip install mesh-toolkit[vector] + pip install "extended-data[meshy,vector]" The vector extra includes: - sqlite-vec (vector similarity extension) diff --git a/tests/connectors/meshy/conftest.py b/tests/connectors/meshy/conftest.py index 91c379f..fec352b 100644 --- a/tests/connectors/meshy/conftest.py +++ 
b/tests/connectors/meshy/conftest.py @@ -1,4 +1,4 @@ -"""Pytest fixtures for mesh-toolkit tests.""" +"""Pytest fixtures for Meshy connector tests.""" from __future__ import annotations From 26d3f3454d1eccb7fd16769f97fc13285673b1da Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 02:45:38 -0500 Subject: [PATCH 031/287] fix: align secrets tool error handling default --- src/extended_data/connectors/secrets/tools.py | 6 ++-- tests/connectors/test_secrets.py | 34 +++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/src/extended_data/connectors/secrets/tools.py b/src/extended_data/connectors/secrets/tools.py index ecd699e..3a8fe88 100644 --- a/src/extended_data/connectors/secrets/tools.py +++ b/src/extended_data/connectors/secrets/tools.py @@ -36,8 +36,8 @@ class RunPipelineSchema(BaseModel): description="Comma-separated list of targets to sync (empty for all)", ) continue_on_error: bool = Field( - False, - description="Continue processing if errors occur", + True, + description="Continue processing remaining targets after an error", ) @@ -78,7 +78,7 @@ def run_pipeline( dry_run: bool = False, operation: str = "pipeline", targets: str | None = None, - continue_on_error: bool = False, + continue_on_error: bool = True, ) -> dict[str, Any]: """Run the secrets synchronization pipeline. 
diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index e647394..019f7d2 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -11,7 +11,9 @@ SecretsConnector, SyncOperation, SyncOptions, + SyncResult, ) +from extended_data.connectors.secrets.tools import RunPipelineSchema, run_pipeline @pytest.fixture @@ -172,3 +174,35 @@ def test_cli_validate_config(mock_run: MagicMock, connector: SecretsConnector) - args = mock_run.call_args[0][0] assert "validate" in args + + +@patch("extended_data.connectors.secrets.SecretsConnector") +def test_run_pipeline_tool_default_continue_on_error_matches_cli(mock_connector_class: MagicMock) -> None: + mock_connector = mock_connector_class.return_value + mock_connector.run_pipeline.return_value = SyncResult(success=True, secrets_processed=3) + + result = run_pipeline("config.yaml") + + options = mock_connector.run_pipeline.call_args.args[1] + assert isinstance(options, SyncOptions) + assert options.continue_on_error is True + assert result["success"] is True + assert result["secrets_processed"] == 3 + + +@patch("extended_data.connectors.secrets.SecretsConnector") +def test_run_pipeline_tool_can_disable_continue_on_error(mock_connector_class: MagicMock) -> None: + mock_connector = mock_connector_class.return_value + mock_connector.run_pipeline.return_value = SyncResult(success=True) + + run_pipeline("config.yaml", continue_on_error=False) + + options = mock_connector.run_pipeline.call_args.args[1] + assert isinstance(options, SyncOptions) + assert options.continue_on_error is False + + +def test_run_pipeline_schema_default_continue_on_error_matches_cli() -> None: + schema = RunPipelineSchema(config_path="config.yaml") + + assert schema.continue_on_error is True From c80428883e1f8bc3e676d31e12e1086bfcfa8aa8 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 02:55:34 -0500 Subject: [PATCH 032/287] fix: require secretsync result envelope --- README.md | 4 ++ 
docs/package-surface.md | 5 ++ .../connectors/secrets/__init__.py | 8 +++ tests/connectors/test_secrets.py | 57 +++++++++++++++++++ tests/examples/test_safe_examples.py | 30 ++++++++++ 5 files changed, 104 insertions(+) diff --git a/README.md b/README.md index a764d2d..6546c38 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,10 @@ uses the registry to resolve connectors by name, injects shared input/logging context, caches connector instances, and lets specialized helpers coexist with generic vendor lookup. +The `secrets` connector integrates with the standalone `secretsync` CLI or +native bindings. CLI fallback expects `secretsync pipeline --output json` to +return the stable pipeline result envelope used by this package. + The package is intentionally tiered: - Tier 1 functions stay stateless and composable. diff --git a/docs/package-surface.md b/docs/package-surface.md index f1cab32..076e1da 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -128,6 +128,11 @@ Connectors that inherit `VendorConnectorBase` can keep raw transport access with verb-specific helpers to decode HTTP JSON, YAML, TOML, HCL, or text responses through the same Tier 2 container bridge used by file and input decoding. +The `secrets` adapter is the Python-facing bridge to the standalone +`secretsync` project. It uses native bindings when present and otherwise falls +back to the CLI, which must emit the stable `secretsync pipeline --output json` +result envelope for both dry-run and apply runs. 
+ Use the catalog helpers when a workflow needs to inspect which integrations can run in the current environment: diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 887882e..93c92a0 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -402,6 +402,14 @@ def _cli_run_pipeline( if result.returncode == 0: output = json.loads(result.stdout) + if not isinstance(output, dict) or "success" not in output: + return SyncResult( + success=False, + error_message=( + "Unsupported secretsync JSON output: expected pipeline result envelope. " + "Upgrade secretsync or use native bindings." + ), + ) return SyncResult.from_cli_output(output) else: return SyncResult( diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index 019f7d2..e0d8d29 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -137,6 +137,63 @@ def test_cli_run_pipeline_default_output_is_json(mock_run: MagicMock, connector: assert "--continue-on-error=true" in args +@patch("subprocess.run") +def test_cli_run_pipeline_parses_result_envelope(mock_run: MagicMock, connector: SecretsConnector) -> None: + output = { + "success": True, + "target_count": 2, + "secrets_processed": 5, + "secrets_added": 1, + "secrets_modified": 2, + "secrets_removed": 0, + "secrets_unchanged": 2, + "duration_ms": 321, + "results": [ + {"target": "prod", "phase": "merge", "success": True}, + {"target": "prod", "phase": "sync", "success": True}, + ], + "diff_output": '{"summary":{"added":1}}', + "diff": {"dry_run": True}, + } + mock_run.return_value = MagicMock( + returncode=0, + stdout=json.dumps(output), + stderr="", + ) + + result = connector.run_pipeline("config.yaml") + + assert result.success is True + assert result.target_count == 2 + assert result.secrets_processed == 5 + assert result.secrets_added == 1 + assert result.secrets_modified == 2 + 
assert result.secrets_unchanged == 2 + assert result.duration_ms == 321 + assert json.loads(result.results_json) == output["results"] + assert result.diff_output == '{"summary":{"added":1}}' + + +@patch("subprocess.run") +def test_cli_run_pipeline_rejects_legacy_raw_diff_json(mock_run: MagicMock, connector: SecretsConnector) -> None: + mock_run.return_value = MagicMock( + returncode=0, + stdout=json.dumps( + { + "dry_run": True, + "summary": {"added": 1, "modified": 0, "removed": 0, "unchanged": 0}, + "targets": [], + } + ), + stderr="", + ) + + result = connector.run_pipeline("config.yaml", SyncOptions(dry_run=True, compute_diff=True)) + + assert result.success is False + assert "expected pipeline result envelope" in result.error_message + + @patch("subprocess.run") def test_cli_run_pipeline_only_emits_supported_cli_flags(mock_run: MagicMock, connector: SecretsConnector) -> None: mock_run.return_value = MagicMock( diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index 2ef54ef..c6bb899 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -2,7 +2,9 @@ from __future__ import annotations +import importlib.util import os +import py_compile import subprocess import sys @@ -26,6 +28,14 @@ "examples/logging/markers_and_storage.py", "examples/logging/verbosity_control.py", ] +CONNECTOR_EXAMPLES = [ + "examples/connectors/basic_aws.py", + "examples/connectors/basic_google.py", + "examples/connectors/basic_meshy.py", + "examples/connectors/langchain_tools.py", + "examples/connectors/mcp_server.py", +] +ALL_EXAMPLES = SAFE_EXAMPLES + CONNECTOR_EXAMPLES @pytest.mark.parametrize("example_path", SAFE_EXAMPLES) @@ -45,3 +55,23 @@ def test_safe_example_runs(example_path: str, tmp_path: Path) -> None: ) assert result.returncode == 0, f"{example_path} failed\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + + +@pytest.mark.parametrize("example_path", ALL_EXAMPLES) +def 
test_example_compiles(example_path: str, tmp_path: Path) -> None: + """Every example should at least remain syntactically valid.""" + py_compile.compile(str(REPO_ROOT / example_path), cfile=str(tmp_path / "example.pyc"), doraise=True) + + +@pytest.mark.parametrize("example_path", CONNECTOR_EXAMPLES) +def test_connector_example_imports_without_live_credentials(example_path: str) -> None: + """Credential-gated connector examples should keep import-time side effects out.""" + module_path = REPO_ROOT / example_path + module_name = example_path.replace("/", "_").removesuffix(".py") + spec = importlib.util.spec_from_file_location(module_name, module_path) + assert spec is not None + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + spec.loader.exec_module(module) + + assert callable(module.main) From 4bac60983099f375b9546dc4938e3ae6cc90a1d4 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:00:18 -0500 Subject: [PATCH 033/287] feat: expose secrets types at package root --- README.md | 9 +++++++++ docs/package-surface.md | 11 +++++++++++ src/extended_data/__init__.py | 11 +++++++++++ src/extended_data/secrets/__init__.py | 2 ++ tests/core/test_package_surface.py | 8 +++++++- 5 files changed, 40 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6546c38..77d65de 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,15 @@ The `secrets` connector integrates with the standalone `secretsync` CLI or native bindings. CLI fallback expects `secretsync pipeline --output json` to return the stable pipeline result envelope used by this package. +```python +from extended_data import SecretsConnector, SyncOptions + +result = SecretsConnector(prefer_native=False).run_pipeline( + "pipeline.yaml", + SyncOptions(dry_run=True), +) +``` + The package is intentionally tiered: - Tier 1 functions stay stateless and composable. 
diff --git a/docs/package-surface.md b/docs/package-surface.md index 076e1da..eab3b1a 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -14,6 +14,8 @@ from extended_data import ( ExtendedString, InputProvider, Logging, + SecretsConnector, + SyncOptions, decode_json, extend_data, encode_yaml, @@ -133,6 +135,15 @@ The `secrets` adapter is the Python-facing bridge to the standalone back to the CLI, which must emit the stable `secretsync pipeline --output json` result envelope for both dry-run and apply runs. +```python +from extended_data import SecretsConnector, SyncOptions + +result = SecretsConnector(prefer_native=False).run_pipeline( + "pipeline.yaml", + SyncOptions(dry_run=True), +) +``` + Use the catalog helpers when a workflow needs to inspect which integrations can run in the current environment: diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index feddb36..e25bdef 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -128,6 +128,7 @@ ) from extended_data.inputs import InputProvider, directed_inputs, input_config from extended_data.logging import ExitRunError, KeyTransform, Logging + from extended_data.secrets import OutputFormat, SecretsConnector, SyncOperation, SyncOptions, SyncResult _LAZY_EXPORTS = { @@ -137,6 +138,11 @@ "InputProvider": ("extended_data.inputs", "InputProvider"), "KeyTransform": ("extended_data.logging", "KeyTransform"), "Logging": ("extended_data.logging", "Logging"), + "OutputFormat": ("extended_data.secrets", "OutputFormat"), + "SecretsConnector": ("extended_data.secrets", "SecretsConnector"), + "SyncOperation": ("extended_data.secrets", "SyncOperation"), + "SyncOptions": ("extended_data.secrets", "SyncOptions"), + "SyncResult": ("extended_data.secrets", "SyncResult"), "VendorConnectorBase": ("extended_data.connectors", "VendorConnectorBase"), "directed_inputs": ("extended_data.inputs", "directed_inputs"), "get_connector": ("extended_data.connectors", 
"get_connector"), @@ -173,7 +179,12 @@ def __getattr__(name: str) -> Any: "InputProvider", "KeyTransform", "Logging", + "OutputFormat", + "SecretsConnector", "SortedDefaultDict", + "SyncOperation", + "SyncOptions", + "SyncResult", "VendorConnectorBase", "WorkflowResult", "WorkflowStep", diff --git a/src/extended_data/secrets/__init__.py b/src/extended_data/secrets/__init__.py index c7158cf..0287f51 100644 --- a/src/extended_data/secrets/__init__.py +++ b/src/extended_data/secrets/__init__.py @@ -1,5 +1,6 @@ """Secret synchronization adapters for Extended Data.""" +from extended_data._version import __version__ from extended_data.connectors.secrets import ( ConfigInfo, OutputFormat, @@ -21,6 +22,7 @@ "SyncOperation", "SyncOptions", "SyncResult", + "__version__", "get_crewai_tools", "get_langchain_tools", "get_strands_tools", diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 702719a..dfea899 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -7,7 +7,7 @@ import extended_data import extended_data.logging as lifecycle_logging -from extended_data import connectors, inputs +from extended_data import connectors, inputs, secrets from extended_data.connectors.connectors import ConnectorFabric from extended_data.inputs import InputProvider from extended_data.logging import Logging @@ -21,6 +21,7 @@ def test_package_version_is_distribution_version() -> None: assert connectors.__version__ == expected assert inputs.__version__ == expected assert lifecycle_logging.__version__ == expected + assert secrets.__version__ == expected def test_clean_major_version_public_names() -> None: @@ -41,6 +42,11 @@ def test_root_exports_first_class_integrated_primitives() -> None: assert extended_data.ConnectorInfo.__name__ == "ConnectorInfo" assert extended_data.WorkflowResult.__name__ == "WorkflowResult" assert extended_data.WorkflowStep.__name__ == "WorkflowStep" + assert extended_data.SecretsConnector is 
secrets.SecretsConnector + assert extended_data.SyncOptions is secrets.SyncOptions + assert extended_data.SyncResult is secrets.SyncResult + assert extended_data.SyncOperation is secrets.SyncOperation + assert extended_data.OutputFormat is secrets.OutputFormat assert callable(extended_data.directed_inputs) assert callable(extended_data.get_connector) assert callable(extended_data.list_connector_info) From 0c570b4a508468bf873cd35ac2011f0a104143a4 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:03:22 -0500 Subject: [PATCH 034/287] fix: preserve secrets tools public exports --- src/extended_data/secrets/tools.py | 1 + tests/core/test_package_surface.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/src/extended_data/secrets/tools.py b/src/extended_data/secrets/tools.py index 712c45c..e576d67 100644 --- a/src/extended_data/secrets/tools.py +++ b/src/extended_data/secrets/tools.py @@ -1,3 +1,4 @@ """Tool exports for secret synchronization workflows.""" from extended_data.connectors.secrets.tools import * # noqa: F403 +from extended_data.connectors.secrets.tools import __all__ # noqa: F401 diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index dfea899..989cf5b 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -6,6 +6,7 @@ import extended_data import extended_data.logging as lifecycle_logging +import extended_data.secrets.tools as secrets_tools from extended_data import connectors, inputs, secrets from extended_data.connectors.connectors import ConnectorFabric @@ -50,3 +51,9 @@ def test_root_exports_first_class_integrated_primitives() -> None: assert callable(extended_data.directed_inputs) assert callable(extended_data.get_connector) assert callable(extended_data.list_connector_info) + + +def test_secrets_tools_alias_preserves_public_exports() -> None: + """The shorter secrets tool path mirrors the canonical connector module.""" + assert "run_pipeline" in 
secrets_tools.__all__ + assert callable(secrets_tools.run_pipeline) From 2b15a42f14988c2683253b2f1d4ccc6669dadd29 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:06:04 -0500 Subject: [PATCH 035/287] fix: parse secretsync failure envelopes --- .../connectors/secrets/__init__.py | 42 +++++++---- tests/connectors/test_secrets.py | 70 +++++++++++++++++++ 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 93c92a0..9a5a941 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -400,22 +400,40 @@ def _cli_run_pipeline( check=False, ) + stdout = result.stdout.strip() + if stdout: + try: + output = json.loads(stdout) + except json.JSONDecodeError as e: + if result.returncode == 0: + return SyncResult( + success=False, + error_message=f"Failed to parse output: {e}", + ) + else: + if not isinstance(output, dict) or "success" not in output: + return SyncResult( + success=False, + error_message=( + "Unsupported secretsync JSON output: expected pipeline result envelope. " + "Upgrade secretsync or use native bindings." + ), + ) + parsed = SyncResult.from_cli_output(output) + if result.returncode != 0 and not parsed.error_message: + parsed.error_message = result.stderr or f"secretsync exited with status {result.returncode}" + return parsed + if result.returncode == 0: - output = json.loads(result.stdout) - if not isinstance(output, dict) or "success" not in output: - return SyncResult( - success=False, - error_message=( - "Unsupported secretsync JSON output: expected pipeline result envelope. " - "Upgrade secretsync or use native bindings." 
- ), - ) - return SyncResult.from_cli_output(output) - else: return SyncResult( success=False, - error_message=result.stderr or result.stdout, + error_message="secretsync produced no JSON output", ) + + return SyncResult( + success=False, + error_message=result.stderr or result.stdout, + ) except subprocess.TimeoutExpired: return SyncResult( success=False, diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index e0d8d29..22af25d 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -194,6 +194,76 @@ def test_cli_run_pipeline_rejects_legacy_raw_diff_json(mock_run: MagicMock, conn assert "expected pipeline result envelope" in result.error_message +@patch("subprocess.run") +def test_cli_run_pipeline_parses_failure_result_envelope(mock_run: MagicMock, connector: SecretsConnector) -> None: + mock_run.return_value = MagicMock( + returncode=1, + stdout=json.dumps( + { + "success": False, + "target_count": 1, + "secrets_processed": 2, + "error_message": "pipeline completed with errors", + "results": [{"target": "prod", "phase": "sync", "success": False, "error": "denied"}], + } + ), + stderr="Error: pipeline completed with errors\n", + ) + + result = connector.run_pipeline("config.yaml") + + assert result.success is False + assert result.target_count == 1 + assert result.secrets_processed == 2 + assert result.error_message == "pipeline completed with errors" + assert json.loads(result.results_json)[0]["error"] == "denied" + + +@patch("subprocess.run") +def test_cli_run_pipeline_failure_envelope_uses_stderr_when_error_message_missing( + mock_run: MagicMock, + connector: SecretsConnector, +) -> None: + mock_run.return_value = MagicMock( + returncode=1, + stdout=json.dumps({"success": False, "results": []}), + stderr="Error: boom\n", + ) + + result = connector.run_pipeline("config.yaml") + + assert result.success is False + assert result.error_message == "Error: boom\n" + + +@patch("subprocess.run") +def 
test_cli_run_pipeline_success_without_json_is_error(mock_run: MagicMock, connector: SecretsConnector) -> None: + mock_run.return_value = MagicMock( + returncode=0, + stdout="", + stderr="", + ) + + result = connector.run_pipeline("config.yaml") + + assert result.success is False + assert "produced no JSON output" in result.error_message + + +@patch("subprocess.run") +def test_cli_run_pipeline_non_json_failure_uses_cli_output(mock_run: MagicMock, connector: SecretsConnector) -> None: + mock_run.return_value = MagicMock( + returncode=1, + stdout="not json", + stderr="", + ) + + result = connector.run_pipeline("config.yaml") + + assert result.success is False + assert result.error_message == "not json" + + @patch("subprocess.run") def test_cli_run_pipeline_only_emits_supported_cli_flags(mock_run: MagicMock, connector: SecretsConnector) -> None: mock_run.return_value = MagicMock( From f60be06d6b84973e777c47432294b747e71d4621 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:10:03 -0500 Subject: [PATCH 036/287] fix: parse connector call extras --- src/extended_data/connectors/cli.py | 23 ++++++++-- tests/connectors/test_cli.py | 68 ++++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index e39b0e7..68f681c 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -121,10 +121,15 @@ def cmd_call(args: argparse.Namespace) -> int: # Parse extra arguments kwargs = {} + json_output = bool(getattr(args, "json", False)) extra = args.extra or [] i = 0 while i < len(extra): arg = extra[i] + if arg == "--json": + json_output = True + i += 1 + continue if arg.startswith("--"): key = arg[2:].replace("-", "_") if i + 1 < len(extra) and not extra[i + 1].startswith("--"): @@ -140,13 +145,22 @@ def cmd_call(args: argparse.Namespace) -> int: connector = get_connector(connector_name) method = getattr(connector, 
method_name, None) - if method is None: + if method is None or not callable(method): + _write_stderr(f"Connector {connector_name!r} has no callable method {method_name!r}") return 1 - method(**kwargs) + result = method(**kwargs) + if result is not None: + if json_output: + _write_stdout(_json_output(result)) + elif isinstance(result, str): + _write_stdout(result) + else: + _write_stdout(_json_output(result)) return 0 - except Exception: + except Exception as e: + _write_stderr(str(e)) return 1 @@ -252,9 +266,10 @@ def main() -> int: # Call command call_parser = subparsers.add_parser("call", help="Call a connector method") + call_parser.add_argument("--json", action="store_true", help="JSON output") call_parser.add_argument("connector", help="Connector name") call_parser.add_argument("method", help="Method name") - call_parser.add_argument("extra", nargs="*", help="Method arguments (--arg value)") + call_parser.add_argument("extra", nargs=argparse.REMAINDER, help="Method arguments (--arg value)") call_parser.set_defaults(func=cmd_call) # MCP command diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index 61f53f7..40f80c8 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -4,11 +4,11 @@ import argparse -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest -from extended_data.connectors.cli import cmd_info, cmd_list, cmd_methods, main +from extended_data.connectors.cli import cmd_call, cmd_info, cmd_list, cmd_methods, main def test_cli_list(): @@ -61,6 +61,70 @@ def test_cli_methods_lists_public_methods(): assert "Decode an HTTP response body" in output +def test_cli_call_parses_dynamic_keyword_arguments() -> None: + """Call command accepts documented --arg value pairs after the method.""" + connector = MagicMock() + connector.fetch.return_value = {"ok": True} + + with ( + patch("sys.argv", ["extended-data", "call", "example", "fetch", "--enabled", "true", "--count", "3"]), + 
patch("extended_data.connectors.cli.get_connector", return_value=connector), + patch("sys.stdout.write") as mock_write, + ): + exit_code = main() + + assert exit_code == 0 + connector.fetch.assert_called_once_with(enabled=True, count=3) + output = "".join(call.args[0] for call in mock_write.call_args_list if call.args) + assert '"ok": true' in output + + +def test_cli_call_accepts_json_flag_after_method() -> None: + """Call command treats trailing --json as a CLI flag, not a method kwarg.""" + connector = MagicMock() + connector.fetch.return_value = {"ok": True} + args = argparse.Namespace(connector="example", method="fetch", extra=["--json"], json=False) + + with ( + patch("extended_data.connectors.cli.get_connector", return_value=connector), + patch("sys.stdout.write") as mock_write, + ): + exit_code = cmd_call(args) + + assert exit_code == 0 + connector.fetch.assert_called_once_with() + assert '"ok": true' in mock_write.call_args.args[0] + + +def test_cli_call_reports_missing_method() -> None: + """Call command reports missing methods instead of failing silently.""" + args = argparse.Namespace(connector="example", method="missing", extra=[], json=False) + connector = object() + + with ( + patch("extended_data.connectors.cli.get_connector", return_value=connector), + patch("sys.stderr.write") as mock_write, + ): + exit_code = cmd_call(args) + + assert exit_code == 1 + assert "has no callable method" in mock_write.call_args.args[0] + + +def test_cli_call_reports_connector_errors() -> None: + """Call command writes connector errors to stderr.""" + args = argparse.Namespace(connector="example", method="fetch", extra=[], json=False) + + with ( + patch("extended_data.connectors.cli.get_connector", side_effect=RuntimeError("boom")), + patch("sys.stderr.write") as mock_write, + ): + exit_code = cmd_call(args) + + assert exit_code == 1 + assert "boom" in mock_write.call_args.args[0] + + def test_cli_main_help(): """Test main CLI entry point with help.""" with 
patch("sys.argv", ["extended-data", "--help"]): From e9acfa076fb5b9f91d5949f12e8bf362f54cf7a5 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:16:48 -0500 Subject: [PATCH 037/287] test: lock connector registry metadata --- .../connectors/test_optional_dependencies.py | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/tests/connectors/test_optional_dependencies.py b/tests/connectors/test_optional_dependencies.py index 9437b3a..5e63a1e 100644 --- a/tests/connectors/test_optional_dependencies.py +++ b/tests/connectors/test_optional_dependencies.py @@ -2,11 +2,58 @@ from __future__ import annotations +from pathlib import Path from types import SimpleNamespace import pytest +import tomlkit -from extended_data.connectors import _optional +from extended_data.connectors import _optional, registry + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +def _pyproject() -> tomlkit.TOMLDocument: + return tomlkit.parse((REPO_ROOT / "pyproject.toml").read_text()) + + +def test_builtin_connector_metadata_maps_stay_aligned() -> None: + """Built-in connector registries should fail fast when metadata drifts.""" + names = set(registry.BUILTIN_CONNECTORS) + + assert names == set(_optional.CONNECTOR_REQUIREMENTS) + assert names == set(_optional.CONNECTOR_EXTRAS) + + for name, spec in registry.BUILTIN_CONNECTORS.items(): + assert _optional.get_extra_for_connector(name) == spec.extra + + +def test_builtin_connectors_are_registered_as_entry_points() -> None: + """Every built-in connector should be published through the connector entry point group.""" + entry_points = _pyproject()["project"]["entry-points"]["extended_data.connectors"] + + assert set(entry_points) == set(registry.BUILTIN_CONNECTORS) + + for name, spec in registry.BUILTIN_CONNECTORS.items(): + assert entry_points[name] == f"{spec.module_path}:{spec.class_name}" + + +def test_connector_extras_exist_in_pyproject() -> None: + """Connector extras referenced by registry 
metadata should exist in pyproject.""" + extras = _pyproject()["project"]["optional-dependencies"] + + for name, extra in _optional.CONNECTOR_EXTRAS.items(): + assert extra in extras, f"{name} uses missing extra {extra}" + + +def test_connector_requirement_packages_map_to_connector_extras() -> None: + """Connector import checks should point users to the same extra as the connector itself.""" + for name, requirements in _optional.CONNECTOR_REQUIREMENTS.items(): + extra = _optional.CONNECTOR_EXTRAS[name] + + for requirement in requirements: + assert _optional.PACKAGE_TO_EXTRA[requirement] == extra def test_get_crewai_tool_decorator_explains_user_managed_install(monkeypatch) -> None: From a668d6872dd75914350444c827b9962982061779 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:21:52 -0500 Subject: [PATCH 038/287] fix: require connector entry point registration --- src/extended_data/connectors/registry.py | 45 ++++++++---------- tests/connectors/test_connectors.py | 58 ++++++++---------------- 2 files changed, 38 insertions(+), 65 deletions(-) diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 43f441e..98cfab0 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -153,36 +153,10 @@ def _discover_connectors() -> dict[str, builtins.type[VendorConnectorBase]]: warnings.warn(f"Failed to load connector '{ep.name}': {e}", stacklevel=2) - # Also include built-in connectors not yet in entry points - # (for development/transition period) - _register_builtins(connectors) - _connector_cache = connectors return connectors -def _register_builtins(connectors: dict[str, builtins.type[VendorConnectorBase]]) -> None: - """Register built-in connectors that may not be in entry points yet.""" - for name, spec in BUILTIN_CONNECTORS.items(): - if name in connectors: - _missing_builtin_connectors.pop(name, None) - continue # Entry point takes precedence - try: - import 
importlib - - module = importlib.import_module(spec.module_path) - cls = getattr(module, spec.class_name, None) - if cls is not None: - connectors[name] = cls - _missing_builtin_connectors.pop(name, None) - else: - _missing_builtin_connectors[name] = ImportError( - f"Could not find {spec.class_name} in {spec.module_path}" - ) - except ImportError as e: - _missing_builtin_connectors[name] = e # Optional dependency not installed - - def _raise_missing_builtin_connector(name: str, error: ImportError) -> NoReturn: """Raise a clear install hint for a known built-in connector.""" install = get_connector_install_command(name) or f"pip install extended-data[{BUILTIN_CONNECTORS[name].extra}]" @@ -198,6 +172,16 @@ def _raise_missing_builtin_connector(name: str, error: ImportError) -> NoReturn: raise ImportError(msg) from error +def _raise_unregistered_builtin_connector(name: str) -> NoReturn: + """Raise a packaging error when a declared built-in connector has no entry point.""" + spec = BUILTIN_CONNECTORS[name] + raise RuntimeError( + f"The built-in '{name}' connector is declared but is not registered in the " + "extended_data.connectors entry point group. " + f'Expected: {name} = "{spec.module_path}:{spec.class_name}"' + ) + + def list_connectors() -> dict[str, builtins.type[VendorConnectorBase]]: """List all available connectors. @@ -225,6 +209,8 @@ def get_connector_class(name: str) -> builtins.type[VendorConnectorBase]: if name_lower not in connectors: if name_lower in _missing_builtin_connectors: _raise_missing_builtin_connector(name_lower, _missing_builtin_connectors[name_lower]) + if name_lower in BUILTIN_CONNECTORS: + _raise_unregistered_builtin_connector(name_lower) available = ", ".join(sorted(connectors.keys())) raise ValueError(f"Unknown connector: {name}. 
Available: {available}") @@ -303,6 +289,11 @@ def _available_connector_info(name: str, cls: builtins.type[VendorConnectorBase] def _missing_builtin_connector_info(name: str, error: ImportError | None) -> ConnectorInfo: """Build metadata for a known built-in connector that cannot be loaded.""" spec = BUILTIN_CONNECTORS[name] + error_message = ( + str(error) + if error + else "Built-in connector is declared but is not registered in the extended_data.connectors entry point group." + ) return ConnectorInfo( name=name, @@ -316,7 +307,7 @@ def _missing_builtin_connector_info(name: str, error: ImportError | None) -> Con module=spec.module_path, base_url=None, description=None, - error=str(error) if error else "Connector class could not be loaded.", + error=error_message, ) diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index f3c780f..90db95d 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -2,14 +2,12 @@ from __future__ import annotations -from types import SimpleNamespace from unittest.mock import MagicMock, patch import pytest from extended_data.connectors import registry from extended_data.connectors.connectors import ConnectorFabric -from extended_data.connectors.registry import _register_builtins # Helper to check if optional dependencies are available @@ -358,6 +356,26 @@ def test_get_connector_info_includes_known_missing_builtin(self, monkeypatch): assert info["install"] == "pip install extended-data[github]" assert info["class"] == "GitHubConnector" + def test_get_connector_class_rejects_unregistered_builtin_entry_point(self, monkeypatch): + """Declared built-ins must be registered through entry points.""" + monkeypatch.setattr(registry, "_connector_cache", {}) + monkeypatch.setattr(registry, "_missing_builtin_connectors", {}) + + with pytest.raises(RuntimeError, match="not registered"): + registry.get_connector_class(" github ") + + def 
test_get_connector_info_reports_unregistered_builtin_entry_point(self, monkeypatch): + """Registry metadata exposes missing built-in entry-point registration.""" + monkeypatch.setattr(registry, "_connector_cache", {}) + monkeypatch.setattr(registry, "_missing_builtin_connectors", {}) + + info = registry.get_connector_info(" github ") + + assert info["name"] == "github" + assert info["available"] is False + assert info["extra"] == "github" + assert "not registered" in info["error"] + def test_lazy_builtin_with_missing_requirements_is_unavailable(self): """Lazy-loadable built-ins still report unavailable when extras are missing.""" registry.clear_cache() @@ -378,39 +396,3 @@ def test_available_only_catalog_filters_missing_lazy_builtins(self): info = registry.list_connector_info(include_unavailable=False) assert all(connector["available"] for connector in info) - - def test_register_builtins_tracks_missing_optional_dependency(self, monkeypatch): - """Built-in discovery remembers optional dependency import failures.""" - monkeypatch.setattr(registry, "_missing_builtin_connectors", {}) - - def fake_import_module(module_path): - if module_path == "extended_data.connectors.github": - raise ImportError("No module named 'github'") - return SimpleNamespace() - - monkeypatch.setattr("importlib.import_module", fake_import_module) - - _register_builtins({}) - - assert "github" in registry._missing_builtin_connectors - - def test_register_builtins_includes_specialized_google_connectors(self): - """Registry builtins expose the advertised specialized Google connectors.""" - pytest.importorskip("googleapiclient") - connectors = {} - - _register_builtins(connectors) - - assert connectors["google"].__name__ == "GoogleConnector" - assert connectors["google_cloud"].__name__ == "GoogleCloudConnector" - assert connectors["google_workspace"].__name__ == "GoogleWorkspaceConnector" - assert connectors["google_billing"].__name__ == "GoogleBillingConnector" - - def 
test_register_builtins_loads_github_entrypoint_name(self): - """Registry builtins keep the GitHub connector spelling compatible with entry points.""" - pytest.importorskip("github") - connectors = {} - - _register_builtins(connectors) - - assert connectors["github"].__name__ == "GitHubConnector" From 28d6210ca50bbef89e155fcafaadc19110258deb Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:24:43 -0500 Subject: [PATCH 039/287] docs: document strict connector registration --- docs/package-surface.md | 5 ++++- tests/connectors/test_connectors.py | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/package-surface.md b/docs/package-surface.md index eab3b1a..ef0c1ea 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -177,4 +177,7 @@ Optional dependency checks live in `extended_data.connectors._optional`; there are no old package compatibility shims in the public API. When a known built-in connector is requested without its optional extra installed, the registry raises an `ImportError` with the exact `extended-data[...]` install target instead of -reporting the connector as unknown. +reporting the connector as unknown. Built-in connectors must also be registered +through the `extended_data.connectors` entry point group; missing entry-point +registration is treated as a package configuration error instead of being +patched over by direct source imports. 
diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index 90db95d..ca2f692 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -376,8 +376,8 @@ def test_get_connector_info_reports_unregistered_builtin_entry_point(self, monke assert info["extra"] == "github" assert "not registered" in info["error"] - def test_lazy_builtin_with_missing_requirements_is_unavailable(self): - """Lazy-loadable built-ins still report unavailable when extras are missing.""" + def test_builtin_with_missing_requirements_is_unavailable(self): + """Entry-point registered built-ins report unavailable when extras are missing.""" registry.clear_cache() if not _has_module("boto3"): @@ -389,8 +389,8 @@ def test_lazy_builtin_with_missing_requirements_is_unavailable(self): with pytest.raises(ImportError, match=r"extended-data\[aws\]"): registry.get_connector_class("aws") - def test_available_only_catalog_filters_missing_lazy_builtins(self): - """Available-only metadata excludes lazy built-ins with missing extras.""" + def test_available_only_catalog_filters_missing_builtins(self): + """Available-only metadata excludes built-ins with missing extras.""" registry.clear_cache() info = registry.list_connector_info(include_unavailable=False) From 1cdf20a428e3179f98186adf151b76148bd7c247 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:31:29 -0500 Subject: [PATCH 040/287] fix: export real connector root classes --- docs/package-surface.md | 6 ++ src/extended_data/connectors/__init__.py | 96 +++++++------------ src/extended_data/connectors/aws/__init__.py | 42 +------- .../connectors/aws/codedeploy.py | 23 ++++- src/extended_data/connectors/aws/s3.py | 10 +- tests/core/test_package_surface.py | 17 ++++ 6 files changed, 87 insertions(+), 107 deletions(-) diff --git a/docs/package-surface.md b/docs/package-surface.md index ef0c1ea..1d605de 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -125,6 
+125,12 @@ Both paths share the same input provider and lifecycle logger, and both cache instances by connector type and constructor inputs. Generic connector names are stripped and lowercased before lookup. +Every built-in connector class registered by name is also exported from +`extended_data.connectors`. Those exports are real classes, not `None` +sentinels. Vendor SDKs load when connector instances need them, so package +import remains lightweight while missing optional extras still fail at the +operation boundary with install guidance. + Connectors that inherit `VendorConnectorBase` can keep raw transport access with `request()` or use `request_data()`, `get_data()`, `post_data()`, and the other verb-specific helpers to decode HTTP JSON, YAML, TOML, HCL, or text responses diff --git a/src/extended_data/connectors/__init__.py b/src/extended_data/connectors/__init__.py index b1f699f..cf61ea2 100644 --- a/src/extended_data/connectors/__init__.py +++ b/src/extended_data/connectors/__init__.py @@ -53,8 +53,16 @@ class MyConnector(AWSConnector, AWSOrganizationsMixin): from extended_data._version import __version__ -# Core imports (always available) +# Core package primitives from extended_data.connectors import meshy +from extended_data.connectors.anthropic import AnthropicConnector +from extended_data.connectors.aws import ( + AWSConnector, + AWSConnectorFull, + AWSOrganizationsMixin, + AWSS3Mixin, + AWSSSOmixin, +) from extended_data.connectors.base import VendorConnectorBase from extended_data.connectors.cloud_params import ( get_aws_call_params, @@ -63,72 +71,28 @@ class MyConnector(AWSConnector, AWSOrganizationsMixin): ) from extended_data.connectors.connectors import ConnectorFabric -# Connectors with no extra dependencies (always available) +# Built-in connector classes; vendor SDKs are loaded by connector instances. 
from extended_data.connectors.cursor import CursorConnector +from extended_data.connectors.github import GitHubConnector +from extended_data.connectors.google import ( + GoogleBillingConnector, + GoogleBillingMixin, + GoogleCloudConnector, + GoogleCloudMixin, + GoogleConnector, + GoogleConnectorFull, + GoogleServicesMixin, + GoogleWorkspaceConnector, + GoogleWorkspaceMixin, + JulesConnector, +) +from extended_data.connectors.meshy import MeshyConnector +from extended_data.connectors.secrets import SecretsConnector +from extended_data.connectors.slack import SlackConnector +from extended_data.connectors.vault import VaultConnector from extended_data.connectors.zoom import ZoomConnector -# Optional connectors - wrapped in try/except for graceful degradation -# These require optional dependencies: pip install extended-data[] - -# Anthropic connector (requires: pip install extended-data[anthropic]) -try: - from extended_data.connectors.anthropic import AnthropicConnector -except ImportError: - AnthropicConnector = None # type: ignore[misc, assignment] - -# AWS connector (requires: pip install extended-data[aws]) -try: - from extended_data.connectors.aws import ( - AWSConnector, - AWSConnectorFull, - AWSOrganizationsMixin, - AWSS3Mixin, - AWSSSOmixin, - ) -except ImportError: - AWSConnector = None # type: ignore[misc, assignment] - AWSConnectorFull = None # type: ignore[misc, assignment] - AWSOrganizationsMixin = None # type: ignore[misc, assignment] - AWSS3Mixin = None # type: ignore[misc, assignment] - AWSSSOmixin = None # type: ignore[misc, assignment] - -# GitHub connector (requires: pip install extended-data[github]) -try: - from extended_data.connectors.github import GitHubConnector -except ImportError: - GitHubConnector = None # type: ignore[misc, assignment] - -# Google connector (requires: pip install extended-data[google]) -try: - from extended_data.connectors.google import ( - GoogleBillingMixin, - GoogleCloudMixin, - GoogleConnector, - GoogleConnectorFull, - 
GoogleServicesMixin, - GoogleWorkspaceMixin, - ) -except ImportError: - GoogleConnector = None # type: ignore[misc, assignment] - GoogleConnectorFull = None # type: ignore[misc, assignment] - GoogleWorkspaceMixin = None # type: ignore[misc, assignment] - GoogleCloudMixin = None # type: ignore[misc, assignment] - GoogleBillingMixin = None # type: ignore[misc, assignment] - GoogleServicesMixin = None # type: ignore[misc, assignment] - -# Slack connector (requires: pip install extended-data[slack]) -try: - from extended_data.connectors.slack import SlackConnector -except ImportError: - SlackConnector = None # type: ignore[misc, assignment] - -# Vault connector (requires: pip install extended-data[vault]) -try: - from extended_data.connectors.vault import VaultConnector -except ImportError: - VaultConnector = None # type: ignore[misc, assignment] - __all__ = [ "AWSConnector", "AWSConnectorFull", @@ -140,12 +104,18 @@ class MyConnector(AWSConnector, AWSOrganizationsMixin): "ConnectorInfo", "CursorConnector", "GitHubConnector", + "GoogleBillingConnector", "GoogleBillingMixin", + "GoogleCloudConnector", "GoogleCloudMixin", "GoogleConnector", "GoogleConnectorFull", "GoogleServicesMixin", + "GoogleWorkspaceConnector", "GoogleWorkspaceMixin", + "JulesConnector", + "MeshyConnector", + "SecretsConnector", "SlackConnector", "VaultConnector", "VendorConnectorBase", diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index e4a0681..4e29fce 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any from extended_data import is_nothing -from extended_data.connectors._optional import is_connector_available, require_extra +from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase from extended_data.logging import Logging @@ -602,42 +602,10 @@ def 
load_vendors_from_asm(prefix: str = "/vendors/") -> dict[str, str]: return vendors -if TYPE_CHECKING: - from extended_data.connectors.aws.codedeploy import ( - create_codedeploy_deployment, - get_aws_codedeploy_deployments, - ) - from extended_data.connectors.aws.organizations import AWSOrganizationsMixin - from extended_data.connectors.aws.s3 import AWSS3Mixin - from extended_data.connectors.aws.sso import AWSSSOmixin -elif is_connector_available("aws"): - # Import submodule operations to make them available when the AWS SDK is present. - from extended_data.connectors.aws.codedeploy import create_codedeploy_deployment, get_aws_codedeploy_deployments - from extended_data.connectors.aws.organizations import AWSOrganizationsMixin - from extended_data.connectors.aws.s3 import AWSS3Mixin - from extended_data.connectors.aws.sso import AWSSSOmixin -else: - - class AWSOrganizationsMixin: - """Placeholder mixin used when the aws extra is not installed.""" - - - class AWSS3Mixin: - """Placeholder mixin used when the aws extra is not installed.""" - - - class AWSSSOmixin: - """Placeholder mixin used when the aws extra is not installed.""" - - - def create_codedeploy_deployment(*args: Any, **kwargs: Any) -> Any: - """Require the aws extra before creating CodeDeploy deployments.""" - _load_aws_sdk() - - - def get_aws_codedeploy_deployments(*args: Any, **kwargs: Any) -> Any: - """Require the aws extra before listing CodeDeploy deployments.""" - _load_aws_sdk() +from extended_data.connectors.aws.codedeploy import create_codedeploy_deployment, get_aws_codedeploy_deployments +from extended_data.connectors.aws.organizations import AWSOrganizationsMixin +from extended_data.connectors.aws.s3 import AWSS3Mixin +from extended_data.connectors.aws.sso import AWSSSOmixin class AWSConnectorFull(AWSConnector, AWSOrganizationsMixin, AWSSSOmixin, AWSS3Mixin): diff --git a/src/extended_data/connectors/aws/codedeploy.py b/src/extended_data/connectors/aws/codedeploy.py index 98bff86..301de4c 
100644 --- a/src/extended_data/connectors/aws/codedeploy.py +++ b/src/extended_data/connectors/aws/codedeploy.py @@ -9,16 +9,29 @@ from collections.abc import Iterable, Sequence from datetime import datetime, timezone -from typing import Any - -from botocore.client import BaseClient -from botocore.config import Config -from botocore.exceptions import ClientError, WaiterError +from typing import TYPE_CHECKING, Any from extended_data.connectors.aws import AWSConnector from extended_data.logging import Logging +if TYPE_CHECKING: + from botocore.client import BaseClient + from botocore.config import Config + from botocore.exceptions import ClientError, WaiterError +else: + try: + from botocore.exceptions import ClientError, WaiterError + except ImportError: + + class ClientError(Exception): + """Fallback exception used until botocore is imported.""" + + + class WaiterError(Exception): + """Fallback exception used until botocore is imported.""" + + _BATCH_GET_LIMIT = 25 _VALID_FILE_BEHAVIORS = {"DISALLOW", "OVERWRITE", "RETAIN"} _DEPLOYMENT_STATUS_MAP = { diff --git a/src/extended_data/connectors/aws/s3.py b/src/extended_data/connectors/aws/s3.py index 59f7c7a..518afe1 100644 --- a/src/extended_data/connectors/aws/s3.py +++ b/src/extended_data/connectors/aws/s3.py @@ -9,13 +9,19 @@ from typing import TYPE_CHECKING, Any -from botocore.exceptions import ClientError - from extended_data import unhump_map if TYPE_CHECKING: from boto3.resources.base import ServiceResource + from botocore.exceptions import ClientError +else: + try: + from botocore.exceptions import ClientError + except ImportError: + + class ClientError(Exception): + """Fallback exception used until botocore is imported.""" class AWSS3Mixin: diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 989cf5b..6c0ddd8 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -10,6 +10,7 @@ from extended_data import connectors, inputs, secrets 
from extended_data.connectors.connectors import ConnectorFabric +from extended_data.connectors.registry import BUILTIN_CONNECTORS from extended_data.inputs import InputProvider from extended_data.logging import Logging @@ -53,6 +54,22 @@ def test_root_exports_first_class_integrated_primitives() -> None: assert callable(extended_data.list_connector_info) +def test_connectors_root_exports_builtin_connector_classes() -> None: + """Every built-in registry connector class is exported from the connector package root.""" + for spec in BUILTIN_CONNECTORS.values(): + value = getattr(connectors, spec.class_name) + + assert isinstance(value, type) + assert value.__name__ == spec.class_name + + +def test_aws_full_connector_keeps_operation_mixins_without_aws_extra() -> None: + """AWSConnectorFull should expose real operation mixins even before boto3 is installed.""" + assert callable(connectors.AWSConnectorFull.list_s3_buckets) + assert callable(connectors.AWSConnectorFull.get_organization_accounts) + assert callable(connectors.AWSConnectorFull.list_sso_users) + + def test_secrets_tools_alias_preserves_public_exports() -> None: """The shorter secrets tool path mirrors the canonical connector module.""" assert "run_pipeline" in secrets_tools.__all__ From b5afd5252493f713d16e253ef4ee83c7c70a275c Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:34:25 -0500 Subject: [PATCH 041/287] fix: fail visibly for missing langchain tools --- src/extended_data/connectors/base.py | 6 +++--- tests/connectors/test_base.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index 6cecb08..b8ead14 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -448,9 +448,9 @@ def get_tools(self) -> list[StructuredTool]: """ try: from langchain_core.tools import StructuredTool - except ImportError: - self.logger.warning("langchain-core 
not installed, returning empty tools list") - return [] + except ImportError as e: + msg = "langchain-core is required for LangChain tools. Install with: pip install extended-data[langchain]" + raise ImportError(msg) from e tools = [] for name, func in self._tool_functions.items(): diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py index d9c076c..aa4ff01 100644 --- a/tests/connectors/test_base.py +++ b/tests/connectors/test_base.py @@ -2,9 +2,12 @@ from __future__ import annotations +import builtins + from unittest.mock import MagicMock import httpx +import pytest from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, ExtendedString @@ -100,3 +103,19 @@ def test_request_data_decodes_response_body() -> None: mock_client.request.assert_called_once() assert mock_client.request.call_args.args[0] == "GET" assert mock_client.request.call_args.args[1] == "https://api.example.com/status" + + +def test_get_tools_requires_langchain_extra(monkeypatch) -> None: + """Base LangChain tool export should fail visibly when langchain-core is missing.""" + connector = _connector() + original_import = builtins.__import__ + + def fake_import(name, *args, **kwargs): + if name == "langchain_core.tools": + raise ImportError("blocked langchain-core") + return original_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", fake_import) + + with pytest.raises(ImportError, match=r"extended-data\[langchain\]"): + connector.get_tools() From f7ce49b1f409575d4d30ac017c2af8c977ae7818 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:40:52 -0500 Subject: [PATCH 042/287] test: lock public package exports --- README.md | 3 +- docs/package-surface.md | 4 +++ src/extended_data/__init__.py | 14 +++++++++ tests/core/test_package_surface.py | 46 +++++++++++++++++++++++++++++- 4 files changed, 65 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 77d65de..085a732 
100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ CrewAI releases pull vulnerable `chromadb` versions transitively. ## Usage ```python -from extended_data import ConnectorFabric, DataWorkflow, ExtendedDict, InputProvider, Logging, decode_file, decode_json, encode_yaml +from extended_data import ConnectorFabric, DataWorkflow, ExtendedDict, InputProvider, Logging, decode_file, decode_json, encode_yaml, number_to_words logger = Logging(logger_name="example") inputs = InputProvider(inputs={"GITHUB_OWNER": "jbcom"}, from_environment=False) @@ -42,6 +42,7 @@ workflow = DataWorkflow.from_value(payload).then(("normalize", lambda data: data print(encode_yaml(payload.data)) print(decoded_file["service"]["name"].upper_first()) +print(number_to_words(42)) print(workflow.as_builtin()) ``` diff --git a/docs/package-surface.md b/docs/package-surface.md index 1d605de..aaa06eb 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -20,6 +20,8 @@ from extended_data import ( extend_data, encode_yaml, flatten_map, + normalize_data_encoding, + number_to_words, to_builtin, ) ``` @@ -44,6 +46,8 @@ name = ExtendedString("API Response Value").to_snake_case() payload = ExtendedDict({"outer": {"inner": 1}}).flatten() items = ExtendedList([1, [2, [3]]]).flatten() tags = ExtendedSet({"prod", "prod", ""}).compact() +words = number_to_words(42) +encoding = normalize_data_encoding("YML") ``` `ExtendedDict`, `ExtendedList`, and `ExtendedSet` recursively promote nested diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index e25bdef..0734eba 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -62,7 +62,15 @@ zipmap, ) from extended_data.primitives.matching import is_non_empty_match, is_partial_match +from extended_data.primitives.numbers import ( + from_roman, + number_to_currency, + number_to_ordinal, + number_to_words, + to_roman, +) from extended_data.primitives.sequences import filter_list, flatten_list +from 
extended_data.primitives.serialization import normalize_data_encoding from extended_data.primitives.splitting import split_dict_by_type, split_list_by_type from extended_data.primitives.state import ( all_non_empty, @@ -225,6 +233,7 @@ def __getattr__(name: str) -> Any: "first_non_empty_value_from_map", "flatten_list", "flatten_map", + "from_roman", "get_available_methods", "get_caller", "get_connector", @@ -252,6 +261,10 @@ def __getattr__(name: str) -> Any: "make_hashable", "make_raw_data_export_safe", "match_file_extensions", + "normalize_data_encoding", + "number_to_currency", + "number_to_ordinal", + "number_to_words", "ordinalize", "pluralize", "read_file", @@ -277,6 +290,7 @@ def __getattr__(name: str) -> Any: "to_camel_case", "to_kebab_case", "to_pascal_case", + "to_roman", "to_snake_case", "truncate", "typeof", diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 6c0ddd8..89343de 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -3,18 +3,43 @@ from __future__ import annotations from importlib.metadata import version +from types import ModuleType import extended_data import extended_data.logging as lifecycle_logging import extended_data.secrets.tools as secrets_tools -from extended_data import connectors, inputs, secrets +from extended_data import connectors, containers, inputs, io, primitives, secrets, workflows from extended_data.connectors.connectors import ConnectorFabric from extended_data.connectors.registry import BUILTIN_CONNECTORS from extended_data.inputs import InputProvider from extended_data.logging import Logging +PUBLIC_MODULES = ( + extended_data, + primitives, + containers, + io, + inputs, + lifecycle_logging, + connectors, + secrets, + workflows, +) + + +def _assert_public_exports_resolve(module: ModuleType) -> None: + exports = module.__all__ + + assert len(exports) == len(set(exports)), f"{module.__name__}.__all__ contains duplicates" + + for name in exports: 
+ value = getattr(module, name) + + assert value is not None, f"{module.__name__}.{name} exported None" + + def test_package_version_is_distribution_version() -> None: """All integrated package namespaces expose the distribution version.""" expected = version("extended-data") @@ -26,6 +51,22 @@ def test_package_version_is_distribution_version() -> None: assert secrets.__version__ == expected +def test_public_all_exports_resolve_to_real_values() -> None: + """Public package modules should not advertise missing or sentinel exports.""" + for module in PUBLIC_MODULES: + _assert_public_exports_resolve(module) + + +def test_public_all_exports_are_import_star_visible() -> None: + """Star imports should expose exactly the advertised public names.""" + for module in PUBLIC_MODULES: + namespace: dict[str, object] = {} + exec(f"from {module.__name__} import *", {}, namespace) + namespace.pop("__builtins__", None) + + assert set(namespace) == set(module.__all__) + + def test_clean_major_version_public_names() -> None: """The public surface uses integrated extended-data names.""" assert inputs.InputProvider.__name__ == "InputProvider" @@ -50,6 +91,9 @@ def test_root_exports_first_class_integrated_primitives() -> None: assert extended_data.SyncOperation is secrets.SyncOperation assert extended_data.OutputFormat is secrets.OutputFormat assert callable(extended_data.directed_inputs) + assert extended_data.number_to_words(42) == "forty-two" + assert extended_data.to_roman(42) == "XLII" + assert extended_data.normalize_data_encoding("YML") == "yaml" assert callable(extended_data.get_connector) assert callable(extended_data.list_connector_info) From ec92efa1a0b2bef3ef63d5e330534d8a8218b6da Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:43:30 -0500 Subject: [PATCH 043/287] fix: expose workflow aliases at package root --- src/extended_data/__init__.py | 4 +++- tests/core/test_package_surface.py | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git 
a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 0734eba..fe136aa 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -120,7 +120,7 @@ strtotime, typeof, ) -from extended_data.workflows import DataWorkflow, WorkflowResult, WorkflowStep +from extended_data.workflows import DataWorkflow, StepLike, WorkflowAction, WorkflowResult, WorkflowStep if TYPE_CHECKING: @@ -190,10 +190,12 @@ def __getattr__(name: str) -> Any: "OutputFormat", "SecretsConnector", "SortedDefaultDict", + "StepLike", "SyncOperation", "SyncOptions", "SyncResult", "VendorConnectorBase", + "WorkflowAction", "WorkflowResult", "WorkflowStep", "__version__", diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 89343de..fd4f17a 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -67,6 +67,12 @@ def test_public_all_exports_are_import_star_visible() -> None: assert set(namespace) == set(module.__all__) +def test_root_exports_tiered_data_surfaces() -> None: + """The root package should expose the integrated primitive, container, IO, and workflow surfaces.""" + for module in (primitives, containers, io, workflows): + assert set(module.__all__) <= set(extended_data.__all__), module.__name__ + + def test_clean_major_version_public_names() -> None: """The public surface uses integrated extended-data names.""" assert inputs.InputProvider.__name__ == "InputProvider" From 0453423cbd4f13eb8d40ed838394e69ab54ecfe5 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:51:39 -0500 Subject: [PATCH 044/287] fix: lower extended containers in encoders --- README.md | 2 +- docs/package-surface.md | 2 ++ .../primitives/formats/_normalization.py | 15 +++++++++++ src/extended_data/primitives/formats/hcl.py | 6 +++-- src/extended_data/primitives/formats/json.py | 3 ++- src/extended_data/primitives/formats/toml.py | 3 ++- .../primitives/formats/yaml/utils.py | 3 ++- 
tests/core/test_hcl2_utils.py | 13 +++++++++ tests/core/test_json_utils.py | 12 +++++++++ tests/core/test_toml_utils.py | 14 ++++++++++ tests/core/test_yaml_utils.py | 12 +++++++++ tests/examples/test_safe_examples.py | 27 +++++++++++++++++++ 12 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 src/extended_data/primitives/formats/_normalization.py diff --git a/README.md b/README.md index 085a732..8054e59 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ payload = ExtendedDict(data).deep_merge({"source": "example"}) decoded_file = decode_file('{"service": {"name": "api"}}', suffix="json", as_extended=True) workflow = DataWorkflow.from_value(payload).then(("normalize", lambda data: data.unhump())).result() -print(encode_yaml(payload.data)) +print(encode_yaml(payload)) print(decoded_file["service"]["name"].upper_first()) print(number_to_words(42)) print(workflow.as_builtin()) diff --git a/docs/package-surface.md b/docs/package-surface.md index aaa06eb..0b99edd 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -70,6 +70,8 @@ assert payload["service"]["name"].upper_first() == "Api" Use `extend_data(value)` to promote existing plain data and `to_builtin(value)` to lower extended containers back to standard Python data. +Format encoders lower Tier 2 containers the same way before serializing JSON, +YAML, TOML, and HCL output. `DataWorkflow` is the Tier 3 composition surface for higher-order data processing. 
It reads or decodes structured data through the file and format diff --git a/src/extended_data/primitives/formats/_normalization.py b/src/extended_data/primitives/formats/_normalization.py new file mode 100644 index 0000000..b9b19e7 --- /dev/null +++ b/src/extended_data/primitives/formats/_normalization.py @@ -0,0 +1,15 @@ +"""Internal helpers for normalizing data before format encoding.""" + +from __future__ import annotations + +from typing import Any + + +def lower_extended_data(value: Any) -> Any: + """Lower Tier 2 containers to plain values before handing data to codecs.""" + from extended_data.containers.factory import to_builtin + + return to_builtin(value) + + +__all__ = ["lower_extended_data"] diff --git a/src/extended_data/primitives/formats/hcl.py b/src/extended_data/primitives/formats/hcl.py index 89b64c8..68b9252 100644 --- a/src/extended_data/primitives/formats/hcl.py +++ b/src/extended_data/primitives/formats/hcl.py @@ -13,6 +13,7 @@ from lark.exceptions import LarkError +from extended_data.primitives.formats._normalization import lower_extended_data from extended_data.primitives.formats.errors import DataDecodeError, invalid_utf8_error from extended_data.primitives.strings import bytestostr from extended_data.primitives.types import convert_special_types @@ -254,9 +255,10 @@ def encode_hcl2(data: Any) -> str: Returns: str: The encoded HCL2 string. """ - if not isinstance(data, Mapping): + normalized_data = lower_extended_data(data) + if not isinstance(normalized_data, Mapping): message = "HCL encoding requires a mapping at the document root." 
raise TypeError(message) - serialized = _serialize_hcl_body(convert_special_types(data), indent_level=0) + serialized = _serialize_hcl_body(convert_special_types(normalized_data), indent_level=0) return serialized.rstrip() diff --git a/src/extended_data/primitives/formats/json.py b/src/extended_data/primitives/formats/json.py index fd0fc03..b1f6d7f 100644 --- a/src/extended_data/primitives/formats/json.py +++ b/src/extended_data/primitives/formats/json.py @@ -10,6 +10,7 @@ import orjson +from extended_data.primitives.formats._normalization import lower_extended_data from extended_data.primitives.formats.errors import DataDecodeError @@ -96,4 +97,4 @@ def encode_json( option |= orjson.OPT_APPEND_NEWLINE # Use orjson.dumps to encode the object with the calculated options - return orjson.dumps(raw_data, default=default, option=option).decode("utf-8") + return orjson.dumps(lower_extended_data(raw_data), default=default, option=option).decode("utf-8") diff --git a/src/extended_data/primitives/formats/toml.py b/src/extended_data/primitives/formats/toml.py index 5980add..e46ba5b 100644 --- a/src/extended_data/primitives/formats/toml.py +++ b/src/extended_data/primitives/formats/toml.py @@ -9,6 +9,7 @@ import tomlkit +from extended_data.primitives.formats._normalization import lower_extended_data from extended_data.primitives.formats.errors import DataDecodeError, invalid_utf8_error from extended_data.primitives.strings import bytestostr from extended_data.primitives.types import convert_special_types @@ -43,5 +44,5 @@ def encode_toml(raw_data: Any) -> str: str: The encoded TOML string. 
""" # Convert unsupported types to simpler forms before encoding - converted_data = convert_special_types(raw_data) + converted_data = convert_special_types(lower_extended_data(raw_data)) return tomlkit.dumps(converted_data) diff --git a/src/extended_data/primitives/formats/yaml/utils.py b/src/extended_data/primitives/formats/yaml/utils.py index 2214d60..09e9846 100644 --- a/src/extended_data/primitives/formats/yaml/utils.py +++ b/src/extended_data/primitives/formats/yaml/utils.py @@ -10,6 +10,7 @@ import yaml +from extended_data.primitives.formats._normalization import lower_extended_data from extended_data.primitives.formats.errors import DataDecodeError, invalid_utf8_error from extended_data.primitives.formats.yaml.dumpers import PureDumper from extended_data.primitives.formats.yaml.loaders import PureLoader @@ -45,7 +46,7 @@ def encode_yaml(raw_data: Any) -> str: Returns: str: The encoded YAML string. """ - return yaml.dump(raw_data, Dumper=PureDumper, allow_unicode=True, sort_keys=False) + return yaml.dump(lower_extended_data(raw_data), Dumper=PureDumper, allow_unicode=True, sort_keys=False) def is_yaml_data(data: Any) -> bool: diff --git a/tests/core/test_hcl2_utils.py b/tests/core/test_hcl2_utils.py index 96a2b70..853d049 100644 --- a/tests/core/test_hcl2_utils.py +++ b/tests/core/test_hcl2_utils.py @@ -4,6 +4,7 @@ import pytest +from extended_data.containers import ExtendedDict from extended_data.primitives.formats import hcl as hcl2_utils from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.hcl import decode_hcl2, encode_hcl2 @@ -295,3 +296,15 @@ def test_encode_hcl2_rejects_non_mapping_root() -> None: """Reject document roots that are not HCL bodies.""" with pytest.raises(TypeError, match="mapping at the document root"): encode_hcl2(["not", "a", "mapping"]) + + +@pytest.mark.parametrize("use_data_attribute", [False, True]) +def test_encode_hcl2_lowers_extended_containers(use_data_attribute: bool) -> 
None: + """Encode Tier 2 containers before validating and rendering HCL.""" + payload = ExtendedDict({"locals": [{"service_name": "api", "ports": [80, 443]}]}) + raw_data = payload.data if use_data_attribute else payload + + encoded = encode_hcl2(raw_data) + + assert 'service_name = "api"' in encoded + assert decode_hcl2(encoded) == {"locals": [{"service_name": "api", "ports": [80, 443]}]} diff --git a/tests/core/test_json_utils.py b/tests/core/test_json_utils.py index fd12f36..f1be24b 100644 --- a/tests/core/test_json_utils.py +++ b/tests/core/test_json_utils.py @@ -15,6 +15,7 @@ import pytest +from extended_data.containers import ExtendedDict from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.json import decode_json, encode_json @@ -111,3 +112,14 @@ def test_encode_json_bytes_output(simple_dict: dict) -> None: """ result = encode_json(simple_dict) assert isinstance(result, str) + + +@pytest.mark.parametrize("use_data_attribute", [False, True]) +def test_encode_json_lowers_extended_containers(use_data_attribute: bool) -> None: + """Encode Tier 2 containers as their plain JSON-compatible contents.""" + payload = ExtendedDict({"status": "ok", "items": ["one"]}) + raw_data = payload.data if use_data_attribute else payload + + result = encode_json(raw_data, sort_keys=True) + + assert decode_json(result) == {"items": ["one"], "status": "ok"} diff --git a/tests/core/test_toml_utils.py b/tests/core/test_toml_utils.py index cb71185..4163aaa 100644 --- a/tests/core/test_toml_utils.py +++ b/tests/core/test_toml_utils.py @@ -17,6 +17,7 @@ import pytest import tomlkit +from extended_data.containers import ExtendedDict from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.toml import decode_toml, encode_toml @@ -70,3 +71,16 @@ def test_encode_toml_converts_tuple_like_composites() -> None: parsed = tomlkit.parse(result) assert parsed["items"] == ["alpha", "beta"] assert 
sorted(parsed["values"]) == [1, 2] + + +@pytest.mark.parametrize("use_data_attribute", [False, True]) +def test_encode_toml_lowers_extended_containers(use_data_attribute: bool) -> None: + """Encode Tier 2 containers through TOML's existing primitive normalization.""" + payload = ExtendedDict({"service": {"name": "api"}, "ports": [80, 443]}) + raw_data = payload.data if use_data_attribute else payload + + result = encode_toml(raw_data) + + parsed = tomlkit.parse(result) + assert parsed["service"]["name"] == "api" + assert parsed["ports"] == [80, 443] diff --git a/tests/core/test_yaml_utils.py b/tests/core/test_yaml_utils.py index 6e2bffc..66b2e56 100644 --- a/tests/core/test_yaml_utils.py +++ b/tests/core/test_yaml_utils.py @@ -22,6 +22,7 @@ from yaml import MappingNode, ScalarNode, SequenceNode +from extended_data.containers import ExtendedDict from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.yaml import ( LiteralScalarString, @@ -92,6 +93,17 @@ def test_encode_yaml(simple_yaml_fixture: str) -> None: assert result_data == expected_data +@pytest.mark.parametrize("use_data_attribute", [False, True]) +def test_encode_yaml_lowers_extended_containers(use_data_attribute: bool) -> None: + """Encode Tier 2 containers while preserving YAML-compatible built-ins.""" + payload = ExtendedDict({"status": "ok", "items": ["one"]}) + raw_data = payload.data if use_data_attribute else payload + + result = encode_yaml(raw_data) + + assert decode_yaml(result) == {"status": "ok", "items": ["one"]} + + def test_yaml_construct_undefined() -> None: """Tests decoding of YAML data with a custom tag. 
diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index c6bb899..3a5e648 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -5,6 +5,7 @@ import importlib.util import os import py_compile +import re import subprocess import sys @@ -38,6 +39,14 @@ ALL_EXAMPLES = SAFE_EXAMPLES + CONNECTOR_EXAMPLES +def _readme_usage_snippet() -> str: + readme = (REPO_ROOT / "README.md").read_text(encoding="utf-8") + usage_section = readme.split("## Usage", 1)[1].split("## Package Shape", 1)[0] + match = re.search(r"```python\n(?P<code>.*?)\n```", usage_section, re.DOTALL) + assert match is not None + return match.group("code") + + @pytest.mark.parametrize("example_path", SAFE_EXAMPLES) def test_safe_example_runs(example_path: str, tmp_path: Path) -> None: """Keep runnable examples aligned with the installed package surface.""" @@ -57,6 +66,24 @@ def test_safe_example_runs(example_path: str, tmp_path: Path) -> None: assert result.returncode == 0, f"{example_path} failed\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" +def test_readme_usage_snippet_runs(tmp_path: Path) -> None: + """Keep the primary README example executable as a public contract.""" + env = os.environ.copy() + env.pop("OVERRIDE_STDIN", None) + + result = subprocess.run( + [sys.executable, "-c", _readme_usage_snippet()], + cwd=tmp_path, + env=env, + capture_output=True, + text=True, + timeout=15, + check=False, + ) + + assert result.returncode == 0, f"README usage snippet failed\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + + @pytest.mark.parametrize("example_path", ALL_EXAMPLES) def test_example_compiles(example_path: str, tmp_path: Path) -> None: """Every example should at least remain syntactically valid.""" From 4fc3b90c63b2cd3ab2647fa9bb887eff2d907edd Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:55:18 -0500 Subject: [PATCH 045/287] fix: honor connector retry configuration ---
src/extended_data/connectors/base.py | 70 +++++++++++++++++++--------- tests/connectors/test_base.py | 37 +++++++++++++++ 2 files changed, 85 insertions(+), 22 deletions(-) diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index b8ead14..38b7a4f 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -38,7 +38,7 @@ def my_operation(self) -> dict: import httpx from tenacity import ( - retry, + Retrying, retry_if_exception_type, stop_after_attempt, wait_exponential, @@ -229,12 +229,14 @@ def _build_url(self, endpoint: str) -> str: endpoint = endpoint.lstrip("/") return f"{base}/{endpoint}" - @retry( - retry=retry_if_exception_type((RateLimitError, httpx.TimeoutException)), - stop=stop_after_attempt(5), - wait=wait_exponential(multiplier=1, min=2, max=30), - ) - def request( + def _max_retry_attempts(self) -> int: + """Return the validated retry attempt count for this connector.""" + if self.MAX_RETRIES < 1: + msg = f"{type(self).__name__}.MAX_RETRIES must be at least 1" + raise ValueError(msg) + return self.MAX_RETRIES + + def _request_once( self, method: str, endpoint: str, @@ -242,21 +244,7 @@ def request( headers: dict[str, str] | None = None, **kwargs: Any, ) -> httpx.Response: - """Make HTTP request with retries and rate limiting. - - Args: - method: HTTP method (GET, POST, PUT, DELETE, etc.) - endpoint: API endpoint (relative to BASE_URL) - headers: Additional headers (merged with defaults) - **kwargs: Passed to httpx.request (json, params, data, etc.) 
- - Returns: - httpx.Response - - Raises: - RateLimitError: On 429 (will retry automatically) - ConnectorAPIError: On other API errors - """ + """Make one HTTP request attempt with rate limiting and response handling.""" self._rate_limit() url = self._build_url(endpoint) @@ -288,6 +276,44 @@ def request( return response + def request( + self, + method: str, + endpoint: str, + *, + headers: dict[str, str] | None = None, + **kwargs: Any, + ) -> httpx.Response: + """Make HTTP request with retries and rate limiting. + + Args: + method: HTTP method (GET, POST, PUT, DELETE, etc.) + endpoint: API endpoint (relative to BASE_URL) + headers: Additional headers (merged with defaults) + **kwargs: Passed to httpx.request (json, params, data, etc.) + + Returns: + httpx.Response + + Raises: + RateLimitError: On 429 or 5xx responses after retries are exhausted. + ConnectorAPIError: On other API errors. + """ + retryer = Retrying( + retry=retry_if_exception_type((RateLimitError, httpx.TimeoutException)), + stop=stop_after_attempt(self._max_retry_attempts()), + wait=wait_exponential(multiplier=1, min=2, max=30), + sleep=time.sleep, + reraise=True, + ) + + for attempt in retryer: + with attempt: + return self._request_once(method, endpoint, headers=headers, **kwargs) + + message = "Retry loop exited without returning or raising." 
+ raise RuntimeError(message) + @staticmethod def _suffix_from_content_type(content_type: str | None) -> str | None: """Infer a data suffix from an HTTP Content-Type header.""" diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py index aa4ff01..e5b36e8 100644 --- a/tests/connectors/test_base.py +++ b/tests/connectors/test_base.py @@ -105,6 +105,43 @@ def test_request_data_decodes_response_body() -> None: assert mock_client.request.call_args.args[1] == "https://api.example.com/status" +def test_request_uses_connector_max_retries(mocker) -> None: + """Connector subclasses control the retry attempt count.""" + + class TwoAttemptConnector(ExampleConnector): + MAX_RETRIES = 2 + + connector = TwoAttemptConnector(from_environment=False) + mocker.patch("extended_data.connectors.base.time.sleep") + mock_client = MagicMock() + mock_client.request.side_effect = [ + httpx.Response(500, content=b"temporary failure"), + httpx.Response(200, content=b"ok"), + ] + connector._client = mock_client + + response = connector.request("GET", "/status") + + assert response.status_code == 200 + assert mock_client.request.call_count == 2 + + +def test_request_rejects_invalid_max_retries() -> None: + """Invalid retry configuration fails before issuing a request.""" + + class InvalidRetryConnector(ExampleConnector): + MAX_RETRIES = 0 + + connector = InvalidRetryConnector(from_environment=False) + mock_client = MagicMock() + connector._client = mock_client + + with pytest.raises(ValueError, match="MAX_RETRIES must be at least 1"): + connector.request("GET", "/status") + + mock_client.request.assert_not_called() + + def test_get_tools_requires_langchain_extra(monkeypatch) -> None: """Base LangChain tool export should fail visibly when langchain-core is missing.""" connector = _connector() From 15a480ebcf34f42811d0200c46acbccbda26ce51 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 03:58:55 -0500 Subject: [PATCH 046/287] fix: serialize extended containers in 
cli --- src/extended_data/connectors/cli.py | 14 ++++++-------- tests/connectors/test_cli.py | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index 68f681c..fe8407c 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -10,15 +10,8 @@ # Call any connector method extended-data call [--arg value ...] - # Interactive mode - extended-data shell - # Start MCP server extended-data mcp - - # Specific connector shortcuts (if implemented) - extended-data jules sources - extended-data cursor agents """ from __future__ import annotations @@ -27,6 +20,7 @@ import json import sys +from collections.abc import Mapping from typing import Any from extended_data.connectors.registry import ( @@ -35,13 +29,17 @@ get_connector_info, list_connector_info, ) +from extended_data.containers.factory import to_builtin def _json_output(data: Any) -> str: """Format data as JSON for output.""" + data = to_builtin(data) if hasattr(data, "model_dump"): data = data.model_dump() - elif hasattr(data, "__iter__") and not isinstance(data, (str, dict)): + elif isinstance(data, Mapping): + data = dict(data) + elif hasattr(data, "__iter__") and not isinstance(data, (str, bytes, bytearray)): data = [d.model_dump() if hasattr(d, "model_dump") else d for d in data] return json.dumps(data, indent=2, default=str) diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index 40f80c8..6ea90b8 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -3,12 +3,14 @@ from __future__ import annotations import argparse +import json from unittest.mock import MagicMock, patch import pytest from extended_data.connectors.cli import cmd_call, cmd_info, cmd_list, cmd_methods, main +from extended_data.containers import ExtendedDict def test_cli_list(): @@ -96,6 +98,22 @@ def test_cli_call_accepts_json_flag_after_method() -> None: assert 
'"ok": true' in mock_write.call_args.args[0] +def test_cli_call_serializes_extended_containers_as_data() -> None: + """Call command renders Tier 2 containers as JSON data, not iterable keys.""" + connector = MagicMock() + connector.fetch.return_value = ExtendedDict({"service": {"name": "api"}}) + args = argparse.Namespace(connector="example", method="fetch", extra=[], json=True) + + with ( + patch("extended_data.connectors.cli.get_connector", return_value=connector), + patch("sys.stdout.write") as mock_write, + ): + exit_code = cmd_call(args) + + assert exit_code == 0 + assert json.loads(mock_write.call_args.args[0]) == {"service": {"name": "api"}} + + def test_cli_call_reports_missing_method() -> None: """Call command reports missing methods instead of failing silently.""" args = argparse.Namespace(connector="example", method="missing", extra=[], json=False) From 3ef69613d1d089102b14711cdaec7d161ac82f0c Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:02:00 -0500 Subject: [PATCH 047/287] fix: disable file logging by default --- src/extended_data/logging/logging.py | 5 +++-- tests/connectors/test_base.py | 10 ++++++++++ tests/logging/test_logging.py | 10 ++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/extended_data/logging/logging.py b/src/extended_data/logging/logging.py index 9ad47dd..6ea8966 100644 --- a/src/extended_data/logging/logging.py +++ b/src/extended_data/logging/logging.py @@ -73,7 +73,7 @@ class Logging: def __init__( self, enable_console: bool = False, - enable_file: bool = True, + enable_file: bool = False, logger: logging.Logger | None = None, logger_name: str | None = None, log_file_name: str | None = None, @@ -91,7 +91,8 @@ def __init__( Args: enable_console: Whether to enable console output. - enable_file: Whether to enable file output. + enable_file: Whether to enable file output. Defaults to False so library + consumers do not get log files unless they opt in. 
logger: An existing logger instance to use. logger_name: The name for a new logger instance. log_file_name: The name of the log file if file logging enabled. diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py index e5b36e8..245f6e0 100644 --- a/tests/connectors/test_base.py +++ b/tests/connectors/test_base.py @@ -26,6 +26,16 @@ def _connector() -> ExampleConnector: return ExampleConnector(from_environment=False, logger=logger) +def test_connector_default_logging_does_not_create_cwd_log_file(tmp_path, monkeypatch) -> None: + """Default connector construction should not write log files as a side effect.""" + monkeypatch.chdir(tmp_path) + + connector = ExampleConnector(from_environment=False) + + assert connector.logging.enable_file is False + assert not (tmp_path / "ExampleConnector.log").exists() + + def test_decode_response_promotes_json_to_extended_containers() -> None: """JSON responses flow through the Tier 2 container bridge.""" connector = _connector() diff --git a/tests/logging/test_logging.py b/tests/logging/test_logging.py index 3be7a4c..608daf2 100644 --- a/tests/logging/test_logging.py +++ b/tests/logging/test_logging.py @@ -22,6 +22,16 @@ def test_logger_initialization() -> None: assert logger.logger is not None +def test_logger_does_not_write_files_by_default(tmp_path, monkeypatch) -> None: + """Default logging should not create files in the caller's working directory.""" + monkeypatch.chdir(tmp_path) + + logger = Logging(logger_name="default_logger") + + assert logger.enable_file is False + assert not (tmp_path / "default_logger.log").exists() + + def test_basic_logging(logger: Logging) -> None: """Test basic message logging without any markers or verbosity. 
From fb499ccb28c23a4c57a0149a87c8c910d7d308da Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:04:14 -0500 Subject: [PATCH 048/287] docs: document opt-in file logging --- docs/package-surface.md | 5 +++-- examples/logging/README.md | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/package-surface.md b/docs/package-surface.md index 0b99edd..b2c848b 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -102,8 +102,9 @@ container bridge as file and Base64 decoding. Requested input coercions are strict, and diagnostics identify the input key and failed operation without echoing raw values from environment variables, stdin, JSON, YAML, or Base64 payloads. `Logging` provides structured lifecycle logging for applications and -connector workflows. `ConnectorFabric` caches and coordinates vendor connectors -while sharing input loading, logging, data normalization, retry behavior, and +connector workflows without creating log files unless file output is explicitly +enabled. `ConnectorFabric` caches and coordinates vendor connectors while +sharing input loading, logging, data normalization, retry behavior, and serialization. ## Connector Fabric diff --git a/examples/logging/README.md b/examples/logging/README.md index 5421a15..e00823c 100644 --- a/examples/logging/README.md +++ b/examples/logging/README.md @@ -3,6 +3,10 @@ This directory contains working examples for structured lifecycle logging in `extended_data.logging`. +`Logging` does not write log files by default. Pass `enable_file=True` with an +optional `log_file_name`, or set `OVERRIDE_TO_FILE=True`, when a workflow should +create file output. 
+ ## Examples ### basic_logging.py From 0a68097f5154f6a4102812bfd5d2bf133694ff7c Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:24:15 -0500 Subject: [PATCH 049/287] feat: preserve tuples in extended containers --- README.md | 3 +- docs/package-surface.md | 16 ++++-- src/extended_data/__init__.py | 11 +++- src/extended_data/containers/__init__.py | 3 +- src/extended_data/containers/factory.py | 10 ++-- src/extended_data/containers/sequences.py | 62 +++++++++++++++++++++++ tests/core/test_containers.py | 38 +++++++++++++- 7 files changed, 131 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 8054e59..88c7e20 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,8 @@ The package is intentionally tiered: Tier 3 decoders can opt into Tier 2 containers with `as_extended=True`, so decoded files, Base64 payloads, and directed inputs can immediately use -`ExtendedDict`, `ExtendedList`, `ExtendedSet`, and `ExtendedString` methods. +`ExtendedDict`, `ExtendedList`, `ExtendedTuple`, `ExtendedSet`, and +`ExtendedString` methods. `DataWorkflow` makes those compositions first-class: read or decode data, apply named transformations, write an output artifact, and keep the step trail in a `WorkflowResult`. Missing workflow inputs and empty writes fail loudly. diff --git a/docs/package-surface.md b/docs/package-surface.md index b2c848b..930155d 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -12,6 +12,7 @@ from extended_data import ( ExtendedList, ExtendedSet, ExtendedString, + ExtendedTuple, InputProvider, Logging, SecretsConnector, @@ -31,8 +32,9 @@ from extended_data import ( - Tier 1 `extended_data.primitives` modules are pure functions and codecs for strings, numbers, maps, lists, matching, state, type coercion, and structured formats. 
-- Tier 2 `extended_data.containers` classes wrap Python user containers as - `ExtendedString`, `ExtendedDict`, `ExtendedList`, and `ExtendedSet` with +- Tier 2 `extended_data.containers` classes wrap Python container primitives as + `ExtendedString`, `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, and + `ExtendedSet` with ergonomic methods over Tier 1 primitives. - Tier 3 processors use the first two tiers to handle files, imports, exports, inputs, API data, vendor integrations, and workflows. @@ -45,14 +47,15 @@ the public error message does not echo the raw payload. name = ExtendedString("API Response Value").to_snake_case() payload = ExtendedDict({"outer": {"inner": 1}}).flatten() items = ExtendedList([1, [2, [3]]]).flatten() +aliases = ExtendedTuple(("api", ("gateway",))).flatten() tags = ExtendedSet({"prod", "prod", ""}).compact() words = number_to_words(42) encoding = normalize_data_encoding("YML") ``` -`ExtendedDict`, `ExtendedList`, and `ExtendedSet` recursively promote nested -plain values on construction and mutation, so method chains can continue through -data loaded from normal Python literals: +`ExtendedDict`, `ExtendedList`, `ExtendedTuple`, and `ExtendedSet` recursively +promote nested plain values on construction and mutation, so method chains can +continue through data loaded from normal Python literals: ```python payload = ExtendedDict({"service": {"name": "api"}}) @@ -70,6 +73,9 @@ assert payload["service"]["name"].upper_first() == "Api" Use `extend_data(value)` to promote existing plain data and `to_builtin(value)` to lower extended containers back to standard Python data. +Tuple values are promoted to `ExtendedTuple` and lowered back to Python tuples, +so the Tier 2 surface does not silently turn immutable input data into mutable +lists. Format encoders lower Tier 2 containers the same way before serializing JSON, YAML, TOML, and HCL output. 
diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index fe136aa..7e495d9 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -11,7 +11,15 @@ from typing import TYPE_CHECKING, Any from extended_data._version import __version__ -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, extend_data, to_builtin +from extended_data.containers import ( + ExtendedDict, + ExtendedList, + ExtendedSet, + ExtendedString, + ExtendedTuple, + extend_data, + to_builtin, +) from extended_data.io.base64 import base64_decode, base64_encode from extended_data.io.exporters import ( make_raw_data_export_safe, @@ -183,6 +191,7 @@ def __getattr__(name: str) -> Any: "ExtendedList", "ExtendedSet", "ExtendedString", + "ExtendedTuple", "FilePath", "InputProvider", "KeyTransform", diff --git a/src/extended_data/containers/__init__.py b/src/extended_data/containers/__init__.py index 0f830c2..0b59f26 100644 --- a/src/extended_data/containers/__init__.py +++ b/src/extended_data/containers/__init__.py @@ -2,7 +2,7 @@ from extended_data.containers.factory import extend_data, to_builtin from extended_data.containers.mappings import ExtendedDict -from extended_data.containers.sequences import ExtendedList, ExtendedSet +from extended_data.containers.sequences import ExtendedList, ExtendedSet, ExtendedTuple from extended_data.containers.strings import ExtendedString @@ -11,6 +11,7 @@ "ExtendedList", "ExtendedSet", "ExtendedString", + "ExtendedTuple", "extend_data", "to_builtin", ] diff --git a/src/extended_data/containers/factory.py b/src/extended_data/containers/factory.py index 897bb93..ec13ea6 100644 --- a/src/extended_data/containers/factory.py +++ b/src/extended_data/containers/factory.py @@ -6,7 +6,7 @@ from typing import Any from extended_data.containers.mappings import ExtendedDict -from extended_data.containers.sequences import ExtendedList, ExtendedSet +from extended_data.containers.sequences import 
ExtendedList, ExtendedSet, ExtendedTuple from extended_data.containers.strings import ExtendedString from extended_data.primitives.formats.yaml import LiteralScalarString, YamlPairs, YamlTagged @@ -15,14 +15,16 @@ def extend_data(value: Any) -> Any: """Recursively wrap built-in containers in Extended Data containers.""" if isinstance(value, YamlTagged | YamlPairs | LiteralScalarString): return value - if isinstance(value, ExtendedString | ExtendedDict | ExtendedList | ExtendedSet): + if isinstance(value, ExtendedString | ExtendedDict | ExtendedList | ExtendedSet | ExtendedTuple): return value if isinstance(value, str): return ExtendedString(value) if isinstance(value, Mapping): return ExtendedDict({key: extend_data(item) for key, item in value.items()}) - if isinstance(value, list | tuple): + if isinstance(value, list): return ExtendedList(extend_data(item) for item in value) + if isinstance(value, tuple): + return ExtendedTuple(extend_data(item) for item in value) if isinstance(value, set | frozenset): return ExtendedSet(extend_data(item) for item in value) return value @@ -38,6 +40,8 @@ def to_builtin(value: Any) -> Any: return {key: to_builtin(item) for key, item in value.items()} if isinstance(value, ExtendedList): return [to_builtin(item) for item in value] + if isinstance(value, ExtendedTuple): + return tuple(to_builtin(item) for item in value) if isinstance(value, ExtendedSet): return {to_builtin(item) for item in value} if isinstance(value, Mapping): diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index b864e3a..212c281 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -86,6 +86,68 @@ def unique(self) -> ExtendedList[T]: return ExtendedList(values) +class ExtendedTuple(tuple[T, ...]): + """Tuple wrapper with immutable chainable sequence operations.""" + + __slots__ = () + + def __new__(cls, values: Iterable[T] | None = None) -> ExtendedTuple[T]: + 
"""Initialize the extended tuple.""" + items = () if values is None else values + return super().__new__(cls, (cls._wrap_item(item) for item in items)) + + @staticmethod + def _wrap_item(item: T) -> T: + """Promote nested built-in containers to extended containers.""" + from extended_data.containers.factory import extend_data + + return cast(T, extend_data(item)) + + def flatten(self) -> ExtendedTuple[Any]: + """Return a recursively flattened tuple copy.""" + from extended_data.containers.factory import to_builtin + + def _flatten(items: Iterable[Any]) -> list[Any]: + flattened: list[Any] = [] + for item in items: + plain_item = to_builtin(item) + if isinstance(plain_item, list | tuple): + flattened.extend(_flatten(plain_item)) + else: + flattened.append(plain_item) + return flattened + + return ExtendedTuple(_flatten(self)) + + def compact(self) -> ExtendedTuple[T]: + """Return a copy without values considered empty.""" + return ExtendedTuple(item for item in self if not is_nothing(item)) + + def map(self, func: Callable[[T], U]) -> ExtendedTuple[U]: + """Return a copy with a callable applied to each item.""" + return ExtendedTuple(func(item) for item in self) + + def filter(self, predicate: Callable[[T], bool]) -> ExtendedTuple[T]: + """Return a copy containing items accepted by a predicate.""" + return ExtendedTuple(item for item in self if predicate(item)) + + def unique(self) -> ExtendedTuple[T]: + """Return a copy with duplicate values removed while preserving order.""" + seen: set[Any] = set() + values: list[T] = [] + for item in self: + marker = make_hashable(item) + if marker in seen: + continue + seen.add(marker) + values.append(item) + return ExtendedTuple(values) + + def to_tuple(self) -> tuple[T, ...]: + """Return a plain tuple copy.""" + return tuple(self) + + class ExtendedSet(MutableSet[T]): """Set wrapper with explicit chainable operations.""" diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 7df1259..0fe97cd 100644 
--- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -6,7 +6,15 @@ import extended_data -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, extend_data, to_builtin +from extended_data.containers import ( + ExtendedDict, + ExtendedList, + ExtendedSet, + ExtendedString, + ExtendedTuple, + extend_data, + to_builtin, +) def test_extended_string_chains_primitive_transforms() -> None: @@ -109,6 +117,29 @@ def test_extended_set_promotes_string_values() -> None: assert to_builtin(value) == {"api", "worker"} +def test_extended_tuple_preserves_immutable_sequence_shape() -> None: + """ExtendedTuple composes sequence primitives without becoming an ExtendedList.""" + value = ExtendedTuple((1, (2, [3]), "", 2)) + + assert value.flatten() == (1, 2, 3, "", 2) + assert value.compact() == (1, (2, [3]), 2) + assert value.unique() == (1, (2, [3]), "", 2) + assert value.filter(lambda item: isinstance(item, int)) == (1, 2) + assert value.map(lambda item: item * 2 if isinstance(item, int) else item) == (2, (2, [3]), "", 4) + + +def test_extended_tuple_promotes_nested_values() -> None: + """ExtendedTuple keeps tuple-shaped values in the Tier 2 surface.""" + value = ExtendedTuple(({"name": "api"}, ["jobs"])) + + assert isinstance(value[0], ExtendedDict) + assert isinstance(value[0]["name"], ExtendedString) + assert isinstance(value[1], ExtendedList) + assert isinstance(value[1][0], ExtendedString) + assert value.to_tuple() == ({"name": "api"}, ["jobs"]) + assert to_builtin(value) == ({"name": "api"}, ["jobs"]) + + def test_extend_data_recursively_wraps_builtin_containers() -> None: """The container factory promotes plain values into the Tier 2 surface.""" wrapped = extend_data( @@ -116,6 +147,7 @@ def test_extend_data_recursively_wraps_builtin_containers() -> None: "service": {"name": "api"}, "ports": [8080, 8081], "tags": {"prod", "api"}, + "aliases": ("api", "gateway"), } ) @@ -124,6 +156,7 @@ def 
test_extend_data_recursively_wraps_builtin_containers() -> None: assert isinstance(wrapped["service"]["name"], ExtendedString) assert isinstance(wrapped["ports"], ExtendedList) assert isinstance(wrapped["tags"], ExtendedSet) + assert isinstance(wrapped["aliases"], ExtendedTuple) assert wrapped["service"]["name"].upper_first() == "Api" @@ -134,6 +167,7 @@ def test_to_builtin_recursively_unwraps_extended_containers() -> None: "service": ExtendedDict({"name": ExtendedString("api")}), "ports": ExtendedList([8080, 8081]), "tags": ExtendedSet({"prod", "api"}), + "aliases": ExtendedTuple(("api", "gateway")), } ) @@ -143,6 +177,7 @@ def test_to_builtin_recursively_unwraps_extended_containers() -> None: assert plain["service"] == {"name": "api"} assert plain["ports"] == [8080, 8081] assert plain["tags"] == {"prod", "api"} + assert plain["aliases"] == ("api", "gateway") def test_container_classes_are_root_exports() -> None: @@ -151,5 +186,6 @@ def test_container_classes_are_root_exports() -> None: assert extended_data.ExtendedDict is ExtendedDict assert extended_data.ExtendedList is ExtendedList assert extended_data.ExtendedSet is ExtendedSet + assert extended_data.ExtendedTuple is ExtendedTuple assert extended_data.extend_data is extend_data assert extended_data.to_builtin is to_builtin From 33a2cc5ce1b12ce1379a2d1346df8d4c30d2c3d8 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:26:43 -0500 Subject: [PATCH 050/287] test: cover tuple workflow serialization --- tests/core/test_workflows.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index 3b71754..369f758 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -9,6 +9,7 @@ from extended_data import ( DataWorkflow, ExtendedDict, + ExtendedTuple, WorkflowResult, WorkflowStep, base64_decode, @@ -117,6 +118,18 @@ def test_data_workflow_can_lower_and_promote_values() -> None: assert 
extended.value["service"]["name"].upper_first() == "Api" +def test_data_workflow_preserves_tuples_until_serialization(tmp_path: Path) -> None: + """Workflow values keep tuple shape in memory and serialize to JSON arrays at the edge.""" + workflow = DataWorkflow.from_value({"aliases": ("api", "gateway")}) + + assert isinstance(workflow.value["aliases"], ExtendedTuple) + assert workflow.result().as_builtin() == {"aliases": ("api", "gateway")} + + result = workflow.write("build/aliases.json", tld=tmp_path) + + assert decode_file(read_file(result.output_path), file_path=result.output_path) == {"aliases": ["api", "gateway"]} + + def test_data_workflow_missing_file_fails_loudly(tmp_path: Path) -> None: """Missing workflow inputs are hard failures, not placeholder results.""" with pytest.raises(FileNotFoundError): From 140e0ce2adda5eb5458e354e22fcdd1bc0f64bdc Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:36:42 -0500 Subject: [PATCH 051/287] feat: promote github connector payloads --- README.md | 7 +- docs/package-surface.md | 12 ++ examples/connectors/README.md | 6 + src/extended_data/connectors/base.py | 8 +- .../connectors/github/__init__.py | 116 ++++++++--------- src/extended_data/connectors/github/tools.py | 32 ++--- tests/connectors/test_base.py | 27 +++- tests/connectors/test_github_connector.py | 89 ++++++++++++- .../test_github_payload_contract.py | 123 ++++++++++++++++++ tests/connectors/test_github_tools.py | 15 +++ 10 files changed, 358 insertions(+), 77 deletions(-) create mode 100644 tests/connectors/test_github_payload_contract.py diff --git a/README.md b/README.md index 88c7e20..322383f 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ extended-data info github --json ```text extended_data/ primitives/ Tier 1 pure functions and codecs - containers/ Tier 2 ExtendedString/Dict/List/Set wrappers + containers/ Tier 2 ExtendedString/Dict/List/Tuple/Set wrappers io/ Tier 3 file, import, export, and base64 processors inputs/ 
InputProvider and decorator-based input injection logging/ structured lifecycle logging @@ -92,6 +92,11 @@ Vendor connectors are first-class adapters in the data fabric. `ConnectorFabric` uses the registry to resolve connectors by name, injects shared input/logging context, caches connector instances, and lets specialized helpers coexist with generic vendor lookup. +Connector data payloads are promoted into Tier 2 containers at the boundary, so +decoded files, HTTP response data, GraphQL responses, and SDK-shaped maps can +use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods immediately. +Raw SDK/client objects and raw transport responses remain available from the +methods that explicitly return them. The `secrets` connector integrates with the standalone `secretsync` CLI or native bindings. CLI fallback expects `secretsync pipeline --output json` to diff --git a/docs/package-surface.md b/docs/package-surface.md index 930155d..4603978 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -148,6 +148,18 @@ Connectors that inherit `VendorConnectorBase` can keep raw transport access with `request()` or use `request_data()`, `get_data()`, `post_data()`, and the other verb-specific helpers to decode HTTP JSON, YAML, TOML, HCL, or text responses through the same Tier 2 container bridge used by file and input decoding. +Connector methods that return vendor data payloads should call +`extend_result()` at the return boundary, making SDK-shaped dictionaries, +lists, decoded repository files, GraphQL results, and workflow-builder output +first-class `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, and +`ExtendedString` values. This is an intentional major-version break from plain +`dict`/`list` payloads; use `to_builtin()` at serialization, CLI, MCP, or SDK +handoff boundaries. 
+ +```python +payload = github.get_repository_file("service.json") +assert payload["service"]["name"].upper_first() == "Api" +``` The `secrets` adapter is the Python-facing bridge to the standalone `secretsync` project. It uses native bindings when present and otherwise falls diff --git a/examples/connectors/README.md b/examples/connectors/README.md index 044ab42..af2da39 100644 --- a/examples/connectors/README.md +++ b/examples/connectors/README.md @@ -3,6 +3,12 @@ This directory contains working examples for `extended_data.connectors` and the vendor adapters that hang off `ConnectorFabric`. +Connector examples assume the major-version `extended-data` contract: vendor +data payloads are promoted into Tier 2 containers at connector boundaries. +Callers can use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods on +decoded API, file, and SDK-shaped results, then call `to_builtin()` only when a +plain Python payload is needed for serialization or SDK handoff. + ## Quick Start Install extended-data with the extras you need: diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index 38b7a4f..6608c54 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -359,6 +359,12 @@ def decode_response( return decode_file(response.content, suffix=resolved_suffix, as_extended=as_extended) + def extend_result(self, value: Any) -> Any: + """Promote connector data payloads into Tier 2 containers.""" + from extended_data.containers import extend_data + + return extend_data(value) + def request_data( self, method: str, @@ -561,4 +567,4 @@ def handle_ai_tool_call(self, name: str, arguments: dict[str, Any]) -> Any: raise ValueError(msg) func = self._tool_functions[name] - return func(**arguments) + return self.extend_result(func(**arguments)) diff --git a/src/extended_data/connectors/github/__init__.py b/src/extended_data/connectors/github/__init__.py index c83b238..4c31e19 100644 --- 
a/src/extended_data/connectors/github/__init__.py +++ b/src/extended_data/connectors/github/__init__.py @@ -11,8 +11,7 @@ from ruamel.yaml import YAML from extended_data import ( - decode_json, - decode_yaml, + decode_file, get_encoding_for_file_path, is_nothing, wrap_raw_data_for_export, @@ -34,7 +33,6 @@ class GitHubFallbackError(Exception): """Fallback exception used until PyGithub is imported.""" - GithubException = GitHubFallbackError UnknownObjectException = GitHubFallbackError @@ -196,23 +194,17 @@ def get_retval(d: Any, s: str | None, p: str) -> Any: decode = False if not decode or is_nothing(file_data): - return get_retval(file_data, file_sha, file_path_text) + return self.extend_result(get_retval(file_data, file_sha, file_path_text)) # Decode file content based on file type encoding = get_encoding_for_file_path(file_path_text) try: - if encoding == "json": - decoded_data = decode_json(file_data) - elif encoding == "yaml": - decoded_data = decode_yaml(file_data) - else: - # For raw or unknown types, return the string as-is - decoded_data = file_data + decoded_data = decode_file(file_data, file_path=file_path_text, as_extended=True) except Exception as exc: self.logger.warning(f"Failed to decode {file_path_text} as {encoding}: {exc}") decoded_data = file_data - return get_retval(decoded_data, file_sha, file_path_text) + return self.extend_result(get_retval(decoded_data, file_sha, file_path_text)) def update_repository_file( self, @@ -353,7 +345,7 @@ def list_org_members( } self.logger.info(f"Retrieved {len(members)} organization members") - return members + return self.extend_result(members) def get_org_member(self, username: str) -> dict[str, Any] | None: """Get a specific organization member. 
@@ -367,16 +359,18 @@ def get_org_member(self, username: str) -> dict[str, Any] | None: try: member = self.git.get_user(username) membership = self.org.get_user_membership(member) - return { - "id": member.id, - "login": member.login, - "name": member.name, - "email": member.email, - "role": membership.role, - "state": membership.state, - "avatar_url": member.avatar_url, - "html_url": member.html_url, - } + return self.extend_result( + { + "id": member.id, + "login": member.login, + "name": member.name, + "email": member.email, + "role": membership.role, + "state": membership.state, + "avatar_url": member.avatar_url, + "html_url": member.html_url, + } + ) except UnknownObjectException: self.logger.warning(f"User not found: {username}") return None @@ -437,7 +431,7 @@ def list_repositories( repos[repo.name] = repo_data self.logger.info(f"Retrieved {len(repos)} repositories") - return repos + return self.extend_result(repos) def get_repository(self, repo_name: str) -> dict[str, Any] | None: """Get a specific repository. 
@@ -450,20 +444,22 @@ def get_repository(self, repo_name: str) -> dict[str, Any] | None: """ try: repo = self.git.get_repo(f"{self.GITHUB_OWNER}/{repo_name}") - return { - "id": repo.id, - "name": repo.name, - "full_name": repo.full_name, - "description": repo.description, - "private": repo.private, - "archived": repo.archived, - "default_branch": repo.default_branch, - "html_url": repo.html_url, - "clone_url": repo.clone_url, - "ssh_url": repo.ssh_url, - "language": repo.language, - "topics": repo.topics, - } + return self.extend_result( + { + "id": repo.id, + "name": repo.name, + "full_name": repo.full_name, + "description": repo.description, + "private": repo.private, + "archived": repo.archived, + "default_branch": repo.default_branch, + "html_url": repo.html_url, + "clone_url": repo.clone_url, + "ssh_url": repo.ssh_url, + "language": repo.language, + "topics": repo.topics, + } + ) except UnknownObjectException: self.logger.warning(f"Repository not found: {repo_name}") return None @@ -530,7 +526,7 @@ def list_teams( teams[team.slug] = team_data self.logger.info(f"Retrieved {len(teams)} teams") - return teams + return self.extend_result(teams) def get_team(self, team_slug: str) -> dict[str, Any] | None: """Get a specific team. 
@@ -543,17 +539,19 @@ def get_team(self, team_slug: str) -> dict[str, Any] | None: """ try: team = self.org.get_team_by_slug(team_slug) - return { - "id": team.id, - "name": team.name, - "slug": team.slug, - "description": team.description, - "privacy": team.privacy, - "permission": team.permission, - "html_url": team.html_url, - "members_count": team.members_count, - "repos_count": team.repos_count, - } + return self.extend_result( + { + "id": team.id, + "name": team.name, + "slug": team.slug, + "description": team.description, + "privacy": team.privacy, + "permission": team.permission, + "html_url": team.html_url, + "members_count": team.members_count, + "repos_count": team.repos_count, + } + ) except UnknownObjectException: self.logger.warning(f"Team not found: {team_slug}") return None @@ -616,10 +614,12 @@ def execute_graphql(self, query: str, variables: dict[str, Any] | None = None) - Query response data. """ headers = {"Authorization": f"Bearer {self.GITHUB_TOKEN}"} - return self.graphql_client.execute( - query=query, - variables=variables or {}, - headers=headers, + return self.extend_result( + self.graphql_client.execute( + query=query, + variables=variables or {}, + headers=headers, + ) ) # ========================================================================= @@ -686,7 +686,7 @@ def get_users_with_verified_emails( enriched[username] = member_data self.logger.info(f"Retrieved verified emails for {len(enriched)} users") - return enriched + return self.extend_result(enriched) # ========================================================================= # GitHub Actions Workflows @@ -734,7 +734,7 @@ def build_workflow( workflow["jobs"] = jobs - return workflow + return self.extend_result(workflow) def build_workflow_job( self, @@ -789,7 +789,7 @@ def build_workflow_job( job["steps"] = steps or [] - return job + return self.extend_result(job) def build_workflow_step( self, @@ -841,7 +841,7 @@ def build_workflow_step( if env: step["env"] = env - return step + 
return self.extend_result(step) def create_python_ci_workflow( self, diff --git a/src/extended_data/connectors/github/tools.py b/src/extended_data/connectors/github/tools.py index 7dc51e9..64b31a8 100644 --- a/src/extended_data/connectors/github/tools.py +++ b/src/extended_data/connectors/github/tools.py @@ -10,6 +10,8 @@ from pydantic import BaseModel, Field +from extended_data.containers import extend_data + # ============================================================================= # Input Schemas @@ -102,7 +104,7 @@ def list_repositories( repo_data = data.copy() repo_data["name"] = name result.append(repo_data) - return result + return extend_data(result) def get_repository( @@ -127,9 +129,8 @@ def get_repository( data = connector.get_repository(repo_name) if data: - return {"status": "found", **data} - else: - return {"status": "not_found", "name": repo_name} + return extend_data({"status": "found", **data}) + return extend_data({"status": "not_found", "name": repo_name}) def list_teams( @@ -154,7 +155,7 @@ def list_teams( connector = GitHubConnector(github_owner=github_owner, github_token=github_token) teams = connector.list_teams(include_members=include_members, include_repos=include_repos) - return list(teams.values()) + return extend_data(list(teams.values())) def get_team( @@ -179,9 +180,8 @@ def get_team( data = connector.get_team(team_slug) if data: - return {"status": "found", **data} - else: - return {"status": "not_found", "slug": team_slug} + return extend_data({"status": "found", **data}) + return extend_data({"status": "not_found", "slug": team_slug}) def list_org_members( @@ -206,7 +206,7 @@ def list_org_members( connector = GitHubConnector(github_owner=github_owner, github_token=github_token) members = connector.list_org_members(role=role, include_pending=include_pending) - return list(members.values()) + return extend_data(list(members.values())) def get_repository_file( @@ -247,12 +247,14 @@ def get_repository_file( status = "empty" if 
content is None else "retrieved" - return { - "status": status, - "path": file_path, - "content": content, - "sha": sha, - } + return extend_data( + { + "status": status, + "path": file_path, + "content": content, + "sha": sha, + } + ) # ============================================================================= diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py index 245f6e0..a55db58 100644 --- a/tests/connectors/test_base.py +++ b/tests/connectors/test_base.py @@ -10,7 +10,7 @@ import pytest from extended_data.connectors.base import VendorConnectorBase -from extended_data.containers import ExtendedDict, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.logging import Logging @@ -115,6 +115,31 @@ def test_request_data_decodes_response_body() -> None: assert mock_client.request.call_args.args[1] == "https://api.example.com/status" +def test_extend_result_promotes_connector_payloads() -> None: + """Connector data payloads cross into the Tier 2 container layer explicitly.""" + connector = _connector() + + data = connector.extend_result({"service": {"name": "api"}, "tags": ["core"]}) + + assert isinstance(data, ExtendedDict) + assert isinstance(data["service"], ExtendedDict) + assert isinstance(data["service"]["name"], ExtendedString) + assert isinstance(data["tags"], ExtendedList) + assert data["service"]["name"].upper_first() == "Api" + + +def test_handle_ai_tool_call_promotes_result_payloads() -> None: + """AI tool dispatch should expose extended containers, not raw dict payloads.""" + connector = _connector() + connector.register_tool(lambda: {"status": "ok", "items": ["one"]}, name="status") + + result = connector.handle_ai_tool_call("status", {}) + + assert isinstance(result, ExtendedDict) + assert isinstance(result["items"], ExtendedList) + assert result["status"].upper_first() == "Ok" + + def test_request_uses_connector_max_retries(mocker) -> None: """Connector 
subclasses control the retry attempt count.""" diff --git a/tests/connectors/test_github_connector.py b/tests/connectors/test_github_connector.py index f09b71d..ca3183d 100644 --- a/tests/connectors/test_github_connector.py +++ b/tests/connectors/test_github_connector.py @@ -11,6 +11,7 @@ from extended_data.connectors import GitHubConnector as RootGitHubConnector from extended_data.connectors.github import GitHubConnector +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString class TestGitHubConnector: @@ -96,4 +97,90 @@ def test_get_repository_file(self, mock_github_class, base_connector_kwargs): ) content = connector.get_repository_file("test.json") - assert content is not None + assert isinstance(content, ExtendedDict) + assert isinstance(content["test"], ExtendedString) + assert content["test"].upper_first() == "Data" + + @patch("extended_data.connectors.github.Github") + def test_get_repository_file_with_metadata_returns_extended_tuple(self, mock_github_class, base_connector_kwargs): + """Repository file metadata tuples preserve shape while promoting decoded content.""" + mock_github = MagicMock() + mock_org = MagicMock() + mock_repo = MagicMock() + mock_file = MagicMock() + mock_file.decoded_content = b'{"test": "data"}' + mock_file.sha = "abc123" + mock_file.content = "test content" + + mock_repo.get_contents.return_value = mock_file + mock_repo.default_branch = "main" + mock_github.get_organization.return_value = mock_org + mock_github.get_repo.return_value = mock_repo + mock_github_class.return_value = mock_github + + connector = GitHubConnector( + github_owner="test-org", github_repo="test-repo", github_token="test-token", **base_connector_kwargs + ) + + content, sha, path = connector.get_repository_file("test.json", return_sha=True, return_path=True) + + assert isinstance(content, ExtendedDict) + assert isinstance(content["test"], ExtendedString) + assert sha == "abc123" + assert path == "test.json" + + 
@patch("extended_data.connectors.github.Github") + def test_list_repositories_promotes_vendor_payloads(self, mock_github_class, base_connector_kwargs): + """Vendor SDK list payloads should return extended containers.""" + mock_github = MagicMock() + mock_org = MagicMock() + mock_repo = MagicMock() + mock_repo.id = 1 + mock_repo.name = "api-service" + mock_repo.full_name = "test-org/api-service" + mock_repo.description = "API service" + mock_repo.private = False + mock_repo.archived = False + mock_repo.default_branch = "main" + mock_repo.html_url = "https://github.com/test-org/api-service" + mock_repo.clone_url = "https://github.com/test-org/api-service.git" + mock_repo.ssh_url = "git@github.com:test-org/api-service.git" + mock_repo.language = "Python" + mock_repo.topics = ["data", "vendor"] + mock_repo.created_at = None + mock_repo.updated_at = None + mock_repo.pushed_at = None + + mock_org.get_repos.return_value = [mock_repo] + mock_github.get_organization.return_value = mock_org + mock_github_class.return_value = mock_github + + connector = GitHubConnector(github_owner="test-org", github_token="test-token", **base_connector_kwargs) + + repos = connector.list_repositories() + + assert isinstance(repos, ExtendedDict) + assert isinstance(repos["api-service"], ExtendedDict) + assert isinstance(repos["api-service"]["name"], ExtendedString) + assert isinstance(repos["api-service"]["topics"], ExtendedList) + assert repos["api-service"]["name"].to_snake_case() == "api_service" + + @patch("extended_data.connectors.github.Github") + def test_build_workflow_helpers_return_extended_data(self, mock_github_class, base_connector_kwargs): + """GitHub workflow builders should also produce first-class extended data.""" + mock_github = MagicMock() + mock_org = MagicMock() + mock_github.get_organization.return_value = mock_org + mock_github_class.return_value = mock_github + + connector = GitHubConnector(github_owner="test-org", github_token="test-token", **base_connector_kwargs) + 
+ step = connector.build_workflow_step(name="Run tests", run="pytest") + job = connector.build_workflow_job(steps=[step]) + workflow = connector.build_workflow(name="CI", on={"pull_request": {}}, jobs={"test": job}) + + assert isinstance(step, ExtendedDict) + assert isinstance(job, ExtendedDict) + assert isinstance(workflow, ExtendedDict) + assert isinstance(workflow["jobs"]["test"]["steps"], ExtendedList) + assert workflow["jobs"]["test"]["steps"][0]["run"].upper_first() == "Pytest" diff --git a/tests/connectors/test_github_payload_contract.py b/tests/connectors/test_github_payload_contract.py new file mode 100644 index 0000000..c45641e --- /dev/null +++ b/tests/connectors/test_github_payload_contract.py @@ -0,0 +1,123 @@ +"""Dependency-free GitHub connector payload contract tests.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +from extended_data.connectors.github import GitHubConnector +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple + + +def _connector() -> GitHubConnector: + """Build a GitHubConnector shell without importing optional SDK dependencies.""" + connector = GitHubConnector.__new__(GitHubConnector) + connector.GITHUB_OWNER = "test-org" + connector.GITHUB_TOKEN = "test-token" + connector.GITHUB_BRANCH = "main" + connector.logger = MagicMock() + connector.repo = MagicMock() + connector.org = MagicMock() + connector.git = MagicMock() + connector.graphql_client = MagicMock() + return connector + + +def test_repository_file_decodes_into_extended_payload_with_metadata() -> None: + """Decoded repository files should enter the Tier 2 fabric immediately.""" + connector = _connector() + mock_file = MagicMock() + mock_file.decoded_content = b'{"service":{"name":"api"}}' + mock_file.sha = "abc123" + mock_file.content = "test content" + connector.repo.get_contents.return_value = mock_file + + result = connector.get_repository_file("service.json", return_sha=True, return_path=True) + 
+ assert isinstance(result, ExtendedTuple) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["service"]["name"], ExtendedString) + assert result[0]["service"]["name"].upper_first() == "Api" + assert result[1:] == ("abc123", "service.json") + + +def test_list_repositories_promotes_sdk_payloads() -> None: + """Repository listing payloads should be extended containers, not raw dicts.""" + connector = _connector() + repo = MagicMock() + repo.id = 1 + repo.name = "api-service" + repo.full_name = "test-org/api-service" + repo.description = "API service" + repo.private = False + repo.archived = False + repo.default_branch = "main" + repo.html_url = "https://github.com/test-org/api-service" + repo.clone_url = "https://github.com/test-org/api-service.git" + repo.ssh_url = "git@github.com:test-org/api-service.git" + repo.language = "Python" + repo.topics = ["data", "vendor"] + repo.created_at = None + repo.updated_at = None + repo.pushed_at = None + connector.org.get_repos.return_value = [repo] + + result = connector.list_repositories() + + assert isinstance(result, ExtendedDict) + assert isinstance(result["api-service"], ExtendedDict) + assert isinstance(result["api-service"]["topics"], ExtendedList) + assert result["api-service"]["name"].to_snake_case() == "api_service" + + +def test_execute_graphql_promotes_response_payload() -> None: + """GraphQL response dictionaries should expose nested extended containers.""" + connector = _connector() + connector.graphql_client.execute.return_value = { + "data": {"user": {"login": "octocat", "organizationVerifiedDomainEmails": ["octo@example.com"]}} + } + + result = connector.execute_graphql("query($login: String!) 
{ user(login: $login) { login } }", {"login": "octocat"}) + + assert isinstance(result, ExtendedDict) + assert isinstance(result["data"]["user"], ExtendedDict) + assert isinstance(result["data"]["user"]["organizationVerifiedDomainEmails"], ExtendedList) + assert result["data"]["user"]["login"].upper_first() == "Octocat" + + +def test_verified_email_enrichment_returns_extended_payload() -> None: + """Derived GitHub user payloads should remain in the extended container layer.""" + connector = _connector() + connector.graphql_client.execute.return_value = { + "data": { + "user": { + "login": "octocat", + "email": "octocat@example.com", + "organizationVerifiedDomainEmails": ["octocat@example.com"], + } + } + } + + result = connector.get_users_with_verified_emails( + members={"octocat": {"login": "octocat", "role": "member"}}, + domain_filter="example.com", + ) + + assert isinstance(result, ExtendedDict) + assert isinstance(result["octocat"], ExtendedDict) + assert isinstance(result["octocat"]["verified_emails"], ExtendedList) + assert result["octocat"]["primary_email"].upper_first() == "Octocat@example.com" + + +def test_workflow_builders_return_extended_data() -> None: + """Local GitHub workflow builders should produce first-class extended data.""" + connector = _connector() + + step = connector.build_workflow_step(name="Run tests", run="pytest") + job = connector.build_workflow_job(steps=[step]) + workflow = connector.build_workflow(name="CI", on={"pull_request": {}}, jobs={"test": job}) + + assert isinstance(step, ExtendedDict) + assert isinstance(job, ExtendedDict) + assert isinstance(workflow, ExtendedDict) + assert isinstance(workflow["jobs"]["test"]["steps"], ExtendedList) + assert workflow["jobs"]["test"]["steps"][0]["run"].upper_first() == "Pytest" diff --git a/tests/connectors/test_github_tools.py b/tests/connectors/test_github_tools.py index 592befb..809d1ad 100644 --- a/tests/connectors/test_github_tools.py +++ b/tests/connectors/test_github_tools.py @@ 
-8,6 +8,8 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString + # Patch target for GitHubConnector - patch at source since tools.py imports lazily inside functions GITHUB_CONNECTOR_PATCH = "extended_data.connectors.github.GitHubConnector" @@ -86,9 +88,12 @@ def test_list_repositories_basic(self, mock_connector_class): result = list_repositories(github_owner="test-org", github_token="test-token") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 2 assert result[0]["name"] == "repo1" assert result[0]["description"] == "Test repository" + assert isinstance(result[0]["description"], ExtendedString) assert result[1]["name"] == "repo2" @patch(GITHUB_CONNECTOR_PATCH) @@ -125,6 +130,7 @@ def test_get_repository_basic(self, mock_connector_class): result = get_repository(github_owner="test-org", github_token="test-token", repo_name="test-repo") + assert isinstance(result, ExtendedDict) assert result["status"] == "found" assert result["name"] == "test-repo" assert result["full_name"] == "org/test-repo" @@ -140,6 +146,7 @@ def test_get_repository_not_found(self, mock_connector_class): result = get_repository(github_owner="test-org", github_token="test-token", repo_name="nonexistent") + assert isinstance(result, ExtendedDict) assert result["status"] == "not_found" assert result["name"] == "nonexistent" @@ -179,6 +186,8 @@ def test_list_teams_basic(self, mock_connector_class): result = list_teams(github_owner="test-org", github_token="test-token") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 2 assert result[0]["slug"] == "team1" assert result[0]["name"] == "Team 1" @@ -218,6 +227,7 @@ def test_get_team_basic(self, mock_connector_class): result = get_team(github_owner="test-org", github_token="test-token", team_slug="test-team") + assert isinstance(result, ExtendedDict) assert result["status"] == "found" assert 
result["slug"] == "test-team" assert result["name"] == "Test Team" @@ -233,6 +243,7 @@ def test_get_team_not_found(self, mock_connector_class): result = get_team(github_owner="test-org", github_token="test-token", team_slug="nonexistent") + assert isinstance(result, ExtendedDict) assert result["status"] == "not_found" assert result["slug"] == "nonexistent" @@ -270,6 +281,8 @@ def test_list_org_members_basic(self, mock_connector_class): result = list_org_members(github_owner="test-org", github_token="test-token") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 2 assert result[0]["login"] == "user1" assert result[0]["role"] == "member" @@ -309,6 +322,7 @@ def test_get_repository_file_basic(self, mock_connector_class): file_path="test.json", ) + assert isinstance(result, ExtendedDict) assert result["path"] == "test.json" assert result["content"] == '{"test": "content"}' assert result["sha"] == "abc123" @@ -352,6 +366,7 @@ def test_get_repository_file_empty(self, mock_connector_class): file_path="empty.txt", ) + assert isinstance(result, ExtendedDict) assert result["status"] == "empty" From 0f8d28bab34cb81aca4c3f770c3db4197e943ea9 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:39:59 -0500 Subject: [PATCH 052/287] feat: promote zoom connector payloads --- src/extended_data/connectors/zoom/__init__.py | 8 ++++---- src/extended_data/connectors/zoom/tools.py | 10 ++++++---- tests/connectors/test_zoom_connector.py | 12 ++++++++++++ tests/connectors/test_zoom_tools.py | 11 +++++++++++ 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/extended_data/connectors/zoom/__init__.py b/src/extended_data/connectors/zoom/__init__.py index be9430a..e6c1ac0 100644 --- a/src/extended_data/connectors/zoom/__init__.py +++ b/src/extended_data/connectors/zoom/__init__.py @@ -86,7 +86,7 @@ def get_zoom_users(self) -> dict[str, dict[str, Any]]: except requests.exceptions.RequestException as exc: 
raise RuntimeError(f"Failed to get Zoom users: {exc}") from exc - return users + return self.extend_result(users) def remove_zoom_user(self, email: str) -> None: """Remove a Zoom user.""" @@ -145,7 +145,7 @@ def get_user(self, user_id: str) -> dict[str, Any]: try: response = requests.get(url, headers=headers, timeout=DEFAULT_REQUEST_TIMEOUT) response.raise_for_status() - return response.json() + return self.extend_result(response.json()) except requests.exceptions.RequestException as exc: raise RuntimeError(f"Failed to get Zoom user {user_id}: {exc}") from exc @@ -167,7 +167,7 @@ def list_meetings(self, user_id: str, meeting_type: str = "scheduled") -> list[d response = requests.get(url, headers=headers, params=params, timeout=DEFAULT_REQUEST_TIMEOUT) response.raise_for_status() data = response.json() - return data.get("meetings", []) + return self.extend_result(data.get("meetings", [])) except requests.exceptions.RequestException as exc: raise RuntimeError(f"Failed to list meetings for user {user_id}: {exc}") from exc @@ -186,7 +186,7 @@ def get_meeting(self, meeting_id: str) -> dict[str, Any]: try: response = requests.get(url, headers=headers, timeout=DEFAULT_REQUEST_TIMEOUT) response.raise_for_status() - return response.json() + return self.extend_result(response.json()) except requests.exceptions.RequestException as exc: raise RuntimeError(f"Failed to get meeting {meeting_id}: {exc}") from exc diff --git a/src/extended_data/connectors/zoom/tools.py b/src/extended_data/connectors/zoom/tools.py index 04f0f82..111721e 100644 --- a/src/extended_data/connectors/zoom/tools.py +++ b/src/extended_data/connectors/zoom/tools.py @@ -10,6 +10,8 @@ from pydantic import BaseModel, Field +from extended_data.containers import extend_data + # ============================================================================= # Input Schemas @@ -64,7 +66,7 @@ def list_users(max_results: int = 100) -> list[dict[str, Any]]: users = connector.list_users() # Sort by email for consistent 
output in tests sorted_users = [users[email] for email in sorted(users.keys())] - return sorted_users[:max_results] + return extend_data(sorted_users[:max_results]) def get_user(user_id: str) -> dict[str, Any]: @@ -79,7 +81,7 @@ def get_user(user_id: str) -> dict[str, Any]: from extended_data.connectors.zoom import ZoomConnector connector = ZoomConnector() - return connector.get_user(user_id) + return extend_data(connector.get_user(user_id)) def list_meetings( @@ -101,7 +103,7 @@ def list_meetings( connector = ZoomConnector() meetings = connector.list_meetings(user_id, meeting_type) - return meetings[:max_results] + return extend_data(meetings[:max_results]) def get_meeting(meeting_id: str) -> dict[str, Any]: @@ -116,7 +118,7 @@ def get_meeting(meeting_id: str) -> dict[str, Any]: from extended_data.connectors.zoom import ZoomConnector connector = ZoomConnector() - return connector.get_meeting(meeting_id) + return extend_data(connector.get_meeting(meeting_id)) # ============================================================================= diff --git a/tests/connectors/test_zoom_connector.py b/tests/connectors/test_zoom_connector.py index d0b2aad..ee9b205 100644 --- a/tests/connectors/test_zoom_connector.py +++ b/tests/connectors/test_zoom_connector.py @@ -7,6 +7,7 @@ import pytest from extended_data.connectors.zoom import ZoomConnector +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString class TestZoomConnector: @@ -89,6 +90,9 @@ def test_get_zoom_users(self, mock_post, mock_get, base_connector_kwargs): ) users = connector.get_zoom_users() + assert isinstance(users, ExtendedDict) + assert isinstance(users["user1@example.com"], ExtendedDict) + assert isinstance(users["user1@example.com"]["first_name"], ExtendedString) assert "user1@example.com" in users assert "user2@example.com" in users assert len(users) == 2 @@ -143,6 +147,8 @@ def test_list_users(self, mock_post, mock_get, base_connector_kwargs): ) users = connector.list_users() + 
assert isinstance(users, ExtendedDict) + assert isinstance(users["user1@example.com"], ExtendedDict) assert "user1@example.com" in users @patch("extended_data.connectors.zoom.requests.get") @@ -172,6 +178,8 @@ def test_get_user(self, mock_post, mock_get, base_connector_kwargs): ) user = connector.get_user("user1@example.com") + assert isinstance(user, ExtendedDict) + assert isinstance(user["first_name"], ExtendedString) assert user["email"] == "user1@example.com" assert user["id"] == "123" @@ -202,6 +210,8 @@ def test_list_meetings(self, mock_post, mock_get, base_connector_kwargs): ) meetings = connector.list_meetings("user1@example.com") + assert isinstance(meetings, ExtendedList) + assert isinstance(meetings[0], ExtendedDict) assert len(meetings) == 2 assert meetings[0]["id"] == "111" @@ -231,5 +241,7 @@ def test_get_meeting(self, mock_post, mock_get, base_connector_kwargs): ) meeting = connector.get_meeting("111") + assert isinstance(meeting, ExtendedDict) + assert isinstance(meeting["topic"], ExtendedString) assert meeting["id"] == "111" assert meeting["topic"] == "Team Meeting" diff --git a/tests/connectors/test_zoom_tools.py b/tests/connectors/test_zoom_tools.py index 0455602..6944fc3 100644 --- a/tests/connectors/test_zoom_tools.py +++ b/tests/connectors/test_zoom_tools.py @@ -4,6 +4,8 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString + CONNECTOR_PATCH = "extended_data.connectors.zoom.ZoomConnector" @@ -76,9 +78,12 @@ def test_list_users_basic(self, mock_connector_class): result = list_users() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 2 assert result[0]["email"] == "user1@example.com" assert result[0]["id"] == "123" + assert isinstance(result[0]["first_name"], ExtendedString) assert result[0]["first_name"] == "John" assert result[1]["email"] == "user2@example.com" @@ -142,6 +147,8 @@ def test_get_user_basic(self, mock_connector_class): result = 
get_user("user1@example.com") + assert isinstance(result, ExtendedDict) + assert isinstance(result["first_name"], ExtendedString) assert result["email"] == "user1@example.com" assert result["id"] == "123" assert result["first_name"] == "John" @@ -205,6 +212,8 @@ def test_list_meetings_basic(self, mock_connector_class): result = list_meetings("user1@example.com") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 2 assert result[0]["id"] == "111" assert result[0]["topic"] == "Team Meeting" @@ -274,6 +283,8 @@ def test_get_meeting_basic(self, mock_connector_class): result = get_meeting("111") + assert isinstance(result, ExtendedDict) + assert isinstance(result["topic"], ExtendedString) assert result["id"] == "111" assert result["topic"] == "Team Meeting" assert result["host_email"] == "host@example.com" From dac79cae0df501380aa1ad6ac1d273c01e2b2b97 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:42:34 -0500 Subject: [PATCH 053/287] feat: promote cursor tool payloads --- .../connectors/cursor/__init__.py | 2 +- src/extended_data/connectors/cursor/tools.py | 33 ++++++++++++------- tests/connectors/test_cursor.py | 22 +++++++++++++ tests/connectors/test_cursor_tools.py | 12 +++++-- 4 files changed, 55 insertions(+), 14 deletions(-) diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index 3f6bae6..1949cc3 100644 --- a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -619,4 +619,4 @@ def list_models(self) -> list[str]: if not data: return [] - return data.get("models", []) + return self.extend_result(data.get("models", [])) diff --git a/src/extended_data/connectors/cursor/tools.py b/src/extended_data/connectors/cursor/tools.py index 4cf43ad..f7ceb3f 100644 --- a/src/extended_data/connectors/cursor/tools.py +++ b/src/extended_data/connectors/cursor/tools.py @@ -10,6 +10,13 @@ from 
pydantic import BaseModel, Field +from extended_data.containers import extend_data + + +def _state_value(state: Any) -> Any: + """Return enum values for tool payloads while preserving plain strings.""" + return getattr(state, "value", state) + class LaunchAgentSchema(BaseModel): """Pydantic schema for the cursor_launch_agent tool.""" @@ -53,11 +60,13 @@ def cursor_launch_agent( branch_name=branch_name, ) - return { - "agent_id": agent.id, - "state": agent.state, - "repository": agent.repository, - } + return extend_data( + { + "agent_id": agent.id, + "state": _state_value(agent.state), + "repository": agent.repository, + } + ) def cursor_get_agent_status(agent_id: str) -> dict[str, Any]: @@ -74,12 +83,14 @@ def cursor_get_agent_status(agent_id: str) -> dict[str, Any]: connector = CursorConnector() agent = connector.get_agent_status(agent_id) - return { - "agent_id": agent.id, - "state": agent.state, - "error": agent.error, - "pr_url": agent.pr_url, - } + return extend_data( + { + "agent_id": agent.id, + "state": _state_value(agent.state), + "error": agent.error, + "pr_url": agent.pr_url, + } + ) TOOL_DEFINITIONS = [ diff --git a/tests/connectors/test_cursor.py b/tests/connectors/test_cursor.py index 6d80b85..11c4ac0 100644 --- a/tests/connectors/test_cursor.py +++ b/tests/connectors/test_cursor.py @@ -22,6 +22,7 @@ validate_repository, validate_webhook_url, ) +from extended_data.containers import ExtendedList, ExtendedString class TestValidators: @@ -246,3 +247,24 @@ def test_launch_agent_validation(self, mock_client_class): with pytest.raises(CursorValidationError, match="format"): connector.launch_agent(prompt_text="Hello", repository="invalid") + + @patch("extended_data.connectors.cursor.httpx.Client") + def test_list_models_returns_extended_list(self, mock_client_class): + """list_models should expose model names as an extended container.""" + mock_client = MagicMock() + mock_client_class.return_value = mock_client + + mock_response = MagicMock() + 
mock_response.status_code = 200 + mock_response.is_success = True + mock_response.headers = {"content-type": "application/json"} + mock_response.text = '{"models": ["cursor-small", "cursor-large"]}' + mock_response.json.return_value = {"models": ["cursor-small", "cursor-large"]} + mock_client.request.return_value = mock_response + + connector = CursorConnector(api_key="test-key") + models = connector.list_models() + + assert isinstance(models, ExtendedList) + assert isinstance(models[0], ExtendedString) + assert models[0].to_snake_case() == "cursor_small" diff --git a/tests/connectors/test_cursor_tools.py b/tests/connectors/test_cursor_tools.py index 99c4de5..1a03c1c 100644 --- a/tests/connectors/test_cursor_tools.py +++ b/tests/connectors/test_cursor_tools.py @@ -4,6 +4,9 @@ from unittest.mock import MagicMock, patch +from extended_data.connectors.cursor import AgentState +from extended_data.containers import ExtendedDict, ExtendedString + def test_cursor_launch_agent(): """Test launch_agent tool.""" @@ -13,14 +16,17 @@ def test_cursor_launch_agent(): mock_connector = MagicMock() mock_agent = MagicMock() mock_agent.id = "agent_123" - mock_agent.state = "running" + mock_agent.state = AgentState.RUNNING mock_agent.repository = "org/repo" mock_connector.launch_agent.return_value = mock_agent mock_connector_class.return_value = mock_connector result = cursor_launch_agent(prompt="Fix bug", repository="org/repo") + assert isinstance(result, ExtendedDict) + assert isinstance(result["agent_id"], ExtendedString) assert result["agent_id"] == "agent_123" assert result["state"] == "running" + assert result["repository"].sanitize() == "org_repo" def test_cursor_get_agent_status(): @@ -31,13 +37,15 @@ def test_cursor_get_agent_status(): mock_connector = MagicMock() mock_agent = MagicMock() mock_agent.id = "agent_123" - mock_agent.state = "finished" + mock_agent.state = AgentState.FINISHED mock_agent.error = None mock_agent.pr_url = "https://github.com/org/repo/pull/1" 
mock_connector.get_agent_status.return_value = mock_agent mock_connector_class.return_value = mock_connector result = cursor_get_agent_status(agent_id="agent_123") + assert isinstance(result, ExtendedDict) + assert isinstance(result["state"], ExtendedString) assert result["agent_id"] == "agent_123" assert result["state"] == "finished" assert result["pr_url"] == "https://github.com/org/repo/pull/1" From 964b5035d8975f1b1b3e83c87fd21564dbe8cdb8 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:44:31 -0500 Subject: [PATCH 054/287] feat: promote slack connector payloads --- .../connectors/slack/__init__.py | 15 ++++++------ src/extended_data/connectors/slack/tools.py | 24 +++++++++++-------- tests/connectors/test_slack_connector.py | 10 ++++++++ tests/connectors/test_slack_tools.py | 14 +++++++++++ 4 files changed, 46 insertions(+), 17 deletions(-) diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index 206e837..fa436e7 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -293,7 +293,8 @@ def get_bot_channels(self) -> dict[str, dict[str, Any]]: SlackAPIError: If Slack returns an error. 
""" try: - return {channel["name"]: channel for channel in self.bot_web_client.users_conversations()["channels"]} + channels = {channel["name"]: channel for channel in self.bot_web_client.users_conversations()["channels"]} + return self.extend_result(channels) except SlackApiError as exc: raise SlackAPIError(exc.response) from exc @@ -340,7 +341,7 @@ def list_users( ) if include_deleted and include_bots and include_app_users: - return response + return self.extend_result(response) filtered = {} for user_id, user_data in response.items(): @@ -356,7 +357,7 @@ def list_users( continue filtered[user_id] = user_data - return filtered + return self.extend_result(filtered) def list_usergroups( self, @@ -405,9 +406,9 @@ def list_usergroups( ) if not normalized_ids: - return response + return self.extend_result(response) - return {gid: gdata for gid, gdata in response.items() if gid in normalized_ids} + return self.extend_result({gid: gdata for gid, gdata in response.items() if gid in normalized_ids}) def list_conversations( self, @@ -464,9 +465,9 @@ def list_conversations( ) if not channels_only: - return response + return self.extend_result(response) - return {cid: cdata for cid, cdata in response.items() if cdata.get("is_channel")} + return self.extend_result({cid: cdata for cid, cdata in response.items() if cdata.get("is_channel")}) def _call_api( self, diff --git a/src/extended_data/connectors/slack/tools.py b/src/extended_data/connectors/slack/tools.py index e42074c..6f50b1d 100644 --- a/src/extended_data/connectors/slack/tools.py +++ b/src/extended_data/connectors/slack/tools.py @@ -29,6 +29,8 @@ from pydantic import BaseModel, Field +from extended_data.containers import extend_data + if TYPE_CHECKING: from extended_data.connectors.slack import SlackConnector @@ -133,7 +135,7 @@ def list_channels( } ) - return result + return extend_data(result) def list_users( @@ -172,7 +174,7 @@ def list_users( } ) - return result + return extend_data(result) def send_message( @@ 
-197,12 +199,14 @@ def send_message( thread_id=thread_id or None, ) - return { - "channel": channel, - "text": text, - "timestamp": timestamp, - "status": "sent", - } + return extend_data( + { + "channel": channel, + "text": text, + "timestamp": timestamp, + "status": "sent", + } + ) def get_channel_history( @@ -229,7 +233,7 @@ def get_channel_history( break if not channel_id: - return [] + return extend_data([]) # Get conversation history using the internal _call_api method history = connector._call_api( @@ -252,7 +256,7 @@ def get_channel_history( } ) - return result + return extend_data(result) # ============================================================================= diff --git a/tests/connectors/test_slack_connector.py b/tests/connectors/test_slack_connector.py index 50af92f..48bb385 100644 --- a/tests/connectors/test_slack_connector.py +++ b/tests/connectors/test_slack_connector.py @@ -9,6 +9,7 @@ import pytest from extended_data.connectors.slack import SlackConnector +from extended_data.containers import ExtendedDict, ExtendedString def test_slack_connector_requires_slack_sdk_when_constructed_without_extra(): @@ -48,6 +49,9 @@ def test_get_bot_channels(self, mock_webclient_class, base_connector_kwargs): connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) channels = connector.get_bot_channels() + assert isinstance(channels, ExtendedDict) + assert isinstance(channels["general"], ExtendedDict) + assert isinstance(channels["general"]["id"], ExtendedString) assert "general" in channels assert channels["general"]["id"] == "C12345" @@ -98,6 +102,8 @@ def test_list_users_filters_deleted( include_app_users=False, ) + assert isinstance(users, ExtendedDict) + assert isinstance(users["U1"], ExtendedDict) assert list(users.keys()) == ["U1"] mock_call_api.assert_called_once_with( "users_list", @@ -135,6 +141,8 @@ def test_list_usergroups_filters_ids( usergroup_ids="S1,S3", ) + assert isinstance(groups, ExtendedDict) + assert 
isinstance(groups["S1"]["name"], ExtendedString) assert groups == {"S1": {"id": "S1", "name": "Ops"}} mock_call_api.assert_called_once_with( "usergroups_list", @@ -174,6 +182,8 @@ def test_list_conversations_channels_only( cursor="cursor123", ) + assert isinstance(conversations, ExtendedDict) + assert isinstance(conversations["C1"], ExtendedDict) assert conversations == {"C1": {"id": "C1", "is_channel": True}} mock_call_api.assert_called_once_with( "conversations_list", diff --git a/tests/connectors/test_slack_tools.py b/tests/connectors/test_slack_tools.py index a785411..5aef2f8 100644 --- a/tests/connectors/test_slack_tools.py +++ b/tests/connectors/test_slack_tools.py @@ -12,6 +12,8 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString + @pytest.fixture(autouse=True) def mock_slack_sdk(): @@ -90,9 +92,12 @@ def test_list_channels_basic(self, mock_slack_sdk): with patch("extended_data.connectors.slack.tools._get_connector", return_value=mock_connector): result = list_channels() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 2 assert result[0]["id"] == "C12345" assert result[0]["name"] == "general" + assert isinstance(result[0]["name"], ExtendedString) assert result[0]["member_count"] == 42 def test_list_channels_with_archived(self, mock_slack_sdk): @@ -138,9 +143,12 @@ def test_list_users_basic(self, mock_slack_sdk): with patch("extended_data.connectors.slack.tools._get_connector", return_value=mock_connector): result = list_users() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 2 assert result[0]["id"] == "U12345" assert result[0]["name"] == "john.doe" + assert isinstance(result[0]["email"], ExtendedString) assert result[0]["email"] == "john@example.com" assert result[0]["is_admin"] is True @@ -172,6 +180,8 @@ def test_send_message_basic(self, mock_slack_sdk): with 
patch("extended_data.connectors.slack.tools._get_connector", return_value=mock_connector): result = send_message(channel="general", text="Hello, world!") + assert isinstance(result, ExtendedDict) + assert isinstance(result["channel"], ExtendedString) assert result["channel"] == "general" assert result["text"] == "Hello, world!" assert result["timestamp"] == "1234567890.123456" @@ -223,9 +233,12 @@ def test_get_channel_history_basic(self, mock_slack_sdk): with patch("extended_data.connectors.slack.tools._get_connector", return_value=mock_connector): result = get_channel_history(channel="general") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 2 assert result[0]["timestamp"] == "1234567890.123456" assert result[0]["user"] == "U12345" + assert isinstance(result[0]["text"], ExtendedString) assert result[0]["text"] == "Hello, world!" def test_get_channel_history_channel_not_found(self, mock_slack_sdk): @@ -238,6 +251,7 @@ def test_get_channel_history_channel_not_found(self, mock_slack_sdk): with patch("extended_data.connectors.slack.tools._get_connector", return_value=mock_connector): result = get_channel_history(channel="nonexistent") + assert isinstance(result, ExtendedList) assert len(result) == 0 From 89448bcf60de44029d68427cfb1e4e49a7ea3404 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:47:54 -0500 Subject: [PATCH 055/287] feat: promote anthropic tool payloads --- .../connectors/anthropic/tools.py | 24 +++++++++++-------- tests/connectors/test_anthropic_tools.py | 8 +++++++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/extended_data/connectors/anthropic/tools.py b/src/extended_data/connectors/anthropic/tools.py index 1c21099..04f3707 100644 --- a/src/extended_data/connectors/anthropic/tools.py +++ b/src/extended_data/connectors/anthropic/tools.py @@ -10,6 +10,8 @@ from pydantic import BaseModel, Field +from extended_data.containers import extend_data + class 
CreateMessageSchema(BaseModel): """Pydantic schema for the anthropic_create_message tool.""" @@ -51,15 +53,17 @@ def anthropic_create_message( system=system, ) - return { - "id": response.id, - "text": response.text, - "model": response.model, - "usage": { - "input_tokens": response.usage.input_tokens, - "output_tokens": response.usage.output_tokens, - }, - } + return extend_data( + { + "id": response.id, + "text": response.text, + "model": response.model, + "usage": { + "input_tokens": response.usage.input_tokens, + "output_tokens": response.usage.output_tokens, + }, + } + ) def anthropic_list_models() -> list[dict[str, Any]]: @@ -73,7 +77,7 @@ def anthropic_list_models() -> list[dict[str, Any]]: connector = AnthropicConnector() models = connector.list_models() - return [{"id": m.id, "display_name": m.display_name} for m in models] + return extend_data([{"id": m.id, "display_name": m.display_name} for m in models]) TOOL_DEFINITIONS = [ diff --git a/tests/connectors/test_anthropic_tools.py b/tests/connectors/test_anthropic_tools.py index bfab7b1..fcb6204 100644 --- a/tests/connectors/test_anthropic_tools.py +++ b/tests/connectors/test_anthropic_tools.py @@ -4,6 +4,8 @@ from unittest.mock import MagicMock, patch +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString + def test_anthropic_list_models(): """Test list_models tool.""" @@ -18,6 +20,9 @@ def test_anthropic_list_models(): mock_connector_class.return_value = mock_connector result = anthropic_list_models() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["id"], ExtendedString) assert len(result) == 1 assert result[0]["id"] == "claude-3-opus" @@ -38,5 +43,8 @@ def test_anthropic_create_message(): mock_connector_class.return_value = mock_connector result = anthropic_create_message(model="claude-3-opus", prompt="Hi") + assert isinstance(result, ExtendedDict) + assert isinstance(result["text"], ExtendedString) + assert 
isinstance(result["usage"], ExtendedDict) assert result["id"] == "msg_123" assert result["text"] == "Hello!" From 4d87d26d3d5187c1897275e76274cf555af58849 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:50:44 -0500 Subject: [PATCH 056/287] feat: promote vault connector payloads --- .../connectors/vault/__init__.py | 21 +++++++++++-------- src/extended_data/connectors/vault/tools.py | 21 ++++++++++++------- tests/connectors/test_vault_connector.py | 12 +++++++++++ tests/connectors/test_vault_tools.py | 21 +++++++++++++++---- 4 files changed, 54 insertions(+), 21 deletions(-) diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index 137e0b1..76a6741 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -214,7 +214,7 @@ def list_secrets( ] except VaultError as e: self.logger.warning(f"Invalid root path {display_root}: {e}") - return secrets + return self.extend_result(secrets) stack: deque[tuple[str, int]] = deque(initial_paths) @@ -247,7 +247,7 @@ def list_secrets( self.logger.warning(f"Failed to list path {current_path}: {e}") self.logger.info(f"Listed {len(secrets)} Vault secrets") - return secrets + return self.extend_result(secrets) def read_secret( self, @@ -268,7 +268,10 @@ def read_secret( path=path, mount_point=mount_point, ) - return result.get("data", {}).get("data") + data = result.get("data", {}).get("data") + if data is None: + return None + return self.extend_result(data) except VaultError as e: self.logger.warning(f"Failed to read secret {path}: {e}") return None @@ -318,7 +321,7 @@ def get_secret( + (f"/{secret_name}" if not is_nothing(secret_name) else "") + f": {e}" ) - return secret_data + return self.extend_result(secret_data) if secret_data is not None else None # No secret_name provided - search under path self.logger.info(f"Finding secrets under {path}") @@ -364,7 +367,7 @@ def get_secret( if found_match: 
secret_data = matching_secret_data - return secret_data + return self.extend_result(secret_data) if secret_data is not None else None def write_secret( self, @@ -421,14 +424,14 @@ def list_aws_iam_roles( response = aws_secrets.list_roles(mount_point=mount_point) except VaultError as e: self.logger.warning(f"Failed to list AWS IAM roles from mount {mount_point}: {e}") - return [] + return self.extend_result([]) role_names = response.get("data", {}).get("keys", []) or [] if name_prefix: role_names = [role for role in role_names if role.startswith(name_prefix)] self.logger.info(f"Found {len(role_names)} AWS IAM roles under mount {mount_point}") - return role_names + return self.extend_result(role_names) def get_aws_iam_role( self, @@ -461,7 +464,7 @@ def get_aws_iam_role( self.logger.warning(f"AWS IAM role {role_name} exists but returned no data") return None - return role_data + return self.extend_result(role_data) def generate_aws_credentials( self, @@ -509,7 +512,7 @@ def generate_aws_credentials( raise RuntimeError(f"Vault returned empty credentials for role {role_name}") self.logger.info(f"Generated AWS credentials for role {role_name}") - return credentials + return self.extend_result(credentials) from extended_data.connectors.vault.tools import ( diff --git a/src/extended_data/connectors/vault/tools.py b/src/extended_data/connectors/vault/tools.py index 7984d8e..01a8e7e 100644 --- a/src/extended_data/connectors/vault/tools.py +++ b/src/extended_data/connectors/vault/tools.py @@ -6,10 +6,13 @@ from __future__ import annotations +from collections.abc import Mapping from typing import Any from pydantic import BaseModel, Field +from extended_data.containers import extend_data + # ============================================================================= # Input Schemas @@ -63,10 +66,10 @@ def list_secrets( "path": path, "mount_point": mount_point, "data": data, - "key_count": len(data) if isinstance(data, dict) else 0, + "key_count": len(data) if 
isinstance(data, Mapping) else 0, } ) - return result + return extend_data(result) def read_secret( @@ -87,12 +90,14 @@ def read_secret( connector = VaultConnector() data = connector.read_secret(path=path, mount_point=mount_point) - return { - "path": path, - "mount_point": mount_point, - "data": data or {}, - "found": data is not None, - } + return extend_data( + { + "path": path, + "mount_point": mount_point, + "data": data or {}, + "found": data is not None, + } + ) # ============================================================================= diff --git a/tests/connectors/test_vault_connector.py b/tests/connectors/test_vault_connector.py index d3954da..6a7c48f 100644 --- a/tests/connectors/test_vault_connector.py +++ b/tests/connectors/test_vault_connector.py @@ -12,6 +12,7 @@ from hvac.exceptions import VaultError +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.connectors.vault import VaultConnector @@ -115,6 +116,9 @@ def read_side_effect(path, mount_point): secrets = connector.list_secrets() + assert isinstance(secrets, ExtendedDict) + assert isinstance(secrets["shared"], ExtendedDict) + assert isinstance(secrets["shared"]["value"], ExtendedString) assert secrets == { "shared": {"value": "shared"}, "finance/dev": {"value": "dev"}, @@ -136,6 +140,7 @@ def test_list_secrets_handles_invalid_root(self, base_connector_kwargs): secrets = connector.list_secrets(root_path="does/not/exist") + assert isinstance(secrets, ExtendedDict) assert secrets == {} mock_client.secrets.kv.v2.list_secrets.assert_called_once_with( path="does/not/exist", @@ -173,6 +178,8 @@ def test_list_aws_iam_roles_filters_prefix(self, base_connector_kwargs): roles = connector.list_aws_iam_roles(name_prefix="prod") + assert isinstance(roles, ExtendedList) + assert isinstance(roles[0], ExtendedString) assert roles == ["prod-sync"] mock_client.secrets.aws.list_roles.assert_called_once_with(mount_point="aws") @@ -190,6 +197,7 @@ def 
test_list_aws_iam_roles_handles_errors(self, base_connector_kwargs): roles = connector.list_aws_iam_roles() + assert isinstance(roles, ExtendedList) assert roles == [] def test_get_aws_iam_role_returns_data(self, base_connector_kwargs): @@ -206,6 +214,8 @@ def test_get_aws_iam_role_returns_data(self, base_connector_kwargs): role_data = connector.get_aws_iam_role(role_name="prod") + assert isinstance(role_data, ExtendedDict) + assert isinstance(role_data["arn"], ExtendedString) assert role_data == {"arn": "arn:aws:iam::123:role/prod"} mock_client.secrets.aws.read_role.assert_called_once_with(name="prod", mount_point="aws") @@ -239,6 +249,8 @@ def test_generate_aws_credentials_success(self, base_connector_kwargs): credentials = connector.generate_aws_credentials(role_name="prod", ttl="1h", credential_type="sts") + assert isinstance(credentials, ExtendedDict) + assert isinstance(credentials["access_key"], ExtendedString) assert credentials["access_key"] == "AKIA" mock_client.secrets.aws.generate_credentials.assert_called_once_with( name="prod", diff --git a/tests/connectors/test_vault_tools.py b/tests/connectors/test_vault_tools.py index 34ddfbe..99e7829 100644 --- a/tests/connectors/test_vault_tools.py +++ b/tests/connectors/test_vault_tools.py @@ -8,6 +8,8 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data + # Patch target for VaultConnector - must patch where it's used (in tools.py), not where it's defined VAULT_CONNECTOR_PATCH = "extended_data.connectors.vault.VaultConnector" @@ -60,14 +62,20 @@ def test_list_secrets_basic(self, mock_connector_class): from extended_data.connectors.vault.tools import list_secrets mock_connector = MagicMock() - mock_connector.list_secrets.return_value = { - "app/db-password": {"username": "admin", "password": "secret123"}, - "app/api-key": {"key": "abc123xyz"}, - } + mock_connector.list_secrets.return_value = extend_data( + { + "app/db-password": {"username": "admin", 
"password": "secret123"}, + "app/api-key": {"key": "abc123xyz"}, + } + ) mock_connector_class.return_value = mock_connector result = list_secrets() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["path"], ExtendedString) + assert isinstance(result[0]["data"], ExtendedDict) assert len(result) == 2 assert result[0]["path"] == "app/db-password" assert result[0]["mount_point"] == "secret" @@ -146,6 +154,9 @@ def test_read_secret_found(self, mock_connector_class): result = read_secret("app/db-password") + assert isinstance(result, ExtendedDict) + assert isinstance(result["path"], ExtendedString) + assert isinstance(result["data"], ExtendedDict) assert result["path"] == "app/db-password" assert result["mount_point"] == "secret" assert result["data"]["username"] == "admin" @@ -163,6 +174,8 @@ def test_read_secret_not_found(self, mock_connector_class): result = read_secret("app/missing-secret") + assert isinstance(result, ExtendedDict) + assert isinstance(result["data"], ExtendedDict) assert result["path"] == "app/missing-secret" assert result["mount_point"] == "secret" assert result["data"] == {} From e6207313425de20574344336653a4b61e5aa51a1 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:53:10 -0500 Subject: [PATCH 057/287] feat: promote aws tool payloads --- src/extended_data/connectors/aws/tools.py | 101 ++++++++------- tests/connectors/test_aws_tools.py | 146 +++++++++++++++------- 2 files changed, 157 insertions(+), 90 deletions(-) diff --git a/src/extended_data/connectors/aws/tools.py b/src/extended_data/connectors/aws/tools.py index d1c809b..9729e87 100644 --- a/src/extended_data/connectors/aws/tools.py +++ b/src/extended_data/connectors/aws/tools.py @@ -27,10 +27,13 @@ from __future__ import annotations +from collections.abc import Mapping from typing import Any from pydantic import BaseModel, Field +from extended_data.containers import extend_data + # 
============================================================================= # Input Schemas @@ -91,7 +94,7 @@ def get_caller_account_id() -> dict[str, str]: connector = AWSConnectorFull() account_id = connector.get_caller_account_id() - return {"account_id": account_id} + return extend_data({"account_id": account_id}) def list_s3_buckets() -> list[dict[str, Any]]: @@ -104,14 +107,16 @@ def list_s3_buckets() -> list[dict[str, Any]]: connector = AWSConnectorFull() buckets = connector.list_s3_buckets() - return [ - { - "name": name, - "creation_date": str(data.get("CreationDate", "")), - "region": data.get("region", ""), - } - for name, data in buckets.items() - ] + return extend_data( + [ + { + "name": name, + "creation_date": str(data.get("CreationDate", "")), + "region": data.get("region", ""), + } + for name, data in buckets.items() + ] + ) def list_s3_objects(bucket: str) -> list[dict[str, Any]]: @@ -127,14 +132,14 @@ def list_s3_objects(bucket: str) -> list[dict[str, Any]]: connector = AWSConnectorFull() objects_raw: Any = connector.list_objects(bucket) - if isinstance(objects_raw, dict): + if isinstance(objects_raw, Mapping): objects = [{"key": key, **data} for key, data in objects_raw.items()] else: objects = objects_raw result: list[dict[str, Any]] = [] for data in objects: - if not isinstance(data, dict): + if not isinstance(data, Mapping): continue result.append( { @@ -143,7 +148,7 @@ def list_s3_objects(bucket: str) -> list[dict[str, Any]]: "last_modified": str(data.get("last_modified", data.get("LastModified", ""))), } ) - return result + return extend_data(result) def list_accounts() -> list[dict[str, Any]]: @@ -156,15 +161,17 @@ def list_accounts() -> list[dict[str, Any]]: connector = AWSConnectorFull() accounts = connector.get_accounts() - return [ - { - "id": acc_id, - "name": data.get("Name", ""), - "email": data.get("Email", ""), - "status": data.get("Status", ""), - } - for acc_id, data in accounts.items() - ] + return extend_data( + [ + { + 
"id": acc_id, + "name": data.get("Name", ""), + "email": data.get("Email", ""), + "status": data.get("Status", ""), + } + for acc_id, data in accounts.items() + ] + ) def list_sso_users() -> list[dict[str, Any]]: @@ -177,15 +184,17 @@ def list_sso_users() -> list[dict[str, Any]]: connector = AWSConnectorFull() users = connector.list_sso_users() - return [ - { - "user_id": user_id, - "user_name": data.get("user_name", ""), - "display_name": data.get("display_name", ""), - "email": data.get("primary_email", {}).get("value", ""), - } - for user_id, data in users.items() - ] + return extend_data( + [ + { + "user_id": user_id, + "user_name": data.get("user_name", ""), + "display_name": data.get("display_name", ""), + "email": data.get("primary_email", {}).get("value", ""), + } + for user_id, data in users.items() + ] + ) def list_sso_groups() -> list[dict[str, Any]]: @@ -198,14 +207,16 @@ def list_sso_groups() -> list[dict[str, Any]]: connector = AWSConnectorFull() groups = connector.list_sso_groups() - return [ - { - "group_id": group_id, - "display_name": data.get("display_name", ""), - "member_count": len(data.get("members", [])), - } - for group_id, data in groups.items() - ] + return extend_data( + [ + { + "group_id": group_id, + "display_name": data.get("display_name", ""), + "member_count": len(data.get("members", [])), + } + for group_id, data in groups.items() + ] + ) def list_secrets( @@ -241,7 +252,7 @@ def list_secrets( result.append({"name": name, "arn": None, "value": None}) else: result.append({"name": name, "arn": data.get("ARN"), "value": data}) - return result + return extend_data(result) def get_secret(secret_id: str) -> dict[str, Any]: @@ -257,11 +268,13 @@ def get_secret(secret_id: str) -> dict[str, Any]: connector = AWSConnectorFull() value = connector.get_secret(secret_id) - return { - "secret_name": secret_id, - "secret_value": value, - "status": "retrieved" if value is not None else "not_found", - } + return extend_data( + { + "secret_name": 
secret_id, + "secret_value": value, + "status": "retrieved" if value is not None else "not_found", + } + ) # ============================================================================= diff --git a/tests/connectors/test_aws_tools.py b/tests/connectors/test_aws_tools.py index 092be4f..ca77f1d 100644 --- a/tests/connectors/test_aws_tools.py +++ b/tests/connectors/test_aws_tools.py @@ -8,6 +8,8 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data + # Patch target for AWSConnectorFull - must patch where it's imported AWS_CONNECTOR_PATCH = "extended_data.connectors.aws.AWSConnectorFull" @@ -51,6 +53,25 @@ def test_tool_names_prefixed(self): assert defn["name"].startswith("aws_"), f"Tool name not prefixed: {defn['name']}" +class TestGetCallerAccountId: + """Tests for get_caller_account_id tool.""" + + @patch(AWS_CONNECTOR_PATCH) + def test_get_caller_account_id(self, mock_connector_class): + """Test account ID lookup.""" + from extended_data.connectors.aws.tools import get_caller_account_id + + mock_connector = MagicMock() + mock_connector.get_caller_account_id.return_value = "123456789012" + mock_connector_class.return_value = mock_connector + + result = get_caller_account_id() + + assert isinstance(result, ExtendedDict) + assert isinstance(result["account_id"], ExtendedString) + assert result["account_id"] == "123456789012" + + class TestListSecrets: """Tests for list_secrets tool.""" @@ -60,22 +81,28 @@ def test_list_secrets_basic(self, mock_connector_class): from extended_data.connectors.aws.tools import list_secrets mock_connector = MagicMock() - mock_connector.list_secrets.return_value = { - "my-secret": { - "ARN": "arn:aws:secretsmanager:us-east-1:123456789012:secret:my-secret", - "Description": "Test secret", - "LastChangedDate": "2024-01-01T00:00:00Z", - }, - "another-secret": { - "ARN": "arn:aws:secretsmanager:us-east-1:123456789012:secret:another-secret", - "Description": "Another test", - 
"LastChangedDate": "2024-01-02T00:00:00Z", - }, - } + mock_connector.list_secrets.return_value = extend_data( + { + "my-secret": { + "ARN": "arn:aws:secretsmanager:us-east-1:123456789012:secret:my-secret", + "Description": "Test secret", + "LastChangedDate": "2024-01-01T00:00:00Z", + }, + "another-secret": { + "ARN": "arn:aws:secretsmanager:us-east-1:123456789012:secret:another-secret", + "Description": "Another test", + "LastChangedDate": "2024-01-02T00:00:00Z", + }, + } + ) mock_connector_class.return_value = mock_connector result = list_secrets() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) + assert isinstance(result[0]["value"], ExtendedDict) assert len(result) == 2 assert result[0]["name"] == "my-secret" assert "arn" in result[0] @@ -108,6 +135,8 @@ def test_get_secret_basic(self, mock_connector_class): result = get_secret("my-secret") + assert isinstance(result, ExtendedDict) + assert isinstance(result["secret_name"], ExtendedString) assert result["secret_name"] == "my-secret" assert result["secret_value"] == "super-secret-value" assert result["status"] == "retrieved" @@ -122,16 +151,21 @@ def test_list_s3_buckets_basic(self, mock_connector_class): from extended_data.connectors.aws.tools import list_s3_buckets mock_connector = MagicMock() - mock_connector.list_s3_buckets.return_value = { - "my-bucket": { - "CreationDate": "2024-01-01T00:00:00Z", - "region": "us-east-1", - }, - } + mock_connector.list_s3_buckets.return_value = extend_data( + { + "my-bucket": { + "CreationDate": "2024-01-01T00:00:00Z", + "region": "us-east-1", + }, + } + ) mock_connector_class.return_value = mock_connector result = list_s3_buckets() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) assert len(result) == 1 assert result[0]["name"] == "my-bucket" assert result[0]["region"] == "us-east-1" @@ 
-146,17 +180,22 @@ def test_list_s3_objects_basic(self, mock_connector_class): from extended_data.connectors.aws.tools import list_s3_objects mock_connector = MagicMock() - mock_connector.list_objects.return_value = { - "file1.txt": { - "Size": 1024, - "LastModified": "2024-01-01T00:00:00Z", - "StorageClass": "STANDARD", - }, - } + mock_connector.list_objects.return_value = extend_data( + { + "file1.txt": { + "Size": 1024, + "LastModified": "2024-01-01T00:00:00Z", + "StorageClass": "STANDARD", + }, + } + ) mock_connector_class.return_value = mock_connector result = list_s3_objects("my-bucket") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["key"], ExtendedString) assert len(result) == 1 assert result[0]["key"] == "file1.txt" assert result[0]["size"] == 1024 @@ -171,17 +210,22 @@ def test_list_accounts_basic(self, mock_connector_class): from extended_data.connectors.aws.tools import list_accounts mock_connector = MagicMock() - mock_connector.get_accounts.return_value = { - "123456789012": { - "Name": "Production", - "Email": "prod@example.com", - "Status": "ACTIVE", - }, - } + mock_connector.get_accounts.return_value = extend_data( + { + "123456789012": { + "Name": "Production", + "Email": "prod@example.com", + "Status": "ACTIVE", + }, + } + ) mock_connector_class.return_value = mock_connector result = list_accounts() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) assert len(result) == 1 assert result[0]["id"] == "123456789012" assert result[0]["name"] == "Production" @@ -196,17 +240,22 @@ def test_list_sso_users_basic(self, mock_connector_class): from extended_data.connectors.aws.tools import list_sso_users mock_connector = MagicMock() - mock_connector.list_sso_users.return_value = { - "user-123": { - "user_name": "john.doe", - "display_name": "John Doe", - "primary_email": {"value": 
"john@example.com"}, - }, - } + mock_connector.list_sso_users.return_value = extend_data( + { + "user-123": { + "user_name": "john.doe", + "display_name": "John Doe", + "primary_email": {"value": "john@example.com"}, + }, + } + ) mock_connector_class.return_value = mock_connector result = list_sso_users() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["user_name"], ExtendedString) assert len(result) == 1 assert result[0]["user_id"] == "user-123" assert result[0]["user_name"] == "john.doe" @@ -221,17 +270,22 @@ def test_list_sso_groups_basic(self, mock_connector_class): from extended_data.connectors.aws.tools import list_sso_groups mock_connector = MagicMock() - mock_connector.list_sso_groups.return_value = { - "group-123": { - "display_name": "Admins", - "description": "Admin group", - "members": ["user-1", "user-2"], - }, - } + mock_connector.list_sso_groups.return_value = extend_data( + { + "group-123": { + "display_name": "Admins", + "description": "Admin group", + "members": ["user-1", "user-2"], + }, + } + ) mock_connector_class.return_value = mock_connector result = list_sso_groups() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["display_name"], ExtendedString) assert len(result) == 1 assert result[0]["group_id"] == "group-123" assert result[0]["display_name"] == "Admins" From b5d9c0049c1d9d77aa293aa3c37ea93a79f1aaa4 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:55:31 -0500 Subject: [PATCH 058/287] feat: promote aws secrets payloads --- src/extended_data/connectors/aws/__init__.py | 29 +++++---- tests/connectors/test_aws_connector.py | 68 ++++++++++++++++++++ 2 files changed, 83 insertions(+), 14 deletions(-) diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index 4e29fce..4337eb4 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ 
b/src/extended_data/connectors/aws/__init__.py @@ -16,11 +16,13 @@ from __future__ import annotations +from collections.abc import Mapping from typing import TYPE_CHECKING, Any from extended_data import is_nothing from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import extend_data, to_builtin from extended_data.logging import Logging @@ -272,9 +274,8 @@ def get_secret( raise ValueError(f"Failed to get secret for ID '{secret_id}'") from e if "SecretString" in response: - return response["SecretString"] - else: - return response["SecretBinary"].decode("utf-8") + return self.extend_result(response["SecretString"]) + return self.extend_result(response["SecretBinary"].decode("utf-8")) def list_secrets( self, @@ -355,7 +356,7 @@ def list_secrets( secrets[secret_name] = secret_arn self.logger.info(f"Retrieved {len(secrets)} secrets") - return secrets + return self.extend_result(secrets) def create_secret( self, @@ -389,7 +390,7 @@ def create_secret( try: response = secretsmanager.create_secret(**create_kwargs) self.logger.info(f"Created AWS secret ARN: {response.get('ARN')}") - return response + return self.extend_result(response) except ClientError as exc: self.logger.error(f"Failed to create secret {name}", exc_info=True) raise RuntimeError(f"Failed to create secret '{name}'") from exc @@ -419,7 +420,7 @@ def update_secret( try: response = secretsmanager.update_secret(SecretId=secret_id, SecretString=secret_value) self.logger.info(f"Updated AWS secret ARN: {response.get('ARN', secret_id)}") - return response + return self.extend_result(response) except ClientError as exc: self.logger.error(f"Failed to update secret {secret_id}", exc_info=True) raise RuntimeError(f"Failed to update secret '{secret_id}'") from exc @@ -457,7 +458,7 @@ def delete_secret( try: response = secretsmanager.delete_secret(**delete_kwargs) self.logger.info(f"Delete secret request submitted for: 
{response.get('ARN', secret_id)}") - return response + return self.extend_result(response) except ClientError as exc: self.logger.error(f"Failed to delete secret {secret_id}", exc_info=True) raise RuntimeError(f"Failed to delete secret '{secret_id}'") from exc @@ -489,18 +490,18 @@ def delete_secrets_matching( for secret_name, value in secrets.items(): if isinstance(value, str): secret_arns.append(value) - elif isinstance(value, dict) and "ARN" in value: + elif isinstance(value, Mapping) and "ARN" in value: secret_arns.append(value["ARN"]) else: self.logger.debug(f"Skipping secret {secret_name} due to missing ARN data") if not secret_arns: self.logger.info(f"No secrets found for prefix: {prefix}") - return [] + return self.extend_result([]) if dry_run: self.logger.info(f"Dry run enabled; would delete {len(secret_arns)} secrets for prefix {prefix}") - return secret_arns + return self.extend_result(secret_arns) deleted_arns: list[str] = [] for secret_arn in secret_arns: @@ -513,7 +514,7 @@ def delete_secrets_matching( deleted_arns.append(response.get("ARN", secret_arn)) self.logger.info(f"Deleted {len(deleted_arns)} secrets for prefix {prefix}") - return deleted_arns + return self.extend_result(deleted_arns) def copy_secrets_to_s3( self, @@ -545,7 +546,7 @@ def copy_secrets_to_s3( role_session_name=role_session_name, ) - body = json_module.dumps(secrets) + body = json_module.dumps(to_builtin(secrets)) s3_client.put_object( Bucket=bucket, Key=key, @@ -555,7 +556,7 @@ def copy_secrets_to_s3( s3_uri = f"s3://{bucket}/{key}" self.logger.info(f"Uploaded secrets to {s3_uri}") - return s3_uri + return self.extend_result(s3_uri) @staticmethod def load_vendors_from_asm(prefix: str = "/vendors/") -> dict[str, str]: @@ -599,7 +600,7 @@ def load_vendors_from_asm(prefix: str = "/vendors/") -> dict[str, str]: # Return empty dict if we can't access Secrets Manager pass - return vendors + return extend_data(vendors) from extended_data.connectors.aws.codedeploy import 
create_codedeploy_deployment, get_aws_codedeploy_deployments diff --git a/tests/connectors/test_aws_connector.py b/tests/connectors/test_aws_connector.py index 895c55d..f316400 100644 --- a/tests/connectors/test_aws_connector.py +++ b/tests/connectors/test_aws_connector.py @@ -12,6 +12,7 @@ from botocore.exceptions import ClientError +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data from extended_data.connectors.aws import AWSConnector @@ -139,6 +140,8 @@ def test_list_secrets_returns_arns_with_filters(self, base_connector_kwargs): filters = [{"Key": "description", "Values": ["prod"]}] secrets = connector.list_secrets(filters=filters, name_prefix="/vendors/") + assert isinstance(secrets, ExtendedDict) + assert isinstance(secrets["/vendors/foo"], ExtendedString) assert secrets == {"/vendors/foo": "arn:foo", "/vendors/bar": "arn:bar"} connector.get_aws_client.assert_called_once_with( client_name="secretsmanager", @@ -182,6 +185,8 @@ def test_list_secrets_fetches_values_and_skips_empty(self, base_connector_kwargs role_session_name="session", ) + assert isinstance(secrets, ExtendedDict) + assert isinstance(secrets["secret/a"], ExtendedString) assert secrets == {"secret/a": "value-a", "secret/c": "value-c"} connector.get_aws_client.assert_called_once_with( client_name="secretsmanager", @@ -229,6 +234,18 @@ def test_list_secrets_rejects_path_traversal(self, base_connector_kwargs): with pytest.raises(ValueError, match="invalid characters"): connector.list_secrets(name_prefix="secrets\x00admin") + def test_get_secret_returns_extended_string(self, base_connector_kwargs): + """Ensure get_secret promotes returned secret strings.""" + connector = AWSConnector(**base_connector_kwargs) + mock_client = MagicMock() + mock_client.get_secret_value.return_value = {"SecretString": "secret-value"} + connector.get_aws_client = MagicMock(return_value=mock_client) + + value = connector.get_secret("arn:secret:test") + + assert isinstance(value, 
ExtendedString) + assert value == "secret-value" + def test_create_secret_with_tags_and_description(self, base_connector_kwargs): """Ensure create_secret builds payload and sends to AWS.""" connector = AWSConnector(**base_connector_kwargs) @@ -244,6 +261,8 @@ def test_create_secret_with_tags_and_description(self, base_connector_kwargs): execution_role_arn="arn:role:override", ) + assert isinstance(response, ExtendedDict) + assert isinstance(response["ARN"], ExtendedString) assert response == {"ARN": "arn:secret:test"} connector.get_aws_client.assert_called_once_with( client_name="secretsmanager", @@ -278,6 +297,8 @@ def test_update_secret_calls_aws(self, base_connector_kwargs): execution_role_arn="arn:role:override", ) + assert isinstance(response, ExtendedDict) + assert isinstance(response["ARN"], ExtendedString) assert response == {"ARN": "arn:secret:test"} connector.get_aws_client.assert_called_once_with( client_name="secretsmanager", @@ -298,6 +319,8 @@ def test_delete_secret_with_recovery_window(self, base_connector_kwargs): execution_role_arn="arn:role:override", ) + assert isinstance(response, ExtendedDict) + assert isinstance(response["ARN"], ExtendedString) assert response == {"ARN": "arn:secret:test"} mock_client.delete_secret.assert_called_once_with(SecretId="arn:secret:test", RecoveryWindowInDays=10) @@ -335,6 +358,8 @@ def test_delete_secrets_matching_dry_run(self, base_connector_kwargs): execution_role_arn="arn:role:override", ) + assert isinstance(to_delete, ExtendedList) + assert isinstance(to_delete[0], ExtendedString) assert to_delete == ["arn:a", "arn:b"] connector.delete_secret.assert_not_called() connector.list_secrets.assert_called_once_with( @@ -357,6 +382,8 @@ def test_delete_secrets_matching_executes_delete(self, base_connector_kwargs): execution_role_arn="arn:role:override", ) + assert isinstance(deleted, ExtendedList) + assert isinstance(deleted[0], ExtendedString) assert deleted == ["arn:a", "arn:b"] 
connector.delete_secret.assert_has_calls( [ @@ -374,3 +401,44 @@ def test_delete_secrets_matching_executes_delete(self, base_connector_kwargs): ), ] ) + + def test_copy_secrets_to_s3_unwraps_extended_data(self, base_connector_kwargs): + """Ensure copy_secrets_to_s3 uploads JSON built from plain containers.""" + connector = AWSConnector(**base_connector_kwargs) + mock_client = MagicMock() + connector.get_aws_client = MagicMock(return_value=mock_client) + + uri = connector.copy_secrets_to_s3( + secrets=extend_data({"TOKEN": "secret-value"}), + bucket="target-bucket", + key="secrets.json", + ) + + assert isinstance(uri, ExtendedString) + assert uri == "s3://target-bucket/secrets.json" + mock_client.put_object.assert_called_once_with( + Bucket="target-bucket", + Key="secrets.json", + Body=b'{"TOKEN": "secret-value"}', + ContentType="application/json", + ) + + def test_load_vendors_from_asm_returns_extended_mapping(self): + """Ensure load_vendors_from_asm promotes loaded vendor secrets.""" + mock_secretsmanager = MagicMock() + mock_paginator = MagicMock() + mock_paginator.paginate.return_value = [{"SecretList": [{"Name": "/vendors/github_token"}]}] + mock_secretsmanager.get_paginator.return_value = mock_paginator + mock_secretsmanager.get_secret_value.return_value = {"SecretString": "ghp_test"} + + mock_session = MagicMock() + mock_session.client.return_value = mock_secretsmanager + mock_sdk = MagicMock() + mock_sdk.Session.return_value = mock_session + + with patch("extended_data.connectors.aws._load_aws_sdk", return_value=mock_sdk): + vendors = AWSConnector.load_vendors_from_asm(prefix="/vendors/") + + assert isinstance(vendors, ExtendedDict) + assert isinstance(vendors["GITHUB_TOKEN"], ExtendedString) + assert vendors == {"GITHUB_TOKEN": "ghp_test"} From d750f8c08439a5d3a9212af5f2e36617aaaa7a65 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:57:18 -0500 Subject: [PATCH 059/287] feat: promote aws s3 payloads --- 
src/extended_data/connectors/aws/s3.py | 35 ++++++++++++++------------ tests/connectors/test_aws_s3.py | 32 ++++++++++++++++++++++- 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/src/extended_data/connectors/aws/s3.py b/src/extended_data/connectors/aws/s3.py index 518afe1..824e60a 100644 --- a/src/extended_data/connectors/aws/s3.py +++ b/src/extended_data/connectors/aws/s3.py @@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any from extended_data import unhump_map +from extended_data.containers import to_builtin if TYPE_CHECKING: @@ -56,6 +57,8 @@ def get_aws_resource( **resource_args: Any, ) -> ServiceResource: ... + def extend_result(self, value: Any) -> Any: ... + def list_s3_buckets( self, unhump_buckets: bool = True, @@ -89,7 +92,7 @@ def list_s3_buckets( buckets = {k: unhump_map(v) for k, v in buckets.items()} self.logger.info(f"Retrieved {len(buckets)} buckets") - return buckets + return self.extend_result(buckets) def get_bucket_location( self, @@ -114,7 +117,7 @@ def get_bucket_location( ) response = s3.get_bucket_location(Bucket=bucket_name) - return response.get("LocationConstraint") or "us-east-1" + return self.extend_result(response.get("LocationConstraint") or "us-east-1") def get_object( self, @@ -147,7 +150,7 @@ def get_object( body = response["Body"].read() if decode: - return body.decode("utf-8") + return self.extend_result(body.decode("utf-8")) return body except ClientError as e: if e.response.get("Error", {}).get("Code") == "NoSuchKey": @@ -181,7 +184,7 @@ def get_json_object( if content is None: return None - return json.loads(content) + return self.extend_result(json.loads(content)) def put_object( self, @@ -234,7 +237,7 @@ def put_object( response = s3.put_object(**put_args) self.logger.debug(f"Put object to s3://{bucket}/{key}") - return response + return self.extend_result(response) def put_json_object( self, @@ -258,7 +261,7 @@ def put_json_object( Returns: The S3 put_object response. 
""" - body = json.dumps(data, indent=indent, default=str) + body = json.dumps(to_builtin(data), indent=indent, default=str) return self.put_object( bucket=bucket, key=key, @@ -294,7 +297,7 @@ def delete_object( response = s3.delete_object(Bucket=bucket, Key=key) self.logger.debug(f"Deleted object s3://{bucket}/{key}") - return response + return self.extend_result(response) def list_objects( self, @@ -349,7 +352,7 @@ def list_objects( objects = [unhump_map(o) for o in objects] self.logger.debug(f"Found {len(objects)} objects") - return objects + return self.extend_result(objects) def copy_object( self, @@ -385,7 +388,7 @@ def copy_object( CopySource={"Bucket": source_bucket, "Key": source_key}, ) self.logger.debug(f"Copied object to s3://{dest_bucket}/{dest_key}") - return response + return self.extend_result(response) # ========================================================================= # Bucket Features and Configuration @@ -418,7 +421,7 @@ def get_bucket_features( # Check if bucket exists if not bucket.creation_date: self.logger.warning(f"Bucket does not exist: {bucket_name}") - return {} + return self.extend_result({}) features: dict[str, Any] = {} @@ -454,7 +457,7 @@ def get_bucket_features( self.logger.debug("No policy for bucket") features["policy"] = None - return features + return self.extend_result(features) def find_buckets_by_name( self, @@ -498,7 +501,7 @@ def find_buckets_by_name( } self.logger.info(f"Found {len(buckets)} matching buckets") - return buckets + return self.extend_result(buckets) def create_bucket( self, @@ -561,7 +564,7 @@ def create_bucket( ) self.logger.info(f"Applied {len(tags)} tags to bucket: {bucket_name}") - return result + return self.extend_result(result) def delete_bucket( self, @@ -628,10 +631,10 @@ def get_bucket_tags( try: response = s3.get_bucket_tagging(Bucket=bucket_name) - return {tag["Key"]: tag["Value"] for tag in response.get("TagSet", [])} + return self.extend_result({tag["Key"]: tag["Value"] for tag in 
response.get("TagSet", [])}) except ClientError as e: if e.response.get("Error", {}).get("Code") == "NoSuchTagSet": - return {} + return self.extend_result({}) raise def set_bucket_tags( @@ -747,4 +750,4 @@ def get_bucket_sizes( } self.logger.info(f"Retrieved sizes for {len(bucket_sizes)} buckets") - return bucket_sizes + return self.extend_result(bucket_sizes) diff --git a/tests/connectors/test_aws_s3.py b/tests/connectors/test_aws_s3.py index a506275..de79fa6 100644 --- a/tests/connectors/test_aws_s3.py +++ b/tests/connectors/test_aws_s3.py @@ -15,6 +15,7 @@ from botocore.exceptions import ClientError +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data from extended_data.connectors.aws import AWSConnectorFull @@ -43,6 +44,8 @@ def test_list_s3_buckets(self, aws_connector): result = aws_connector.list_s3_buckets(unhump_buckets=False) + assert isinstance(result, ExtendedDict) + assert isinstance(result["bucket1"], ExtendedDict) assert len(result) == 2 assert "bucket1" in result assert "bucket2" in result @@ -70,6 +73,7 @@ def test_get_bucket_location(self, aws_connector): result = aws_connector.get_bucket_location("my-bucket") + assert isinstance(result, ExtendedString) assert result == "us-west-2" mock_s3.get_bucket_location.assert_called_once_with(Bucket="my-bucket") @@ -81,6 +85,7 @@ def test_get_bucket_location_us_east_1(self, aws_connector): result = aws_connector.get_bucket_location("my-bucket") + assert isinstance(result, ExtendedString) assert result == "us-east-1" def test_get_bucket_tags(self, aws_connector): @@ -96,6 +101,8 @@ def test_get_bucket_tags(self, aws_connector): result = aws_connector.get_bucket_tags("my-bucket") + assert isinstance(result, ExtendedDict) + assert isinstance(result["Environment"], ExtendedString) assert result == {"Environment": "dev", "Owner": "team"} def test_get_bucket_tags_no_tags(self, aws_connector): @@ -107,6 +114,7 @@ def test_get_bucket_tags_no_tags(self, aws_connector): result 
= aws_connector.get_bucket_tags("my-bucket") + assert isinstance(result, ExtendedDict) assert result == {} def test_get_bucket_tags_other_error(self, aws_connector): @@ -148,6 +156,7 @@ def test_get_object_success(self, aws_connector): result = aws_connector.get_object("bucket", "key.txt", decode=True) + assert isinstance(result, ExtendedString) assert result == "test content" mock_s3.get_object.assert_called_once_with(Bucket="bucket", Key="key.txt") @@ -195,6 +204,8 @@ def test_get_json_object(self, aws_connector): result = aws_connector.get_json_object("bucket", "data.json") + assert isinstance(result, ExtendedDict) + assert isinstance(result["key"], ExtendedString) assert result == test_data def test_get_json_object_not_found(self, aws_connector): @@ -216,6 +227,8 @@ def test_put_object_string(self, aws_connector): result = aws_connector.put_object("bucket", "key.txt", "test content") + assert isinstance(result, ExtendedDict) + assert isinstance(result["ETag"], ExtendedString) assert result["ETag"] == "abc123" call_args = mock_s3.put_object.call_args[1] assert call_args["Bucket"] == "bucket" @@ -280,9 +293,11 @@ def test_put_json_object(self, aws_connector): mock_s3.put_object.return_value = {"ETag": "abc123"} aws_connector.get_aws_client = MagicMock(return_value=mock_s3) - data = {"key": "value", "number": 123} + data = extend_data({"key": "value", "number": 123}) result = aws_connector.put_json_object("bucket", "data.json", data) + assert isinstance(result, ExtendedDict) + assert isinstance(result["ETag"], ExtendedString) assert result["ETag"] == "abc123" call_args = mock_s3.put_object.call_args[1] assert call_args["ContentType"] == "application/json" @@ -298,6 +313,7 @@ def test_delete_object(self, aws_connector): result = aws_connector.delete_object("bucket", "key.txt") + assert isinstance(result, ExtendedDict) assert result["DeleteMarker"] is True mock_s3.delete_object.assert_called_once_with(Bucket="bucket", Key="key.txt") @@ -319,6 +335,9 @@ def 
test_list_objects(self, aws_connector): result = aws_connector.list_objects("bucket", unhump_objects=False) + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["Key"], ExtendedString) assert len(result) == 3 assert result[0]["Key"] == "file1.txt" assert result[2]["Key"] == "file3.txt" @@ -357,6 +376,7 @@ def test_copy_object(self, aws_connector): result = aws_connector.copy_object("src-bucket", "src.txt", "dst-bucket", "dst.txt") + assert isinstance(result, ExtendedDict) assert "CopyObjectResult" in result mock_s3.copy_object.assert_called_once_with( Bucket="dst-bucket", @@ -399,6 +419,9 @@ def test_get_bucket_features(self, aws_connector): result = aws_connector.get_bucket_features("my-bucket") + assert isinstance(result, ExtendedDict) + assert isinstance(result["logging"], ExtendedDict) + assert isinstance(result["lifecycle_rules"], ExtendedList) assert result["logging"] == {"TargetBucket": "logs"} assert result["versioning"] == "Enabled" assert result["lifecycle_rules"] == [{"Id": "rule1"}] @@ -415,6 +438,7 @@ def test_get_bucket_features_no_bucket(self, aws_connector): result = aws_connector.get_bucket_features("missing-bucket") + assert isinstance(result, ExtendedDict) assert result == {} def test_get_bucket_features_errors(self, aws_connector): @@ -460,6 +484,8 @@ def test_find_buckets_by_name(self, aws_connector): result = aws_connector.find_buckets_by_name("app") + assert isinstance(result, ExtendedDict) + assert isinstance(result["prod-app-bucket"], ExtendedDict) assert len(result) == 2 assert "prod-app-bucket" in result assert "dev-app-bucket" in result @@ -473,6 +499,8 @@ def test_create_bucket_simple(self, aws_connector): result = aws_connector.create_bucket("my-bucket") + assert isinstance(result, ExtendedDict) + assert isinstance(result["Location"], ExtendedString) assert result["Location"] == "/my-bucket" call_args = mock_s3.create_bucket.call_args[1] assert call_args["Bucket"] == 
"my-bucket" @@ -579,6 +607,8 @@ def get_client(client_name, **kwargs): result = aws_connector.get_bucket_sizes(bucket_names=["test-bucket"]) + assert isinstance(result, ExtendedDict) + assert isinstance(result["test-bucket"], ExtendedDict) assert "test-bucket" in result assert result["test-bucket"]["size_bytes"] == 1073741824 assert result["test-bucket"]["size_gb"] == 1.0 From 93c5a9d926199873a095abc7092b1901a066d9f0 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 04:59:35 -0500 Subject: [PATCH 060/287] feat: promote aws organization payloads --- .../connectors/aws/organizations.py | 77 +++++++++++-------- tests/connectors/test_aws_organizations.py | 22 ++++++ 2 files changed, 65 insertions(+), 34 deletions(-) diff --git a/src/extended_data/connectors/aws/organizations.py b/src/extended_data/connectors/aws/organizations.py index e61e828..f45d417 100644 --- a/src/extended_data/connectors/aws/organizations.py +++ b/src/extended_data/connectors/aws/organizations.py @@ -16,6 +16,7 @@ from deepmerge import always_merger from extended_data import is_nothing, unhump_map +from extended_data.containers import to_builtin if TYPE_CHECKING: @@ -46,6 +47,8 @@ def get_aws_client( def get_caller_account_id(self) -> str: ... + def extend_result(self, value: Any) -> Any: ... 
+ def get_organization_accounts( self, unhump_accounts: bool = True, @@ -144,7 +147,7 @@ def get_accounts_recursive(parent_id: str) -> dict[str, dict[str, Any]]: aws_accounts = dict(sorted(aws_accounts.items(), key=lambda x: x[1].get(key_field, ""))) self.logger.info(f"Retrieved {len(aws_accounts)} organization accounts") - return aws_accounts + return self.extend_result(aws_accounts) def get_controltower_accounts( self, @@ -211,7 +214,7 @@ def get_controltower_accounts( accounts = dict(sorted(accounts.items(), key=lambda x: x[1].get(key_field, ""))) self.logger.info(f"Retrieved {len(accounts)} Control Tower accounts") - return accounts + return self.extend_result(accounts) def get_accounts( self, @@ -237,18 +240,22 @@ def get_accounts( self.logger.info("Getting all AWS accounts") # Get organization accounts - aws_accounts = self.get_organization_accounts( - unhump_accounts=False, - sort_by_name=False, - execution_role_arn=execution_role_arn, + aws_accounts = to_builtin( + self.get_organization_accounts( + unhump_accounts=False, + sort_by_name=False, + execution_role_arn=execution_role_arn, + ) ) # Merge with Control Tower accounts if include_controltower: - controltower_accounts = self.get_controltower_accounts( - unhump_accounts=False, - sort_by_name=False, - execution_role_arn=execution_role_arn, + controltower_accounts = to_builtin( + self.get_controltower_accounts( + unhump_accounts=False, + sort_by_name=False, + execution_role_arn=execution_role_arn, + ) ) aws_accounts = always_merger.merge(aws_accounts, controltower_accounts) @@ -261,7 +268,7 @@ def get_accounts( aws_accounts = dict(sorted(aws_accounts.items(), key=lambda x: x[1].get(key_field, ""))) self.logger.info(f"Retrieved {len(aws_accounts)} total AWS accounts") - return aws_accounts + return self.extend_result(aws_accounts) def get_organization_units( self, @@ -306,7 +313,7 @@ def get_ous_recursive(parent_id: str, parent_path: str = "") -> None: org_units = {k: unhump_map(v) for k, v in 
org_units.items()} self.logger.info(f"Retrieved {len(org_units)} organizational units") - return org_units + return self.extend_result(org_units) # ------------------------------------------------------------------ # # Internal helpers # @@ -540,7 +547,7 @@ def classify_accounts( accounts[account_id]["classification"] = classification self.logger.info(f"Classified {len(accounts)} accounts") - return accounts + return self.extend_result(accounts) # --------------------------------------------------------------------- # # Terraform-migrated helpers # @@ -612,7 +619,7 @@ def label_aws_accounts( caller_account_id=caller_account_id, ) - return labeled_accounts + return self.extend_result(labeled_accounts) def label_aws_account( self, @@ -630,7 +637,7 @@ def label_aws_account( execution_role_arn=execution_role_arn, ) try: - return labeled_accounts[account_id] + return self.extend_result(labeled_accounts[account_id]) except KeyError as exc: # pragma: no cover - defensive guard raise KeyError(f"AWS account {account_id} not found") from exc @@ -664,7 +671,7 @@ def classify_aws_accounts( continue classified_accounts[f"{classification}{suffix_value}"].append(account_key) - return dict(classified_accounts) + return self.extend_result(dict(classified_accounts)) def preprocess_aws_organization( self, @@ -703,23 +710,25 @@ def preprocess_aws_organization( units_by_name = {unit["name"]: unit for unit in units_lookup.values() if unit.get("name")} - return { - "accounts": labeled_accounts, - "units": units_lookup, - "unit_classifications_by_name": { - name: unit.get("classifications", []) for name, unit in units_by_name.items() - }, - "accounts_by_classification": classification_lookup, - "accounts_by_name": accounts_by_name, - "accounts_by_email": accounts_by_email, - "accounts_by_key": accounts_by_key, - "organization": { - "root_id": root_id, - "organizational_units": units_lookup, - "account_count": len(labeled_accounts), - "ou_count": len(units_lookup), - }, - } + return 
self.extend_result( + { + "accounts": labeled_accounts, + "units": units_lookup, + "unit_classifications_by_name": { + name: unit.get("classifications", []) for name, unit in units_by_name.items() + }, + "accounts_by_classification": classification_lookup, + "accounts_by_name": accounts_by_name, + "accounts_by_email": accounts_by_email, + "accounts_by_key": accounts_by_key, + "organization": { + "root_id": root_id, + "organizational_units": units_lookup, + "account_count": len(labeled_accounts), + "ou_count": len(units_lookup), + }, + } + ) def preprocess_organization( self, @@ -776,4 +785,4 @@ def preprocess_organization( } self.logger.info(f"Preprocessed org: {len(accounts)} accounts, {len(org_units)} OUs") - return result + return self.extend_result(result) diff --git a/tests/connectors/test_aws_organizations.py b/tests/connectors/test_aws_organizations.py index c5fec18..3d3598b 100644 --- a/tests/connectors/test_aws_organizations.py +++ b/tests/connectors/test_aws_organizations.py @@ -10,6 +10,7 @@ pytest.importorskip("boto3") pytest.importorskip("botocore") +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data from extended_data.connectors.aws.organizations import AWSOrganizationsMixin @@ -47,6 +48,9 @@ def register_client(self, name: str, client: Any) -> None: def get_aws_client(self, client_name: str, execution_role_arn=None): return self._clients[client_name] + def extend_result(self, value: Any) -> Any: + return extend_data(value) + @pytest.fixture def organizations_connector() -> _TestAWSOrganizations: @@ -71,6 +75,9 @@ def test_classify_accounts_applies_rules(organizations_connector: _TestAWSOrgani }, ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["111111111111"], ExtendedDict) + assert isinstance(result["111111111111"]["classification"], ExtendedString) assert result["111111111111"]["classification"] == "production" assert result["222222222222"]["classification"] == "development" assert 
result["333333333333"]["classification"] == "sandbox" @@ -83,6 +90,7 @@ def test_classify_accounts_fetches_when_missing(mocker, organizations_connector: output = organizations_connector.classify_accounts() mock_get.assert_called_once() + assert isinstance(output, ExtendedDict) assert output["999999999999"]["classification"] == "shared" @@ -125,6 +133,8 @@ def test_preprocess_organization_compiles_sections(mocker, organizations_connect mock_classify.assert_called_once() mock_get_units.assert_called_once() + assert isinstance(result, ExtendedDict) + assert isinstance(result["accounts"], ExtendedDict) assert result["root_id"] == "r-root" assert result["account_count"] == 1 assert result["ou_count"] == 1 @@ -155,6 +165,9 @@ def test_get_accounts_merges_controltower_data(mocker, organizations_connector: mock_org.assert_called_once() mock_ctrl.assert_called_once() + assert isinstance(result, ExtendedDict) + assert isinstance(result["100"], ExtendedDict) + assert isinstance(result["100"]["name"], ExtendedString) assert list(result.keys()) == ["100", "200", "300"] assert result["200"]["managed"] is True assert result["100"]["name"] == "Alpha" @@ -191,6 +204,9 @@ def test_label_aws_accounts_builds_metadata(mocker, organizations_connector: _Te labeled = organizations_connector.label_aws_accounts(domains={"prod": "example.com"}) account = labeled["123456789012"] + assert isinstance(labeled, ExtendedDict) + assert isinstance(account, ExtendedDict) + assert isinstance(account["execution_role_arn"], ExtendedString) assert account["json_key"] == "ProdAccount" assert account["execution_role_arn"].endswith("role/CustomRole") assert account["environment"] == "prod" @@ -206,6 +222,9 @@ def test_classify_aws_accounts_generates_suffix(organizations_connector: _TestAW result = organizations_connector.classify_aws_accounts(labeled_accounts=labeled, suffix="_east") + assert isinstance(result, ExtendedDict) + assert isinstance(result["production_accounts_east"], ExtendedList) + assert 
isinstance(result["production_accounts_east"][0], ExtendedString) assert result["production_accounts_east"] == ["123"] assert result["development_accounts_east"] == ["456"] @@ -247,6 +266,9 @@ def list_roots(self): context = organizations_connector.preprocess_aws_organization(domains={"prod": "example.com"}) + assert isinstance(context, ExtendedDict) + assert isinstance(context["accounts_by_name"], ExtendedDict) + assert isinstance(context["organization"], ExtendedDict) assert context["organization"]["root_id"] == "r-root" assert context["accounts_by_name"]["Prod Account"]["email"] == "prod@example.com" assert context["accounts_by_classification"]["production_accounts"] == ["123"] From fe2c596002c93f9bf4af26d5306781c9fef14b13 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:01:18 -0500 Subject: [PATCH 061/287] feat: promote aws sso payloads --- src/extended_data/connectors/aws/sso.py | 32 ++++++++++++++----------- tests/connectors/test_aws_sso.py | 23 ++++++++++++++++++ 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/extended_data/connectors/aws/sso.py b/src/extended_data/connectors/aws/sso.py index 881cc71..e74f058 100644 --- a/src/extended_data/connectors/aws/sso.py +++ b/src/extended_data/connectors/aws/sso.py @@ -40,6 +40,8 @@ def get_aws_client( **client_args: Any, ) -> Any: ... + def extend_result(self, value: Any) -> Any: ... 
+ def get_identity_store_id( self, execution_role_arn: str | None = None, @@ -72,7 +74,7 @@ def get_identity_store_id( identity_store_id = instance_list[0]["IdentityStoreId"] self.logger.info(f"Identity store ID: {identity_store_id}") - return identity_store_id + return self.extend_result(identity_store_id) def get_sso_instance_arn( self, @@ -106,7 +108,7 @@ def get_sso_instance_arn( instance_arn = instance_list[0]["InstanceArn"] self.logger.info(f"SSO instance ARN: {instance_arn}") - return instance_arn + return self.extend_result(instance_arn) # ========================================================================= # Users @@ -177,7 +179,7 @@ def list_sso_users( users = {k: unhump_map(v) for k, v in users.items()} self.logger.info(f"Retrieved {len(users)} SSO users") - return users + return self.extend_result(users) def get_sso_user( self, @@ -208,9 +210,11 @@ def get_sso_user( ) try: - return identitystore.describe_user( - IdentityStoreId=identity_store_id, - UserId=user_id, + return self.extend_result( + identitystore.describe_user( + IdentityStoreId=identity_store_id, + UserId=user_id, + ) ) except ClientError as e: if e.response.get("Error", {}).get("Code") == "ResourceNotFoundException": @@ -270,7 +274,7 @@ def create_sso_user( result = identitystore.create_user(**user_body) self.logger.info(f"Created SSO user: {user_name} ({result.get('UserId')})") - return result + return self.extend_result(result) def delete_sso_user( self, @@ -385,7 +389,7 @@ def list_sso_groups( groups = {k: unhump_map(v) for k, v in groups.items()} self.logger.info(f"Retrieved {len(groups)} SSO groups") - return groups + return self.extend_result(groups) def _get_group_members( self, @@ -472,7 +476,7 @@ def create_sso_group( Description=description, ) self.logger.info(f"Created SSO group: {display_name} ({result.get('GroupId')})") - return result + return self.extend_result(result) def delete_sso_group( self, @@ -539,7 +543,7 @@ def add_user_to_group( MemberId={"UserId": user_id}, ) 
self.logger.info(f"Added user {user_id} to group {group_id}") - return result + return self.extend_result(result) def remove_user_from_group( self, @@ -660,7 +664,7 @@ def list_permission_sets( permission_sets = {k: unhump_map(v) for k, v in permission_sets.items()} self.logger.info(f"Retrieved {len(permission_sets)} permission sets") - return permission_sets + return self.extend_result(permission_sets) def _get_managed_policies_for_permission_set( self, @@ -747,7 +751,7 @@ def list_account_assignments( assignments = [unhump_map(a) for a in assignments] self.logger.info(f"Retrieved {len(assignments)} assignments for {account_id}") - return assignments + return self.extend_result(assignments) def create_account_assignment( self, @@ -791,7 +795,7 @@ def create_account_assignment( PrincipalId=principal_id, ) self.logger.info(f"Created account assignment for {principal_id}") - return result + return self.extend_result(result) def delete_account_assignment( self, @@ -835,4 +839,4 @@ def delete_account_assignment( PrincipalId=principal_id, ) self.logger.info(f"Deleted account assignment for {principal_id}") - return result + return self.extend_result(result) diff --git a/tests/connectors/test_aws_sso.py b/tests/connectors/test_aws_sso.py index 61c61c4..bfd59dc 100644 --- a/tests/connectors/test_aws_sso.py +++ b/tests/connectors/test_aws_sso.py @@ -12,6 +12,7 @@ from botocore.exceptions import ClientError +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.connectors.aws import AWSConnectorFull @@ -42,6 +43,7 @@ def test_get_identity_store_id(self, aws_connector): result = aws_connector.get_identity_store_id() + assert isinstance(result, ExtendedString) assert result == "d-1234567890" aws_connector.get_aws_client.assert_called_once_with(client_name="sso-admin", execution_role_arn=None) @@ -69,6 +71,7 @@ def test_get_sso_instance_arn(self, aws_connector): result = aws_connector.get_sso_instance_arn() + assert 
isinstance(result, ExtendedString) assert result == "arn:aws:sso:::instance/ssoins-1234567890" def test_get_sso_instance_arn_no_instance(self, aws_connector): @@ -113,6 +116,9 @@ def get_client(client_name, **kwargs): result = aws_connector.list_sso_users(unhump_users=False, flatten_name=False) + assert isinstance(result, ExtendedDict) + assert isinstance(result["user-1"], ExtendedDict) + assert isinstance(result["user-1"]["UserName"], ExtendedString) assert len(result) == 2 assert "user-1" in result assert "user-2" in result @@ -142,6 +148,8 @@ def get_client(client_name, **kwargs): result = aws_connector.list_sso_users(unhump_users=False, flatten_name=True, identity_store_id="d-1234567890") + assert isinstance(result, ExtendedDict) + assert isinstance(result["user-1"], ExtendedDict) assert len(result) == 1 assert result["user-1"]["GivenName"] == "John" assert result["user-1"]["FamilyName"] == "Doe" @@ -200,6 +208,8 @@ def test_get_sso_user(self, aws_connector): result = aws_connector.get_sso_user("user-1", identity_store_id="d-1234567890") + assert isinstance(result, ExtendedDict) + assert isinstance(result["UserName"], ExtendedString) assert result["UserId"] == "user-1" assert result["UserName"] == "john.doe" @@ -259,6 +269,9 @@ def get_client(client_name, **kwargs): result = aws_connector.list_sso_groups(unhump_groups=False) + assert isinstance(result, ExtendedDict) + assert isinstance(result["group-1"], ExtendedDict) + assert isinstance(result["group-1"]["DisplayName"], ExtendedString) assert len(result) == 2 assert "group-1" in result assert result["group-1"]["DisplayName"] == "Admins" @@ -282,6 +295,8 @@ def get_client(client_name, **kwargs): result = aws_connector.create_sso_group("Admins", description="Admin group") + assert isinstance(result, ExtendedDict) + assert isinstance(result["GroupId"], ExtendedString) assert result["GroupId"] == "group-1" mock_identitystore.create_group.assert_called_once() @@ -340,8 +355,11 @@ def test_list_permission_sets(self, 
aws_connector): result = aws_connector.list_permission_sets(unhump_sets=False) + assert isinstance(result, ExtendedDict) assert len(result) == 2 ps1_arn = "arn:aws:sso:::permissionSet/ssoins-1234567890/ps-1" + assert isinstance(result[ps1_arn], ExtendedDict) + assert isinstance(result[ps1_arn]["Name"], ExtendedString) assert ps1_arn in result assert result[ps1_arn]["Name"] == "AdminAccess" @@ -372,6 +390,9 @@ def test_list_account_assignments(self, aws_connector): unhump_assignments=False, ) + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["AccountId"], ExtendedString) assert len(result) == 1 assert result[0]["AccountId"] == "123456789012" assert result[0]["PrincipalType"] == "USER" @@ -398,5 +419,7 @@ def test_create_account_assignment(self, aws_connector): principal_type="USER", ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["AccountAssignmentCreationStatus"], ExtendedDict) assert "AccountAssignmentCreationStatus" in result mock_sso_admin.create_account_assignment.assert_called_once() From 5d304681f859139d49383e8f46762639ce13db6a Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:02:26 -0500 Subject: [PATCH 062/287] feat: promote aws codedeploy payloads --- .../connectors/aws/codedeploy.py | 27 +++++++++++-------- tests/connectors/test_aws_codedeploy.py | 8 ++++++ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/extended_data/connectors/aws/codedeploy.py b/src/extended_data/connectors/aws/codedeploy.py index 301de4c..f978090 100644 --- a/src/extended_data/connectors/aws/codedeploy.py +++ b/src/extended_data/connectors/aws/codedeploy.py @@ -12,6 +12,7 @@ from typing import TYPE_CHECKING, Any from extended_data.connectors.aws import AWSConnector +from extended_data.containers import extend_data from extended_data.logging import Logging @@ -252,12 +253,14 @@ def get_aws_codedeploy_deployments( ) _ = connector # appease linters when we 
instantiate a connector internally - return { - "deployment_ids": deployment_ids, - "deployments": deployment_infos, - "next_token": final_token, - "pages": pages, - } + return extend_data( + { + "deployment_ids": deployment_ids, + "deployments": deployment_infos, + "next_token": final_token, + "pages": pages, + } + ) def create_codedeploy_deployment( @@ -356,8 +359,10 @@ def create_codedeploy_deployment( deployment_info = _safe_get_deployment(client, deployment_id, logger) _ = connector - return { - "deployment_id": deployment_id, - "status": deployment_info.get("status") if deployment_info else None, - "deployment_info": deployment_info, - } + return extend_data( + { + "deployment_id": deployment_id, + "status": deployment_info.get("status") if deployment_info else None, + "deployment_info": deployment_info, + } + ) diff --git a/tests/connectors/test_aws_codedeploy.py b/tests/connectors/test_aws_codedeploy.py index f3f9ebd..cb7c914 100644 --- a/tests/connectors/test_aws_codedeploy.py +++ b/tests/connectors/test_aws_codedeploy.py @@ -11,6 +11,7 @@ from botocore.exceptions import ClientError, WaiterError +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.connectors.aws.codedeploy import ( create_codedeploy_deployment, get_aws_codedeploy_deployments, @@ -46,6 +47,10 @@ def test_returns_details_and_normalizes_statuses(self): codedeploy_client=codedeploy_client, ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["deployment_ids"], ExtendedList) + assert isinstance(result["deployment_ids"][0], ExtendedString) + assert isinstance(result["deployments"][0], ExtendedDict) assert result["deployment_ids"] == ["dep-1", "dep-2", "dep-3"] assert [item["deploymentId"] for item in result["deployments"]] == ["dep-1", "dep-2", "dep-3"] @@ -82,6 +87,9 @@ def test_waits_for_success_and_returns_details(self): codedeploy_client=codedeploy_client, ) + assert isinstance(result, ExtendedDict) + assert 
isinstance(result["deployment_id"], ExtendedString) + assert isinstance(result["deployment_info"], ExtendedDict) assert result["deployment_id"] == "dep-123" assert result["status"] == "Succeeded" waiter.wait.assert_called_once_with( From 898621e7a7abf87c1b929772beaceb3c7d1b2dd7 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:04:40 -0500 Subject: [PATCH 063/287] feat: promote google tool payloads --- src/extended_data/connectors/google/tools.py | 26 +-- tests/connectors/test_google_tools.py | 197 ++++++++++++------- 2 files changed, 144 insertions(+), 79 deletions(-) diff --git a/src/extended_data/connectors/google/tools.py b/src/extended_data/connectors/google/tools.py index 8c2b154..35f653d 100644 --- a/src/extended_data/connectors/google/tools.py +++ b/src/extended_data/connectors/google/tools.py @@ -25,10 +25,13 @@ from __future__ import annotations +from collections.abc import Mapping from typing import Any from pydantic import BaseModel, Field +from extended_data.containers import extend_data + # ============================================================================= # Input Schemas @@ -114,7 +117,7 @@ def list_projects( } ) - return result + return extend_data(result) def list_folders( @@ -147,7 +150,7 @@ def list_folders( } ) - return result + return extend_data(result) def list_enabled_services( @@ -179,7 +182,7 @@ def list_enabled_services( } ) - return result + return extend_data(result) def list_billing_accounts( @@ -210,7 +213,7 @@ def list_billing_accounts( } ) - return result + return extend_data(result) def list_workspace_users( @@ -234,24 +237,25 @@ def list_workspace_users( flatten_names=True, key_by_email=False, ) - users = list(users_raw.values()) if isinstance(users_raw, dict) else users_raw + users = list(users_raw.values()) if isinstance(users_raw, Mapping) else users_raw # Limit results and extract key fields result: list[dict[str, Any]] = [] for user in users[:max_results]: - if not isinstance(user, dict): + if not 
isinstance(user, Mapping): continue + name = user.get("name", {}) result.append( { "email": user.get("primaryEmail", ""), - "name": user.get("name", {}).get("fullName", "") if isinstance(user.get("name"), dict) else "", + "name": name.get("fullName", "") if isinstance(name, Mapping) else "", "full_name": user.get("full_name", ""), "suspended": user.get("suspended", False), "org_unit_path": user.get("orgUnitPath", ""), } ) - return result + return extend_data(result) def list_workspace_groups( @@ -274,12 +278,12 @@ def list_workspace_groups( domain=domain or None, key_by_email=False, ) - groups = list(groups_raw.values()) if isinstance(groups_raw, dict) else groups_raw + groups = list(groups_raw.values()) if isinstance(groups_raw, Mapping) else groups_raw # Limit results and extract key fields result: list[dict[str, Any]] = [] for group in groups[:max_results]: - if not isinstance(group, dict): + if not isinstance(group, Mapping): continue result.append( { @@ -290,7 +294,7 @@ def list_workspace_groups( } ) - return result + return extend_data(result) # ============================================================================= diff --git a/tests/connectors/test_google_tools.py b/tests/connectors/test_google_tools.py index ccf89e8..7057ea2 100644 --- a/tests/connectors/test_google_tools.py +++ b/tests/connectors/test_google_tools.py @@ -8,6 +8,8 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data + # Patch target for GoogleConnectorFull - must patch where it's imported GOOGLE_CONNECTOR_PATCH = "extended_data.connectors.google.GoogleConnectorFull" @@ -66,24 +68,29 @@ def test_list_projects_basic(self, mock_connector_class): from extended_data.connectors.google.tools import list_projects mock_connector = MagicMock() - mock_connector.list_projects.return_value = [ - { - "projectId": "my-project-123", - "displayName": "My Project", - "state": "ACTIVE", - "parent": "organizations/123456", - }, - { - "projectId": 
"another-project-456", - "name": "projects/another-project-456", - "state": "ACTIVE", - "parent": "folders/789", - }, - ] + mock_connector.list_projects.return_value = extend_data( + [ + { + "projectId": "my-project-123", + "displayName": "My Project", + "state": "ACTIVE", + "parent": "organizations/123456", + }, + { + "projectId": "another-project-456", + "name": "projects/another-project-456", + "state": "ACTIVE", + "parent": "folders/789", + }, + ] + ) mock_connector_class.return_value = mock_connector result = list_projects() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["project_id"], ExtendedString) assert len(result) == 2 assert result[0]["project_id"] == "my-project-123" assert result[0]["name"] == "My Project" @@ -120,6 +127,36 @@ def test_list_projects_max_results(self, mock_connector_class): assert len(result) == 50 +class TestListFolders: + """Tests for list_folders tool.""" + + @patch(GOOGLE_CONNECTOR_PATCH) + def test_list_folders_basic(self, mock_connector_class): + """Test basic list_folders functionality.""" + from extended_data.connectors.google.tools import list_folders + + mock_connector = MagicMock() + mock_connector.list_folders.return_value = extend_data( + [ + { + "name": "folders/123", + "displayName": "Engineering", + "state": "ACTIVE", + "parent": "organizations/456", + } + ] + ) + mock_connector_class.return_value = mock_connector + + result = list_folders(parent="organizations/456") + + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["display_name"], ExtendedString) + assert result[0]["name"] == "folders/123" + assert result[0]["display_name"] == "Engineering" + + class TestListEnabledServices: """Tests for list_enabled_services tool.""" @@ -129,22 +166,27 @@ def test_list_enabled_services_basic(self, mock_connector_class): from extended_data.connectors.google.tools import list_enabled_services 
mock_connector = MagicMock() - mock_connector.list_enabled_services.return_value = [ - { - "name": "projects/123/services/compute.googleapis.com", - "config": {"title": "Compute Engine API"}, - "state": "ENABLED", - }, - { - "name": "projects/123/services/storage.googleapis.com", - "config": {"title": "Cloud Storage API"}, - "state": "ENABLED", - }, - ] + mock_connector.list_enabled_services.return_value = extend_data( + [ + { + "name": "projects/123/services/compute.googleapis.com", + "config": {"title": "Compute Engine API"}, + "state": "ENABLED", + }, + { + "name": "projects/123/services/storage.googleapis.com", + "config": {"title": "Cloud Storage API"}, + "state": "ENABLED", + }, + ] + ) mock_connector_class.return_value = mock_connector result = list_enabled_services(project_id="my-project") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["title"], ExtendedString) assert len(result) == 2 assert result[0]["name"] == "projects/123/services/compute.googleapis.com" assert result[0]["title"] == "Compute Engine API" @@ -173,24 +215,29 @@ def test_list_billing_accounts_basic(self, mock_connector_class): from extended_data.connectors.google.tools import list_billing_accounts mock_connector = MagicMock() - mock_connector.list_billing_accounts.return_value = [ - { - "name": "billingAccounts/012345-6789AB-CDEF01", - "displayName": "My Billing Account", - "open": True, - "masterBillingAccount": "", - }, - { - "name": "billingAccounts/ABCDEF-123456-789012", - "displayName": "Another Billing", - "open": False, - "masterBillingAccount": "billingAccounts/012345-6789AB-CDEF01", - }, - ] + mock_connector.list_billing_accounts.return_value = extend_data( + [ + { + "name": "billingAccounts/012345-6789AB-CDEF01", + "displayName": "My Billing Account", + "open": True, + "masterBillingAccount": "", + }, + { + "name": "billingAccounts/ABCDEF-123456-789012", + "displayName": "Another Billing", + "open": False, + 
"masterBillingAccount": "billingAccounts/012345-6789AB-CDEF01", + }, + ] + ) mock_connector_class.return_value = mock_connector result = list_billing_accounts() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["display_name"], ExtendedString) assert len(result) == 2 assert "billingAccounts/" in result[0]["name"] assert result[0]["display_name"] == "My Billing Account" @@ -220,26 +267,31 @@ def test_list_workspace_users_basic(self, mock_connector_class): from extended_data.connectors.google.tools import list_workspace_users mock_connector = MagicMock() - mock_connector.list_users.return_value = [ - { - "primaryEmail": "john.doe@example.com", - "name": {"fullName": "John Doe"}, - "full_name": "John Doe", - "suspended": False, - "orgUnitPath": "/", - }, + mock_connector.list_users.return_value = extend_data( { - "primaryEmail": "jane.smith@example.com", - "name": {"fullName": "Jane Smith"}, - "full_name": "Jane Smith", - "suspended": False, - "orgUnitPath": "/Engineering", - }, - ] + "john.doe@example.com": { + "primaryEmail": "john.doe@example.com", + "name": {"fullName": "John Doe"}, + "full_name": "John Doe", + "suspended": False, + "orgUnitPath": "/", + }, + "jane.smith@example.com": { + "primaryEmail": "jane.smith@example.com", + "name": {"fullName": "Jane Smith"}, + "full_name": "Jane Smith", + "suspended": False, + "orgUnitPath": "/Engineering", + }, + } + ) mock_connector_class.return_value = mock_connector result = list_workspace_users() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["email"], ExtendedString) assert len(result) == 2 assert result[0]["email"] == "john.doe@example.com" assert result[0]["full_name"] == "John Doe" @@ -283,6 +335,8 @@ def test_list_workspace_users_suspended(self, mock_connector_class): result = list_workspace_users() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], 
ExtendedDict) assert len(result) == 1 assert result[0]["suspended"] is True @@ -296,24 +350,29 @@ def test_list_workspace_groups_basic(self, mock_connector_class): from extended_data.connectors.google.tools import list_workspace_groups mock_connector = MagicMock() - mock_connector.list_groups.return_value = [ + mock_connector.list_groups.return_value = extend_data( { - "email": "admins@example.com", - "name": "Admins", - "description": "Administrator group", - "directMembersCount": 5, - }, - { - "email": "developers@example.com", - "name": "Developers", - "description": "Development team", - "directMembersCount": 25, - }, - ] + "admins@example.com": { + "email": "admins@example.com", + "name": "Admins", + "description": "Administrator group", + "directMembersCount": 5, + }, + "developers@example.com": { + "email": "developers@example.com", + "name": "Developers", + "description": "Development team", + "directMembersCount": 25, + }, + } + ) mock_connector_class.return_value = mock_connector result = list_workspace_groups() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["email"], ExtendedString) assert len(result) == 2 assert result[0]["email"] == "admins@example.com" assert result[0]["name"] == "Admins" @@ -355,6 +414,8 @@ def test_list_workspace_groups_empty_description(self, mock_connector_class): result = list_workspace_groups() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 1 assert result[0]["description"] == "" assert result[0]["direct_members_count"] == 0 From 47c8352745c8a125c4292bd09390e4ba2655cc02 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:06:21 -0500 Subject: [PATCH 064/287] feat: promote google cloud payloads --- src/extended_data/connectors/google/cloud.py | 37 +++++++++-------- tests/connectors/test_google_cloud.py | 43 ++++++++++++++++---- 2 files changed, 55 insertions(+), 25 deletions(-) diff 
--git a/src/extended_data/connectors/google/cloud.py b/src/extended_data/connectors/google/cloud.py index 0c948c7..7f98119 100644 --- a/src/extended_data/connectors/google/cloud.py +++ b/src/extended_data/connectors/google/cloud.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any from extended_data import unhump_map +from extended_data.containers import to_builtin class GoogleCloudMixin: @@ -27,6 +28,8 @@ def get_cloud_resource_manager_service(self) -> Any: ... def get_iam_service(self) -> Any: ... + def extend_result(self, value: Any) -> Any: ... + def get_organization_id(self) -> str: """Get the Google Cloud organization ID. @@ -49,7 +52,7 @@ def get_organization_id(self) -> str: org_name = organizations[0]["name"] org_id = org_name.split("/")[-1] self.logger.info(f"Organization ID: {org_id}") - return org_id + return self.extend_result(org_id) def get_organization(self) -> dict[str, Any]: """Get the Google Cloud organization details. @@ -70,7 +73,7 @@ def get_organization(self) -> dict[str, Any]: msg = "No organizations found" raise RuntimeError(msg) - return organizations[0] + return self.extend_result(organizations[0]) def list_projects( self, @@ -115,7 +118,7 @@ def list_projects( if unhump_projects: projects = [unhump_map(p) for p in projects] - return projects + return self.extend_result(projects) def get_project(self, project_id: str) -> dict[str, Any] | None: """Get a specific Google Cloud project. 
@@ -131,7 +134,7 @@ def get_project(self, project_id: str) -> dict[str, Any] | None: service = self.get_cloud_resource_manager_service() try: - return service.projects().get(name=f"projects/{project_id}").execute() + return self.extend_result(service.projects().get(name=f"projects/{project_id}").execute()) except HttpError as e: if e.resp.status == 404: self.logger.warning(f"Project not found: {project_id}") @@ -167,11 +170,11 @@ def create_project( if parent: project_body["parent"] = parent if labels: - project_body["labels"] = labels + project_body["labels"] = to_builtin(labels) result = service.projects().create(body=project_body).execute() self.logger.info(f"Created project: {project_id}") - return result + return self.extend_result(result) def delete_project(self, project_id: str) -> dict[str, Any]: """Delete a Google Cloud project. @@ -187,7 +190,7 @@ def delete_project(self, project_id: str) -> dict[str, Any]: result = service.projects().delete(name=f"projects/{project_id}").execute() self.logger.info(f"Deleted project: {project_id}") - return result + return self.extend_result(result) def move_project( self, @@ -215,7 +218,7 @@ def move_project( .execute() ) self.logger.info(f"Moved project {project_id}") - return result + return self.extend_result(result) def list_folders( self, @@ -254,7 +257,7 @@ def list_folders( if unhump_folders: folders = [unhump_map(f) for f in folders] - return folders + return self.extend_result(folders) def get_org_policy( self, @@ -275,7 +278,7 @@ def get_org_policy( service = self.get_cloud_resource_manager_service() try: - return ( + return self.extend_result( service.organizations() .getOrgPolicy( resource=resource, @@ -305,11 +308,11 @@ def set_org_policy( self.logger.info(f"Setting org policy on {resource}") service = self.get_cloud_resource_manager_service() - return ( + return self.extend_result( service.organizations() .setOrgPolicy( resource=resource, - body={"policy": policy}, + body={"policy": to_builtin(policy)}, ) 
.execute() ) @@ -358,7 +361,7 @@ def get_iam_policy( .execute() ) - return result + return self.extend_result(result) def set_iam_policy( self, @@ -379,7 +382,7 @@ def set_iam_policy( self.logger.info(f"Setting IAM policy on {resource_type}/{resource}") service = self.get_cloud_resource_manager_service() - body = {"policy": policy} + body = {"policy": to_builtin(policy)} if resource_type == "projects": result = ( @@ -410,7 +413,7 @@ def set_iam_policy( ) self.logger.info(f"Set IAM policy on {resource_type}/{resource}") - return result + return self.extend_result(result) def add_iam_binding( self, @@ -488,7 +491,7 @@ def list_service_accounts( if unhump_accounts: accounts = [unhump_map(a) for a in accounts] - return accounts + return self.extend_result(accounts) def create_service_account( self, @@ -528,4 +531,4 @@ def create_service_account( ) self.logger.info(f"Created service account: {result.get('email')}") - return result + return self.extend_result(result) diff --git a/tests/connectors/test_google_cloud.py b/tests/connectors/test_google_cloud.py index 3d05059..efecdc5 100644 --- a/tests/connectors/test_google_cloud.py +++ b/tests/connectors/test_google_cloud.py @@ -10,6 +10,7 @@ pytest.importorskip("google.oauth2.service_account") pytest.importorskip("googleapiclient") +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data from extended_data.connectors.google import GoogleConnectorFull @@ -43,6 +44,7 @@ def test_get_organization_id(self, google_connector): result = google_connector.get_organization_id() + assert isinstance(result, ExtendedString) assert result == "123456789" def test_get_organization_id_no_org(self, google_connector): @@ -72,6 +74,8 @@ def test_get_organization(self, google_connector): result = google_connector.get_organization() + assert isinstance(result, ExtendedDict) + assert isinstance(result["displayName"], ExtendedString) assert result["displayName"] == "Test Org" assert result["lifecycleState"] == 
"ACTIVE" @@ -103,6 +107,9 @@ def test_list_projects(self, google_connector): result = google_connector.list_projects() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["projectId"], ExtendedString) assert len(result) == 2 assert result[0]["projectId"] == "project-1" @@ -165,6 +172,8 @@ def test_get_project(self, google_connector): result = google_connector.get_project("test-project") + assert isinstance(result, ExtendedDict) + assert isinstance(result["projectId"], ExtendedString) assert result["projectId"] == "test-project" assert result["lifecycleState"] == "ACTIVE" @@ -180,6 +189,8 @@ def test_create_project(self, google_connector): result = google_connector.create_project("new-project", "New Project") + assert isinstance(result, ExtendedDict) + assert isinstance(result["projectId"], ExtendedString) assert result["projectId"] == "new-project" def test_delete_project(self, google_connector): @@ -211,6 +222,9 @@ def test_list_folders(self, google_connector): result = google_connector.list_folders(parent="organizations/123456") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["displayName"], ExtendedString) assert len(result) == 2 assert result[0]["displayName"] == "Folder One" @@ -235,6 +249,8 @@ def test_get_iam_policy(self, google_connector): result = google_connector.get_iam_policy("test-project") + assert isinstance(result, ExtendedDict) + assert isinstance(result["bindings"], ExtendedList) assert len(result["bindings"]) == 1 assert result["bindings"][0]["role"] == "roles/owner" @@ -252,17 +268,23 @@ def test_set_iam_policy(self, google_connector): } google_connector.get_cloud_resource_manager_service = MagicMock(return_value=mock_service) - policy = { - "bindings": [ - { - "role": "roles/viewer", - "members": ["user:viewer@example.com"], - } - ] - } + policy = extend_data( + { + "bindings": [ + { + "role": "roles/viewer", + 
"members": ["user:viewer@example.com"], + } + ] + } + ) result = google_connector.set_iam_policy("test-project", policy) + assert isinstance(result, ExtendedDict) + assert isinstance(result["bindings"], ExtendedList) assert result["bindings"][0]["role"] == "roles/viewer" + call_body = mock_projects.setIamPolicy.call_args.kwargs["body"] + assert isinstance(call_body["policy"], dict) def test_list_service_accounts(self, google_connector): """Test listing service accounts.""" @@ -284,6 +306,9 @@ def test_list_service_accounts(self, google_connector): result = google_connector.list_service_accounts("test-project") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["displayName"], ExtendedString) assert len(result) == 2 assert result[0]["displayName"] == "Service Account 1" @@ -303,4 +328,6 @@ def test_create_service_account(self, google_connector): "New Service Account", ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["displayName"], ExtendedString) assert result["displayName"] == "New Service Account" From 76b9d7c970c89ef284c121fcd4fe7e3e160639c6 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:08:05 -0500 Subject: [PATCH 065/287] feat: promote google billing payloads --- .../connectors/google/billing.py | 53 +++++++++++-------- tests/connectors/test_google_billing.py | 14 +++++ 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/src/extended_data/connectors/google/billing.py b/src/extended_data/connectors/google/billing.py index 821e3bf..18b1060 100644 --- a/src/extended_data/connectors/google/billing.py +++ b/src/extended_data/connectors/google/billing.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any from extended_data import unhump_map +from extended_data.containers import to_builtin class GoogleBillingMixin: @@ -25,6 +26,8 @@ class GoogleBillingMixin: def get_billing_service(self) -> Any: ... 
+ def extend_result(self, value: Any) -> Any: ... + def list_billing_accounts( self, filter_query: str | None = None, @@ -64,7 +67,7 @@ def list_billing_accounts( if unhump_accounts: accounts = [unhump_map(a) for a in accounts] - return accounts + return self.extend_result(accounts) def get_billing_account(self, billing_account_id: str) -> dict[str, Any] | None: """Get a specific billing account. @@ -83,7 +86,7 @@ def get_billing_account(self, billing_account_id: str) -> dict[str, Any] | None: name = f"billingAccounts/{billing_account_id}" try: - return service.billingAccounts().get(name=name).execute() + return self.extend_result(service.billingAccounts().get(name=name).execute()) except HttpError as e: if e.resp.status == 404: self.logger.warning(f"Billing account not found: {billing_account_id}") @@ -104,7 +107,7 @@ def get_project_billing_info(self, project_id: str) -> dict[str, Any] | None: service = self.get_billing_service() try: - return service.projects().getBillingInfo(name=f"projects/{project_id}").execute() + return self.extend_result(service.projects().getBillingInfo(name=f"projects/{project_id}").execute()) except HttpError as e: if e.resp.status == 404: self.logger.warning(f"Project billing info not found: {project_id}") @@ -141,7 +144,7 @@ def update_project_billing_info( ) self.logger.info(f"Linked project {project_id} to billing account") - return result + return self.extend_result(result) def disable_project_billing(self, project_id: str) -> dict[str, Any]: """Disable billing for a project. 
@@ -165,7 +168,7 @@ def disable_project_billing(self, project_id: str) -> dict[str, Any]: ) self.logger.info(f"Disabled billing for project {project_id}") - return result + return self.extend_result(result) def list_billing_account_projects( self, @@ -208,7 +211,7 @@ def list_billing_account_projects( if unhump_projects: projects = [unhump_map(p) for p in projects] - return projects + return self.extend_result(projects) def get_billing_account_iam_policy( self, @@ -228,7 +231,7 @@ def get_billing_account_iam_policy( if not name.startswith("billingAccounts/"): name = f"billingAccounts/{billing_account_id}" - return service.billingAccounts().getIamPolicy(resource=name).execute() + return self.extend_result(service.billingAccounts().getIamPolicy(resource=name).execute()) def set_billing_account_iam_policy( self, @@ -251,11 +254,11 @@ def set_billing_account_iam_policy( if not name.startswith("billingAccounts/"): name = f"billingAccounts/{billing_account_id}" - return ( + return self.extend_result( service.billingAccounts() .setIamPolicy( resource=name, - body={"policy": policy}, + body={"policy": to_builtin(policy)}, ) .execute() ) @@ -300,13 +303,15 @@ def get_bigquery_billing_dataset( t for t in tables if "gcp_billing_export" in t.get("tableReference", {}).get("tableId", "") ] - return { - "dataset": dataset, - "tables": tables, - "billing_tables": billing_tables, - "location": dataset.get("location"), - "description": dataset.get("description"), - } + return self.extend_result( + { + "dataset": dataset, + "tables": tables, + "billing_tables": billing_tables, + "location": dataset.get("location"), + "description": dataset.get("description"), + } + ) except HttpError as e: if e.resp.status == 404: @@ -374,10 +379,12 @@ def setup_billing_export( dataset = service.datasets().insert(projectId=project_id, body=dataset_body).execute() self.logger.info(f"Created billing export dataset: {dataset_id}") - return { - "billing_account_id": billing_account_id, - "project_id": 
project_id, - "dataset_id": dataset_id, - "location": dataset.get("location"), - "full_dataset_id": f"{project_id}.{dataset_id}", - } + return self.extend_result( + { + "billing_account_id": billing_account_id, + "project_id": project_id, + "dataset_id": dataset_id, + "location": dataset.get("location"), + "full_dataset_id": f"{project_id}.{dataset_id}", + } + ) diff --git a/tests/connectors/test_google_billing.py b/tests/connectors/test_google_billing.py index a7c9754..d5bf243 100644 --- a/tests/connectors/test_google_billing.py +++ b/tests/connectors/test_google_billing.py @@ -12,6 +12,7 @@ pytest.importorskip("google.oauth2.service_account") pytest.importorskip("googleapiclient") +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data from extended_data.connectors.google.billing import GoogleBillingMixin @@ -92,6 +93,9 @@ def __init__(self, service: _StubBillingService): def get_billing_service(self): return self._service + def extend_result(self, value: Any) -> Any: + return extend_data(value) + def test_list_billing_accounts_paginates_and_unhumps(): service = _StubBillingService( @@ -114,6 +118,9 @@ def test_list_billing_accounts_paginates_and_unhumps(): accounts = connector.list_billing_accounts(filter_query="parent:organizations/1", unhump_accounts=True) + assert isinstance(accounts, ExtendedList) + assert isinstance(accounts[0], ExtendedDict) + assert isinstance(accounts[0]["display_name"], ExtendedString) assert [acct["name"] for acct in accounts] == ["billingAccounts/ABC", "billingAccounts/DEF"] # Ensure snake_case conversion applied assert accounts[0]["display_name"] == "Primary" @@ -129,6 +136,8 @@ def test_update_project_billing_info_prefixes_account_name(): response = connector.update_project_billing_info("demo-project", "1234-ABCD") + assert isinstance(response, ExtendedDict) + assert isinstance(response["billingAccountName"], ExtendedString) assert response["billingAccountName"] == "billingAccounts/1234-ABCD" 
assert service.projects().update_calls == [ { @@ -144,6 +153,8 @@ def test_disable_project_billing_sets_empty_account(): response = connector.disable_project_billing("demo-project") + assert isinstance(response, ExtendedDict) + assert isinstance(response["billingAccountName"], ExtendedString) assert response["billingAccountName"] == "" assert service.projects().update_calls[-1] == { "name": "projects/demo-project", @@ -168,6 +179,9 @@ def test_list_billing_account_projects_handles_prefixing(): projects = connector.list_billing_account_projects("123456-AAAA", unhump_projects=True) + assert isinstance(projects, ExtendedList) + assert isinstance(projects[0], ExtendedDict) + assert isinstance(projects[0]["project_id"], ExtendedString) assert [proj["project_id"] for proj in projects] == ["alpha", "beta"] assert service.billingAccounts().projects().list_calls == [ {"name": "billingAccounts/123456-AAAA"}, From fb52c42a1ed779d5c1c80be70c2dd47ee1bbec04 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:12:52 -0500 Subject: [PATCH 066/287] feat: promote google service payloads --- .../connectors/google/services.py | 38 ++-- tests/connectors/test_google_activity.py | 5 + tests/connectors/test_google_services.py | 171 ++++++++++++++++++ 3 files changed, 196 insertions(+), 18 deletions(-) diff --git a/src/extended_data/connectors/google/services.py b/src/extended_data/connectors/google/services.py index 761534b..02d0d8f 100644 --- a/src/extended_data/connectors/google/services.py +++ b/src/extended_data/connectors/google/services.py @@ -108,6 +108,8 @@ def get_cloudkms_service(self) -> Any: ... def get_cloud_resource_manager_service(self) -> Any: ... + def extend_result(self, value: Any) -> Any: ... 
+ # ========================================================================= # Compute Engine # ========================================================================= @@ -168,7 +170,7 @@ def list_compute_instances( if unhump_instances: instances = [unhump_map(i) for i in instances] - return instances + return self.extend_result(instances) # ========================================================================= # Google Kubernetes Engine @@ -202,7 +204,7 @@ def list_gke_clusters( if unhump_clusters: clusters = [unhump_map(c) for c in clusters] - return clusters + return self.extend_result(clusters) def get_gke_cluster( self, @@ -226,7 +228,7 @@ def get_gke_cluster( name = f"projects/{project_id}/locations/{location}/clusters/{cluster_id}" try: - return service.projects().locations().clusters().get(name=name).execute() + return self.extend_result(service.projects().locations().clusters().get(name=name).execute()) except HttpError as e: if e.resp.status == 404: self.logger.warning(f"GKE cluster not found: {cluster_id}") @@ -274,7 +276,7 @@ def list_storage_buckets( if unhump_buckets: buckets = [unhump_map(b) for b in buckets] - return buckets + return self.extend_result(buckets) # ========================================================================= # Cloud SQL @@ -317,7 +319,7 @@ def list_sql_instances( if unhump_instances: instances = [unhump_map(i) for i in instances] - return instances + return self.extend_result(instances) # ========================================================================= # Pub/Sub @@ -360,7 +362,7 @@ def list_pubsub_topics( if unhump_topics: topics = [unhump_map(t) for t in topics] - return topics + return self.extend_result(topics) def list_pubsub_subscriptions( self, @@ -399,7 +401,7 @@ def list_pubsub_subscriptions( if unhump_subscriptions: subscriptions = [unhump_map(s) for s in subscriptions] - return subscriptions + return self.extend_result(subscriptions) # 
========================================================================= # Service Usage (Enabled APIs) @@ -445,7 +447,7 @@ def list_enabled_services( if unhump_services: services = [unhump_map(s) for s in services] - return services + return self.extend_result(services) def enable_service( self, @@ -468,7 +470,7 @@ def enable_service( result = service.services().enable(name=name).execute() self.logger.info(f"Enabled service {service_name}") - return result + return self.extend_result(result) def disable_service( self, @@ -497,7 +499,7 @@ def disable_service( result = service.services().disable(name=name, body=body).execute() self.logger.info(f"Disabled service {service_name}") - return result + return self.extend_result(result) def batch_enable_services( self, @@ -527,7 +529,7 @@ def batch_enable_services( ) self.logger.info(f"Batch enabled {len(service_names)} services") - return result + return self.extend_result(result) # ========================================================================= # Cloud KMS @@ -573,7 +575,7 @@ def list_kms_keyrings( if unhump_keyrings: keyrings = [unhump_map(k) for k in keyrings] - return keyrings + return self.extend_result(keyrings) def create_kms_keyring( self, @@ -608,7 +610,7 @@ def create_kms_keyring( ) self.logger.info(f"Created key ring {keyring_id}") - return result + return self.extend_result(result) def create_kms_key( self, @@ -655,7 +657,7 @@ def create_kms_key( ) self.logger.info(f"Created crypto key {key_id}") - return result + return self.extend_result(result) # ========================================================================= # Project Resource Summary @@ -752,7 +754,7 @@ def get_project_iam_users( users[member]["roles"].append(role) self.logger.info(f"Found {len(users)} IAM members for project {project_id}") - return users + return self.extend_result(users) def get_pubsub_resources_for_project( self, @@ -793,7 +795,7 @@ def get_pubsub_resources_for_project( f"Found {result['topic_count']} topics" + 
(f", {result.get('subscription_count', 0)} subscriptions" if include_subscriptions else "") ) - return result + return self.extend_result(result) def find_inactive_projects( self, @@ -830,7 +832,7 @@ def find_inactive_projects( projects = {p["projectId"]: p for p in self.list_projects()} else: self.logger.warning("list_projects not available, cannot find inactive projects") - return [] + return self.extend_result([]) inactive: list[dict[str, Any]] = [] @@ -866,4 +868,4 @@ def find_inactive_projects( raise self.logger.info(f"Found {len(inactive)} inactive projects out of {len(projects)}") - return inactive + return self.extend_result(inactive) diff --git a/tests/connectors/test_google_activity.py b/tests/connectors/test_google_activity.py index 99aa92f..c504346 100644 --- a/tests/connectors/test_google_activity.py +++ b/tests/connectors/test_google_activity.py @@ -8,6 +8,7 @@ import pytest from extended_data.connectors.google.services import GoogleServicesMixin +from extended_data.containers import ExtendedList, extend_data class DummyGoogleServices(GoogleServicesMixin): @@ -20,6 +21,9 @@ def __init__(self, empty_projects: set[str]) -> None: def is_project_empty(self, project_id: str) -> bool: return project_id in self.empty_projects + def extend_result(self, value: Any) -> Any: + return extend_data(value) + def test_find_inactive_projects_uses_activity_threshold_for_empty_projects() -> None: """Recently active empty projects are not reported as inactive.""" @@ -43,6 +47,7 @@ def test_find_inactive_projects_uses_activity_threshold_for_empty_projects() -> inactive = connector.find_inactive_projects(projects, days_since_activity=90) + assert isinstance(inactive, ExtendedList) assert {project["projectId"] for project in inactive} == {"old", "unknown"} assert projects["old"]["inactive_reason"] == "no_resources_since=2000-01-01" assert projects["unknown"]["inactive_reason"] == "no_resources" diff --git a/tests/connectors/test_google_services.py 
b/tests/connectors/test_google_services.py index 58f80f3..e668925 100644 --- a/tests/connectors/test_google_services.py +++ b/tests/connectors/test_google_services.py @@ -10,6 +10,7 @@ pytest.importorskip("google.oauth2.service_account") pytest.importorskip("googleapiclient") +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data from extended_data.connectors.google import GoogleConnectorFull @@ -51,6 +52,9 @@ def test_list_compute_instances_all_zones(self, google_connector): result = google_connector.list_compute_instances("test-project") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) assert len(result) == 3 assert result[0]["name"] == "instance-1" assert result[2]["name"] == "instance-3" @@ -69,6 +73,8 @@ def test_list_compute_instances_specific_zone(self, google_connector): result = google_connector.list_compute_instances("test-project", zone="us-central1-a") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 2 mock_instances.list.assert_called_once() @@ -89,6 +95,8 @@ def test_list_compute_instances_pagination(self, google_connector): result = google_connector.list_compute_instances("test-project") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 2 assert mock_instances.aggregatedList.return_value.execute.call_count == 2 @@ -110,6 +118,9 @@ def test_list_gke_clusters(self, google_connector): result = google_connector.list_gke_clusters("test-project") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) assert len(result) == 2 assert result[0]["name"] == "cluster-1" @@ -124,6 +135,8 @@ def test_list_gke_clusters_with_location(self, google_connector): result = google_connector.list_gke_clusters("test-project", 
location="us-central1") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 1 mock_clusters.list.assert_called_once_with(parent="projects/test-project/locations/us-central1") @@ -140,6 +153,8 @@ def test_get_gke_cluster(self, google_connector): result = google_connector.get_gke_cluster("test-project", "us-central1", "cluster-1") + assert isinstance(result, ExtendedDict) + assert isinstance(result["name"], ExtendedString) assert result["name"] == "cluster-1" assert result["status"] == "RUNNING" @@ -177,6 +192,9 @@ def test_list_storage_buckets(self, google_connector): result = google_connector.list_storage_buckets("test-project") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) assert len(result) == 2 assert result[0]["name"] == "bucket-1" @@ -198,6 +216,9 @@ def test_list_sql_instances(self, google_connector): result = google_connector.list_sql_instances("test-project") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["databaseVersion"], ExtendedString) assert len(result) == 2 assert result[0]["databaseVersion"] == "MYSQL_8_0" @@ -219,6 +240,9 @@ def test_list_pubsub_topics(self, google_connector): result = google_connector.list_pubsub_topics("test-project") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) assert len(result) == 2 assert "topic-1" in result[0]["name"] @@ -236,6 +260,9 @@ def test_list_pubsub_subscriptions(self, google_connector): result = google_connector.list_pubsub_subscriptions("test-project") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) assert len(result) == 2 assert "sub-1" in result[0]["name"] @@ -257,6 +284,9 @@ def 
test_list_kms_keyrings(self, google_connector): result = google_connector.list_kms_keyrings("test-project", "us") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) assert len(result) == 2 assert "keyring-1" in result[0]["name"] @@ -271,6 +301,8 @@ def test_create_kms_keyring(self, google_connector): result = google_connector.create_kms_keyring("test-project", "us", "new-keyring") + assert isinstance(result, ExtendedDict) + assert isinstance(result["name"], ExtendedString) assert "new-keyring" in result["name"] def test_create_kms_key(self, google_connector): @@ -287,4 +319,143 @@ def test_create_kms_key(self, google_connector): result = google_connector.create_kms_key("test-project", "us", "kr1", "new-key") + assert isinstance(result, ExtendedDict) + assert isinstance(result["name"], ExtendedString) assert "new-key" in result["name"] + + +class TestServiceUsage: + """Tests for Service Usage operations.""" + + def test_list_enabled_services(self, google_connector): + """Test listing enabled APIs.""" + mock_service = MagicMock() + mock_services = mock_service.services.return_value + mock_services.list.return_value.execute.return_value = { + "services": [ + {"name": "projects/test-project/services/compute.googleapis.com"}, + {"name": "projects/test-project/services/container.googleapis.com"}, + ] + } + google_connector.get_serviceusage_service = MagicMock(return_value=mock_service) + + result = google_connector.list_enabled_services("test-project") + + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) + assert len(result) == 2 + + def test_enable_service(self, google_connector): + """Test enabling an API.""" + mock_service = MagicMock() + mock_services = mock_service.services.return_value + mock_services.enable.return_value.execute.return_value = {"name": "operations/enable-compute"} + 
google_connector.get_serviceusage_service = MagicMock(return_value=mock_service) + + result = google_connector.enable_service("test-project", "compute.googleapis.com") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["name"], ExtendedString) + assert result["name"] == "operations/enable-compute" + + def test_disable_service(self, google_connector): + """Test disabling an API.""" + mock_service = MagicMock() + mock_services = mock_service.services.return_value + mock_services.disable.return_value.execute.return_value = {"name": "operations/disable-compute"} + google_connector.get_serviceusage_service = MagicMock(return_value=mock_service) + + result = google_connector.disable_service("test-project", "compute.googleapis.com", force=True) + + assert isinstance(result, ExtendedDict) + assert isinstance(result["name"], ExtendedString) + assert result["name"] == "operations/disable-compute" + mock_services.disable.assert_called_once_with( + name="projects/test-project/services/compute.googleapis.com", + body={"disableDependentServices": True}, + ) + + def test_batch_enable_services(self, google_connector): + """Test enabling multiple APIs.""" + mock_service = MagicMock() + mock_services = mock_service.services.return_value + mock_services.batchEnable.return_value.execute.return_value = {"name": "operations/batch-enable"} + google_connector.get_serviceusage_service = MagicMock(return_value=mock_service) + + result = google_connector.batch_enable_services( + "test-project", + ["compute.googleapis.com", "container.googleapis.com"], + ) + + assert isinstance(result, ExtendedDict) + assert isinstance(result["name"], ExtendedString) + assert result["name"] == "operations/batch-enable" + + +class TestProjectResourceSummary: + """Tests for derived project resource operations.""" + + def test_get_project_iam_users(self, google_connector): + """Test deriving IAM members from a project policy.""" + mock_service = MagicMock() + mock_projects = 
mock_service.projects.return_value + mock_projects.getIamPolicy.return_value.execute.return_value = { + "bindings": [ + {"role": "roles/viewer", "members": ["user:a@example.com"]}, + {"role": "roles/editor", "members": ["user:a@example.com", "group:dev@example.com"]}, + ] + } + google_connector.get_cloud_resource_manager_service = MagicMock(return_value=mock_service) + + result = google_connector.get_project_iam_users("test-project") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["user:a@example.com"], ExtendedDict) + assert isinstance(result["user:a@example.com"]["roles"], ExtendedList) + assert result["user:a@example.com"]["roles"] == ["roles/viewer", "roles/editor"] + + def test_get_pubsub_resources_for_project(self, google_connector): + """Test aggregating Pub/Sub resources.""" + google_connector.list_pubsub_topics = MagicMock( + return_value=extend_data([{"name": "projects/test-project/topics/topic-1"}]) + ) + google_connector.list_pubsub_subscriptions = MagicMock( + return_value=extend_data([{"name": "projects/test-project/subscriptions/sub-1"}]) + ) + + result = google_connector.get_pubsub_resources_for_project("test-project") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["topics"], ExtendedList) + assert isinstance(result["topics"][0], ExtendedDict) + assert isinstance(result["subscriptions"], ExtendedList) + assert result["topic_count"] == 1 + assert result["subscription_count"] == 1 + + def test_find_inactive_projects(self, google_connector): + """Test finding inactive projects from supplied project metadata.""" + projects = extend_data( + { + "active-project": { + "projectId": "active-project", + "lifecycleState": "ACTIVE", + "updateTime": "2026-06-01T00:00:00Z", + }, + "deleted-project": { + "projectId": "deleted-project", + "lifecycleState": "DELETE_REQUESTED", + }, + } + ) + google_connector.is_project_empty = MagicMock(return_value=True) + + result = google_connector.find_inactive_projects( + 
projects=projects, + days_since_activity=30, + ) + + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert result[0]["projectId"] == "deleted-project" + assert result[0]["inactive_reason"] == "lifecycle_state=DELETE_REQUESTED" From 5b049840bdf5d76e8f2a3736c90d848e949db632 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:16:22 -0500 Subject: [PATCH 067/287] feat: promote google workspace payloads --- .../connectors/google/__init__.py | 12 ++-- .../connectors/google/workspace.py | 55 ++++++++++--------- tests/connectors/test_google_connector.py | 6 ++ tests/connectors/test_google_workspace.py | 43 ++++++++++++++- 4 files changed, 85 insertions(+), 31 deletions(-) diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index d603df8..219737a 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -470,9 +470,11 @@ def list_users( ) if return_keyed: - return self._key_results_by_email(filtered_users, primary_field="primaryEmail", fallback_field="email") + return self.extend_result( + self._key_results_by_email(filtered_users, primary_field="primaryEmail", fallback_field="email") + ) - return filtered_users + return self.extend_result(filtered_users) def list_groups( self, @@ -541,9 +543,11 @@ def list_groups( ) if return_keyed: - return self._key_results_by_email(filtered_groups, primary_field="email", fallback_field="primaryEmail") + return self.extend_result( + self._key_results_by_email(filtered_groups, primary_field="email", fallback_field="primaryEmail") + ) - return filtered_groups + return self.extend_result(filtered_groups) # Import submodule operations diff --git a/src/extended_data/connectors/google/workspace.py b/src/extended_data/connectors/google/workspace.py index 3401892..08714d9 100644 --- a/src/extended_data/connectors/google/workspace.py +++ 
b/src/extended_data/connectors/google/workspace.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any from extended_data import unhump_map +from extended_data.containers import to_builtin class GoogleWorkspaceMixin: @@ -26,6 +27,8 @@ class GoogleWorkspaceMixin: def get_admin_directory_service(self, subject: str | None = None) -> Any: ... + def extend_result(self, value: Any) -> Any: ... + def list_workspace_users( self, domain: str | None = None, @@ -67,7 +70,7 @@ def list_workspace_users( if unhump_users: users = [unhump_map(u) for u in users] - return users + return self.extend_result(users) def get_user( self, @@ -88,7 +91,7 @@ def get_user( service = self.get_admin_directory_service(subject=subject) try: - return service.users().get(userKey=user_key).execute() + return self.extend_result(service.users().get(userKey=user_key).execute()) except HttpError as e: if e.resp.status == 404: self.logger.warning(f"User not found: {user_key}") @@ -140,9 +143,9 @@ def create_user( **additional_fields, } - result = service.users().insert(body=user_body).execute() + result = service.users().insert(body=to_builtin(user_body)).execute() self.logger.info(f"Created user: {primary_email}") - return result + return self.extend_result(result) def update_user( self, @@ -161,9 +164,9 @@ def update_user( Updated user dictionary. 
""" service = self.get_admin_directory_service(subject=subject) - result = service.users().update(userKey=user_key, body=fields).execute() + result = service.users().update(userKey=user_key, body=to_builtin(fields)).execute() self.logger.info(f"Updated user: {user_key}") - return result + return self.extend_result(result) def delete_user( self, @@ -221,7 +224,7 @@ def list_workspace_groups( if unhump_groups: groups = [unhump_map(g) for g in groups] - return groups + return self.extend_result(groups) def get_group( self, @@ -242,7 +245,7 @@ def get_group( service = self.get_admin_directory_service(subject=subject) try: - return service.groups().get(groupKey=group_key).execute() + return self.extend_result(service.groups().get(groupKey=group_key).execute()) except HttpError as e: if e.resp.status == 404: self.logger.warning(f"Group not found: {group_key}") @@ -275,9 +278,9 @@ def create_group( "description": description, } - result = service.groups().insert(body=group_body).execute() + result = service.groups().insert(body=to_builtin(group_body)).execute() self.logger.info(f"Created group: {email}") - return result + return self.extend_result(result) def delete_group( self, @@ -335,7 +338,7 @@ def list_group_members( if unhump_members: members = [unhump_map(m) for m in members] - return members + return self.extend_result(members) def add_group_member( self, @@ -362,9 +365,9 @@ def add_group_member( "role": role, } - result = service.members().insert(groupKey=group_key, body=member_body).execute() + result = service.members().insert(groupKey=group_key, body=to_builtin(member_body)).execute() self.logger.info(f"Added {email} to group {group_key} with role {role}") - return result + return self.extend_result(result) def remove_group_member( self, @@ -413,7 +416,7 @@ def list_org_units( org_units = response.get("organizationUnits", []) self.logger.info(f"Retrieved {len(org_units)} org units") - return org_units + return self.extend_result(org_units) def 
create_or_update_user( self, @@ -472,19 +475,19 @@ def create_or_update_user( existing = service.users().get(userKey=primary_email).execute() if update_if_exists: # Update existing user - result = service.users().update(userKey=primary_email, body=user_body).execute() + result = service.users().update(userKey=primary_email, body=to_builtin(user_body)).execute() self.logger.info(f"Updated existing user: {primary_email}") - return result + return self.extend_result(result) self.logger.info(f"User already exists: {primary_email}") - return existing + return self.extend_result(existing) except HttpError as e: if e.resp.status != 404: raise # User doesn't exist, create new - result = service.users().insert(body=user_body).execute() + result = service.users().insert(body=to_builtin(user_body)).execute() self.logger.info(f"Created user: {primary_email}") - return result + return self.extend_result(result) def create_or_update_group( self, @@ -527,19 +530,19 @@ def create_or_update_group( existing = service.groups().get(groupKey=email).execute() if update_if_exists: # Update existing group - result = service.groups().update(groupKey=email, body=group_body).execute() + result = service.groups().update(groupKey=email, body=to_builtin(group_body)).execute() self.logger.info(f"Updated existing group: {email}") - return result + return self.extend_result(result) self.logger.info(f"Group already exists: {email}") - return existing + return self.extend_result(existing) except HttpError as e: if e.resp.status != 404: raise # Group doesn't exist, create new - result = service.groups().insert(body=group_body).execute() + result = service.groups().insert(body=to_builtin(group_body)).execute() self.logger.info(f"Created group: {email}") - return result + return self.extend_result(result) def list_available_licenses( self, @@ -622,7 +625,7 @@ def list_available_licenses( self.logger.warning(f"Error listing licenses for {prod_id}: {e}") self.logger.info(f"Retrieved {len(licenses)} 
license assignments") - return licenses + return self.extend_result(licenses) def get_license_summary( self, @@ -653,4 +656,4 @@ def get_license_summary( summary[key] = {"assigned": 0} summary[key]["assigned"] += 1 - return summary + return self.extend_result(summary) diff --git a/tests/connectors/test_google_connector.py b/tests/connectors/test_google_connector.py index febf28c..ddc7078 100644 --- a/tests/connectors/test_google_connector.py +++ b/tests/connectors/test_google_connector.py @@ -10,6 +10,7 @@ pytest.importorskip("google.oauth2.service_account") pytest.importorskip("googleapiclient") +from extended_data.containers import ExtendedDict, ExtendedString from extended_data.connectors.google import ( GoogleBillingConnector, GoogleCloudConnector, @@ -197,6 +198,9 @@ def test_list_users_filters_and_transforms(self, mock_get_service, base_connecto key_by_email=True, ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["engineer@example.com"], ExtendedDict) + assert isinstance(result["engineer@example.com"]["full_name"], ExtendedString) assert "bot@example.com" not in result assert "suspended@example.com" not in result assert "sales@example.com" not in result @@ -229,6 +233,8 @@ def test_list_groups_key_by_email_and_filters(self, mock_get_service, base_conne key_by_email=True, ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["keepers@example.com"], ExtendedDict) assert "bots@example.com" not in result assert "keepers@example.com" in result assert result["keepers@example.com"]["suspended"] is True diff --git a/tests/connectors/test_google_workspace.py b/tests/connectors/test_google_workspace.py index 4e59089..3aacfec 100644 --- a/tests/connectors/test_google_workspace.py +++ b/tests/connectors/test_google_workspace.py @@ -10,6 +10,7 @@ pytest.importorskip("google.oauth2.service_account") pytest.importorskip("googleapiclient") +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data from 
extended_data.connectors.google import GoogleConnectorFull @@ -46,6 +47,9 @@ def test_list_users(self, google_connector): result = google_connector.list_users() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["primaryEmail"], ExtendedString) assert len(result) == 2 assert result[0]["primaryEmail"] == "user1@example.com" @@ -58,6 +62,8 @@ def test_list_users_with_domain(self, google_connector): result = google_connector.list_users(domain="example.com") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 1 call_args = mock_users.list.call_args[1] assert call_args["domain"] == "example.com" @@ -79,6 +85,8 @@ def test_list_users_pagination(self, google_connector): result = google_connector.list_users() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 2 assert mock_users.list.return_value.execute.call_count == 2 @@ -95,6 +103,8 @@ def test_get_user(self, google_connector): result = google_connector.get_user("user1@example.com") + assert isinstance(result, ExtendedDict) + assert isinstance(result["primaryEmail"], ExtendedString) assert result["primaryEmail"] == "user1@example.com" assert result["suspended"] is False @@ -129,10 +139,16 @@ def test_create_user(self, google_connector): given_name="New", family_name="User", password="SecurePass123!", + customSchemas=extend_data({"HR": {"level": "5"}}), ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["primaryEmail"], ExtendedString) assert result["primaryEmail"] == "newuser@example.com" mock_users.insert.assert_called_once() + body = mock_users.insert.call_args.kwargs["body"] + assert isinstance(body, dict) + assert isinstance(body["customSchemas"], dict) def test_update_user(self, google_connector): """Test updating a user.""" @@ -144,9 +160,17 @@ def test_update_user(self, google_connector): } 
google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) - result = google_connector.update_user("user1@example.com", suspended=True) + result = google_connector.update_user( + "user1@example.com", + suspended=True, + customSchemas=extend_data({"HR": {"level": "7"}}), + ) + assert isinstance(result, ExtendedDict) assert result["suspended"] is True + body = mock_users.update.call_args.kwargs["body"] + assert isinstance(body, dict) + assert isinstance(body["customSchemas"], dict) def test_delete_user(self, google_connector): """Test deleting a user.""" @@ -177,6 +201,9 @@ def test_list_groups(self, google_connector): result = google_connector.list_groups() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["email"], ExtendedString) assert len(result) == 2 assert result[0]["email"] == "group1@example.com" @@ -189,6 +216,8 @@ def test_list_groups_with_domain(self, google_connector): result = google_connector.list_groups(domain="example.com") + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) assert len(result) == 1 call_args = mock_groups.list.call_args[1] assert call_args["domain"] == "example.com" @@ -206,6 +235,8 @@ def test_get_group(self, google_connector): result = google_connector.get_group("group1@example.com") + assert isinstance(result, ExtendedDict) + assert isinstance(result["email"], ExtendedString) assert result["email"] == "group1@example.com" assert result["directMembersCount"] == "5" @@ -240,6 +271,8 @@ def test_create_group(self, google_connector): name="New Group", ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["email"], ExtendedString) assert result["email"] == "newgroup@example.com" def test_list_group_members(self, google_connector): @@ -256,6 +289,9 @@ def test_list_group_members(self, google_connector): result = google_connector.list_group_members("group1@example.com") + assert 
isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[1]["role"], ExtendedString) assert len(result) == 2 assert result[1]["role"] == "OWNER" @@ -271,6 +307,8 @@ def test_add_group_member(self, google_connector): result = google_connector.add_group_member("group1@example.com", "user1@example.com") + assert isinstance(result, ExtendedDict) + assert isinstance(result["email"], ExtendedString) assert result["email"] == "user1@example.com" def test_remove_group_member(self, google_connector): @@ -302,5 +340,8 @@ def test_list_org_units(self, google_connector): result = google_connector.list_org_units() + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) assert len(result) == 2 assert result[0]["name"] == "Engineering" From efdfcdbd9515303ed0ae8c457a2b1ac97197a54b Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:19:04 -0500 Subject: [PATCH 068/287] feat: promote google jules payloads --- src/extended_data/connectors/google/jules.py | 29 ++-- tests/connectors/test_google_jules.py | 160 +++++++++++++++++++ 2 files changed, 177 insertions(+), 12 deletions(-) create mode 100644 tests/connectors/test_google_jules.py diff --git a/src/extended_data/connectors/google/jules.py b/src/extended_data/connectors/google/jules.py index 4a6d33b..354786e 100644 --- a/src/extended_data/connectors/google/jules.py +++ b/src/extended_data/connectors/google/jules.py @@ -19,7 +19,7 @@ ) # Poll for completion - status = connector.get_session(session.name) + status = connector.get_session(session["name"]) Reference: https://developers.google.com/jules/api """ @@ -176,7 +176,12 @@ def _handle_response(self, response: httpx.Response) -> dict[str, Any]: # Sources # ========================================================================= - def list_sources(self, page_size: int = 100, page_token: str = "") -> list[Source]: + @staticmethod + 
def _model_payload(model: BaseModel) -> dict[str, Any]: + """Serialize a Jules model using API field aliases.""" + return model.model_dump(by_alias=True) + + def list_sources(self, page_size: int = 100, page_token: str = "") -> list[dict[str, Any]]: """List available sources (connected GitHub repos). Args: @@ -193,7 +198,7 @@ def list_sources(self, page_size: int = 100, page_token: str = "") -> list[Sourc response = self.get("/sources", params=params) data = self._handle_response(response) - return [Source(**s) for s in data.get("sources", [])] + return self.extend_result([self._model_payload(Source(**s)) for s in data.get("sources", [])]) # ========================================================================= # Sessions @@ -207,7 +212,7 @@ def create_session( starting_branch: str = "main", automation_mode: str = "AUTO_CREATE_PR", require_plan_approval: bool = False, - ) -> Session: + ) -> dict[str, Any]: """Create a new Jules session. Args: @@ -240,9 +245,9 @@ def create_session( response = self.post("/sessions", json=body) data = self._handle_response(response) - return Session(**data) + return self.extend_result(self._model_payload(Session(**data))) - def get_session(self, session_name: str) -> Session: + def get_session(self, session_name: str) -> dict[str, Any]: """Get a session by name. Args: @@ -258,9 +263,9 @@ def get_session(self, session_name: str) -> Session: response = self.get(f"/{session_name}") data = self._handle_response(response) - return Session(**data) + return self.extend_result(self._model_payload(Session(**data))) - def list_sessions(self, page_size: int = 20, page_token: str = "") -> list[Session]: + def list_sessions(self, page_size: int = 20, page_token: str = "") -> list[dict[str, Any]]: """List sessions. 
Args: @@ -277,9 +282,9 @@ def list_sessions(self, page_size: int = 20, page_token: str = "") -> list[Sessi response = self.get("/sessions", params=params) data = self._handle_response(response) - return [Session(**s) for s in data.get("sessions", [])] + return self.extend_result([self._model_payload(Session(**s)) for s in data.get("sessions", [])]) - def approve_plan(self, session_name: str) -> Session: + def approve_plan(self, session_name: str) -> dict[str, Any]: """Approve the plan for a session that requires approval. Args: @@ -297,7 +302,7 @@ def approve_plan(self, session_name: str) -> Session: # API returns empty on success, fetch updated session return self.get_session(session_name) - def add_user_response(self, session_name: str, message: str = "") -> Session: + def add_user_response(self, session_name: str, message: str = "") -> dict[str, Any]: """Add a follow-up message to a session or resume it. Note: The Jules API uses :sendMessage endpoint. An empty body @@ -320,7 +325,7 @@ def add_user_response(self, session_name: str, message: str = "") -> Session: # API returns empty on success, fetch updated session return self.get_session(session_name) - def resume_session(self, session_name: str) -> Session: + def resume_session(self, session_name: str) -> dict[str, Any]: """Resume a paused or awaiting session. 
Args: diff --git a/tests/connectors/test_google_jules.py b/tests/connectors/test_google_jules.py new file mode 100644 index 0000000..c0aa690 --- /dev/null +++ b/tests/connectors/test_google_jules.py @@ -0,0 +1,160 @@ +"""Tests for the Google Jules connector.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import httpx +import pytest + +from extended_data.connectors.google.jules import JulesConnector, JulesError, Session +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString + + +def _response(payload: dict, status_code: int = 200) -> httpx.Response: + return httpx.Response( + status_code, + json=payload, + request=httpx.Request("GET", "https://jules.googleapis.com/v1alpha/test"), + ) + + +def test_session_pull_request_model_property() -> None: + """The standalone Session model still exposes typed convenience properties.""" + session = Session( + name="sessions/123", + outputs=[ + { + "pullRequest": { + "url": "https://github.com/org/repo/pull/1", + "title": "Fix", + } + } + ], + ) + + assert session.pull_request is not None + assert session.pull_request.url == "https://github.com/org/repo/pull/1" + assert session.pull_request.title == "Fix" + + +def test_list_sources_returns_extended_payloads() -> None: + """Jules source lists are promoted into extended containers.""" + connector = JulesConnector(api_key="test-key") + connector.get = MagicMock( + return_value=_response( + { + "sources": [ + { + "name": "sources/github/org/repo", + "id": "repo", + "githubRepo": {"owner": "org", "name": "repo"}, + } + ] + } + ) + ) + + result = connector.list_sources(page_size=10, page_token="next") + + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert isinstance(result[0]["name"], ExtendedString) + assert isinstance(result[0]["githubRepo"], ExtendedDict) + assert result[0]["githubRepo"]["owner"] == "org" + connector.get.assert_called_once_with("/sources", params={"pageSize": 10, 
"pageToken": "next"}) + + +def test_create_session_returns_extended_payload() -> None: + """Created sessions are returned as extended payloads.""" + connector = JulesConnector(api_key="test-key") + connector.post = MagicMock( + return_value=_response( + { + "name": "sessions/123", + "id": "123", + "title": "Fix login", + "state": "RUNNING", + "sourceContext": { + "source": "sources/github/org/repo", + "githubRepoContext": {"startingBranch": "main"}, + }, + } + ) + ) + + result = connector.create_session( + prompt="Fix login", + source="sources/github/org/repo", + title="Fix login", + require_plan_approval=True, + ) + + assert isinstance(result, ExtendedDict) + assert isinstance(result["sourceContext"], ExtendedDict) + assert isinstance(result["sourceContext"]["githubRepoContext"], ExtendedDict) + assert result["name"] == "sessions/123" + connector.post.assert_called_once() + body = connector.post.call_args.kwargs["json"] + assert body["requirePlanApproval"] is True + assert body["sourceContext"]["githubRepoContext"]["startingBranch"] == "main" + + +def test_get_session_accepts_id_and_returns_extended_payload() -> None: + """Session lookup accepts a bare ID and returns an extended session payload.""" + connector = JulesConnector(api_key="test-key") + connector.get = MagicMock(return_value=_response({"name": "sessions/123", "id": "123", "state": "COMPLETED"})) + + result = connector.get_session("123") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["state"], ExtendedString) + assert result["name"] == "sessions/123" + connector.get.assert_called_once_with("/sessions/123") + + +def test_list_sessions_returns_extended_payloads() -> None: + """Jules session lists are promoted into extended containers.""" + connector = JulesConnector(api_key="test-key") + connector.get = MagicMock( + return_value=_response( + { + "sessions": [ + {"name": "sessions/1", "id": "1", "state": "RUNNING"}, + {"name": "sessions/2", "id": "2", "state": "COMPLETED"}, + ] + } 
+ ) + ) + + result = connector.list_sessions(page_size=2) + + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert result[1]["state"] == "COMPLETED" + + +def test_approve_plan_returns_updated_extended_session() -> None: + """Plan approval fetches and returns the updated extended session.""" + connector = JulesConnector(api_key="test-key") + connector.post = MagicMock(return_value=_response({})) + connector.get_session = MagicMock(return_value=ExtendedDict({"name": "sessions/123", "state": "RUNNING"})) + + result = connector.approve_plan("123") + + assert isinstance(result, ExtendedDict) + assert result["name"] == "sessions/123" + connector.post.assert_called_once_with("/sessions/123:approvePlan") + connector.get_session.assert_called_once_with("sessions/123") + + +def test_handle_response_raises_jules_error() -> None: + """Jules API errors preserve vendor message and status details.""" + connector = JulesConnector(api_key="test-key") + response = _response({"error": {"message": "denied", "code": 403, "details": [{"reason": "forbidden"}]}}, 403) + + with pytest.raises(JulesError) as exc_info: + connector._handle_response(response) + + assert exc_info.value.code == 403 + assert exc_info.value.details == [{"reason": "forbidden"}] From 9d49fa078c6b46fa081e7ba6f519dbb268aafa7e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:22:45 -0500 Subject: [PATCH 069/287] feat: promote cursor and slack payloads --- .../connectors/cursor/__init__.py | 44 ++++++---- src/extended_data/connectors/cursor/tools.py | 14 +-- .../connectors/slack/__init__.py | 2 +- tests/connectors/test_cursor.py | 86 +++++++++++++++++-- tests/connectors/test_cursor_tools.py | 26 +++--- tests/connectors/test_slack_connector.py | 1 + 6 files changed, 131 insertions(+), 42 deletions(-) diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index 1949cc3..70a2542 100644 --- 
a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -34,6 +34,7 @@ from pydantic import BaseModel, ConfigDict, Field from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import to_builtin from extended_data.logging import Logging @@ -408,15 +409,20 @@ def _request_api( raise raise CursorAPIError(sanitize_error(str(e))) from e + @staticmethod + def _model_payload(model: BaseModel) -> dict[str, Any]: + """Serialize a Cursor model into JSON-compatible API field names.""" + return model.model_dump(mode="json") + # ========================================================================= # Agent Operations # ========================================================================= - def list_agents(self) -> list[Agent]: + def list_agents(self) -> list[dict[str, Any]]: """List all agents. Returns: - List of Agent objects. + List of agent payload dictionaries. Raises: CursorAPIError: If the API request fails. @@ -424,19 +430,19 @@ def list_agents(self) -> list[Agent]: self.logger.info("Listing agents") data = self._request_api("/agents") if not data: - return [] + return self.extend_result([]) agents_data = data.get("agents", []) - return [Agent.model_validate(a) for a in agents_data] + return self.extend_result([self._model_payload(Agent.model_validate(a)) for a in agents_data]) - def get_agent_status(self, agent_id: str) -> Agent: + def get_agent_status(self, agent_id: str) -> dict[str, Any]: """Get status of a specific agent. Args: agent_id: The agent identifier. Returns: - Agent object with current status. + Agent payload dictionary with current status. Raises: CursorValidationError: If agent_id is invalid. 
@@ -448,16 +454,16 @@ def get_agent_status(self, agent_id: str) -> Agent: data = self._request_api(f"/agents/{agent_id}") if not data: raise CursorAPIError(f"Empty response when getting agent status for {agent_id}") - return Agent.model_validate(data) + return self.extend_result(self._model_payload(Agent.model_validate(data))) - def get_agent_conversation(self, agent_id: str) -> Conversation: + def get_agent_conversation(self, agent_id: str) -> dict[str, Any]: """Get conversation history for an agent. Args: agent_id: The agent identifier. Returns: - Conversation object with message history. + Conversation payload dictionary with message history. Raises: CursorValidationError: If agent_id is invalid. @@ -468,10 +474,10 @@ def get_agent_conversation(self, agent_id: str) -> Conversation: data = self._request_api(f"/agents/{agent_id}/conversation") if not data: - return Conversation(agent_id=agent_id, messages=[]) + return self.extend_result(self._model_payload(Conversation(agent_id=agent_id, messages=[]))) messages = [ConversationMessage.model_validate(m) for m in data.get("messages", [])] - return Conversation(agent_id=agent_id, messages=messages) + return self.extend_result(self._model_payload(Conversation(agent_id=agent_id, messages=messages))) def launch_agent( self, @@ -485,7 +491,7 @@ def launch_agent( skip_reviewer_request: bool = False, webhook_url: str | None = None, webhook_secret: str | None = None, - ) -> Agent: + ) -> dict[str, Any]: """Launch a new agent. Args: @@ -501,7 +507,7 @@ def launch_agent( webhook_secret: Webhook secret for signature verification. Returns: - The launched Agent object. + The launched agent payload dictionary. Raises: CursorValidationError: If inputs are invalid. 
@@ -552,11 +558,11 @@ def launch_agent( webhook["secret"] = webhook_secret body["webhook"] = webhook - data = self._request_api("/agents", method="POST", json_body=body) + data = self._request_api("/agents", method="POST", json_body=to_builtin(body)) if not data: msg = "Empty response when launching agent" raise CursorAPIError(msg) - return Agent.model_validate(data) + return self.extend_result(self._model_payload(Agent.model_validate(data))) def add_followup(self, agent_id: str, prompt_text: str) -> None: """Send a follow-up message to an agent. @@ -584,11 +590,11 @@ def add_followup(self, agent_id: str, prompt_text: str) -> None: # Repository Operations # ========================================================================= - def list_repositories(self) -> list[Repository]: + def list_repositories(self) -> list[dict[str, Any]]: """List available repositories. Returns: - List of Repository objects. + List of repository payload dictionaries. Raises: CursorAPIError: If the API request fails. 
@@ -596,10 +602,10 @@ def list_repositories(self) -> list[Repository]: self.logger.info("Listing repositories") data = self._request_api("/repositories") if not data: - return [] + return self.extend_result([]) repos_data = data.get("repositories", []) - return [Repository.model_validate(r) for r in repos_data] + return self.extend_result([self._model_payload(Repository.model_validate(r)) for r in repos_data]) # ========================================================================= # Model Operations diff --git a/src/extended_data/connectors/cursor/tools.py b/src/extended_data/connectors/cursor/tools.py index f7ceb3f..387ba62 100644 --- a/src/extended_data/connectors/cursor/tools.py +++ b/src/extended_data/connectors/cursor/tools.py @@ -62,9 +62,9 @@ def cursor_launch_agent( return extend_data( { - "agent_id": agent.id, - "state": _state_value(agent.state), - "repository": agent.repository, + "agent_id": agent.get("id", ""), + "state": _state_value(agent.get("state")), + "repository": agent.get("repository"), } ) @@ -85,10 +85,10 @@ def cursor_get_agent_status(agent_id: str) -> dict[str, Any]: return extend_data( { - "agent_id": agent.id, - "state": _state_value(agent.state), - "error": agent.error, - "pr_url": agent.pr_url, + "agent_id": agent.get("id", ""), + "state": _state_value(agent.get("state")), + "error": agent.get("error"), + "pr_url": agent.get("pr_url"), } ) diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index fa436e7..83200d2 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -277,7 +277,7 @@ def send_message( opts["thread_ts"] = thread_id try: - return self.bot_web_client.chat_postMessage(**opts).get("ts") + return self.extend_result(self.bot_web_client.chat_postMessage(**opts).get("ts")) except SlackApiError as exc: if raise_on_api_error: raise SlackAPIError(exc.response) from exc diff --git 
a/tests/connectors/test_cursor.py b/tests/connectors/test_cursor.py index 11c4ac0..c03ee32 100644 --- a/tests/connectors/test_cursor.py +++ b/tests/connectors/test_cursor.py @@ -22,7 +22,7 @@ validate_repository, validate_webhook_url, ) -from extended_data.containers import ExtendedList, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data class TestValidators: @@ -202,9 +202,59 @@ def test_list_agents(self, mock_client_class): connector = CursorConnector(api_key="test-key") agents = connector.list_agents() + assert isinstance(agents, ExtendedList) + assert isinstance(agents[0], ExtendedDict) + assert isinstance(agents[0]["id"], ExtendedString) assert len(agents) == 1 - assert agents[0].id == "agent-1" - assert agents[0].state == AgentState.RUNNING + assert agents[0]["id"] == "agent-1" + assert agents[0]["state"] == "running" + + @patch("extended_data.connectors.cursor.httpx.Client") + def test_get_agent_status_returns_extended_dict(self, mock_client_class): + """get_agent_status should return an extended agent payload.""" + mock_client = MagicMock() + mock_client_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_success = True + mock_response.headers = {"content-type": "application/json"} + mock_response.text = '{"id": "agent-1", "state": "finished", "pr_url": "https://github.com/org/repo/pull/1"}' + mock_response.json.return_value = { + "id": "agent-1", + "state": "finished", + "pr_url": "https://github.com/org/repo/pull/1", + } + mock_client.request.return_value = mock_response + + connector = CursorConnector(api_key="test-key") + agent = connector.get_agent_status("agent-1") + + assert isinstance(agent, ExtendedDict) + assert isinstance(agent["state"], ExtendedString) + assert agent["pr_url"] == "https://github.com/org/repo/pull/1" + + @patch("extended_data.connectors.cursor.httpx.Client") + def 
test_get_agent_conversation_returns_extended_dict(self, mock_client_class): + """get_agent_conversation should return an extended conversation payload.""" + mock_client = MagicMock() + mock_client_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_success = True + mock_response.headers = {"content-type": "application/json"} + mock_response.text = '{"messages": [{"role": "user", "content": "hello"}]}' + mock_response.json.return_value = {"messages": [{"role": "user", "content": "hello"}]} + mock_client.request.return_value = mock_response + + connector = CursorConnector(api_key="test-key") + conversation = connector.get_agent_conversation("agent-1") + + assert isinstance(conversation, ExtendedDict) + assert isinstance(conversation["messages"], ExtendedList) + assert isinstance(conversation["messages"][0], ExtendedDict) + assert conversation["messages"][0]["content"] == "hello" @patch("extended_data.connectors.cursor.httpx.Client") def test_launch_agent(self, mock_client_class): @@ -223,10 +273,13 @@ def test_launch_agent(self, mock_client_class): agent = connector.launch_agent( prompt_text="Implement feature X", repository="owner/repo", + images=extend_data([{"data": "base64", "dimensions": {"width": 16, "height": 16}}]), ) - assert agent.id == "new-agent" - assert agent.state == AgentState.PENDING + assert isinstance(agent, ExtendedDict) + assert isinstance(agent["id"], ExtendedString) + assert agent["id"] == "new-agent" + assert agent["state"] == "pending" # Verify request was made correctly call_args = mock_client.request.call_args @@ -234,6 +287,8 @@ def test_launch_agent(self, mock_client_class): assert "/agents" in call_args.args[1] assert "prompt" in call_args.kwargs["json"] assert "source" in call_args.kwargs["json"] + assert isinstance(call_args.kwargs["json"]["prompt"]["images"], list) + assert isinstance(call_args.kwargs["json"]["prompt"]["images"][0], dict) 
@patch("extended_data.connectors.cursor.httpx.Client") def test_launch_agent_validation(self, mock_client_class): @@ -248,6 +303,27 @@ def test_launch_agent_validation(self, mock_client_class): with pytest.raises(CursorValidationError, match="format"): connector.launch_agent(prompt_text="Hello", repository="invalid") + @patch("extended_data.connectors.cursor.httpx.Client") + def test_list_repositories_returns_extended_list(self, mock_client_class): + """list_repositories should return extended repository payloads.""" + mock_client = MagicMock() + mock_client_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_success = True + mock_response.headers = {"content-type": "application/json"} + mock_response.text = '{"repositories": [{"name": "org/repo", "default_branch": "main"}]}' + mock_response.json.return_value = {"repositories": [{"name": "org/repo", "default_branch": "main"}]} + mock_client.request.return_value = mock_response + + connector = CursorConnector(api_key="test-key") + repositories = connector.list_repositories() + + assert isinstance(repositories, ExtendedList) + assert isinstance(repositories[0], ExtendedDict) + assert repositories[0]["name"] == "org/repo" + @patch("extended_data.connectors.cursor.httpx.Client") def test_list_models_returns_extended_list(self, mock_client_class): """list_models should expose model names as an extended container.""" diff --git a/tests/connectors/test_cursor_tools.py b/tests/connectors/test_cursor_tools.py index 1a03c1c..03d2287 100644 --- a/tests/connectors/test_cursor_tools.py +++ b/tests/connectors/test_cursor_tools.py @@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch from extended_data.connectors.cursor import AgentState -from extended_data.containers import ExtendedDict, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedString, extend_data def test_cursor_launch_agent(): @@ -14,10 +14,13 @@ def 
test_cursor_launch_agent(): with patch("extended_data.connectors.cursor.CursorConnector") as mock_connector_class: mock_connector = MagicMock() - mock_agent = MagicMock() - mock_agent.id = "agent_123" - mock_agent.state = AgentState.RUNNING - mock_agent.repository = "org/repo" + mock_agent = extend_data( + { + "id": "agent_123", + "state": AgentState.RUNNING, + "repository": "org/repo", + } + ) mock_connector.launch_agent.return_value = mock_agent mock_connector_class.return_value = mock_connector @@ -35,11 +38,14 @@ def test_cursor_get_agent_status(): with patch("extended_data.connectors.cursor.CursorConnector") as mock_connector_class: mock_connector = MagicMock() - mock_agent = MagicMock() - mock_agent.id = "agent_123" - mock_agent.state = AgentState.FINISHED - mock_agent.error = None - mock_agent.pr_url = "https://github.com/org/repo/pull/1" + mock_agent = extend_data( + { + "id": "agent_123", + "state": AgentState.FINISHED, + "error": None, + "pr_url": "https://github.com/org/repo/pull/1", + } + ) mock_connector.get_agent_status.return_value = mock_agent mock_connector_class.return_value = mock_connector diff --git a/tests/connectors/test_slack_connector.py b/tests/connectors/test_slack_connector.py index 48bb385..fe60a6b 100644 --- a/tests/connectors/test_slack_connector.py +++ b/tests/connectors/test_slack_connector.py @@ -69,6 +69,7 @@ def test_send_message(self, mock_webclient_class, base_connector_kwargs): ts = connector.send_message(channel_name="general", text="Test message", blocks=[]) + assert isinstance(ts, ExtendedString) assert ts == "1234567890.123456" mock_bot_client.chat_postMessage.assert_called_once() From e605cf77ec48d95c1c0f943db4dc9a6c56d2ab81 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:26:25 -0500 Subject: [PATCH 070/287] feat: promote anthropic connector payloads --- .../connectors/anthropic/__init__.py | 46 +++++++++++++------ .../connectors/anthropic/tools.py | 21 ++++++--- tests/connectors/test_anthropic.py | 45 
+++++++++++++++--- tests/connectors/test_anthropic_tools.py | 24 +++++----- 4 files changed, 96 insertions(+), 40 deletions(-) diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index 4fbdfbf..2dc6bb7 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ b/src/extended_data/connectors/anthropic/__init__.py @@ -37,6 +37,7 @@ from pydantic import BaseModel, ConfigDict, Field from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import to_builtin from extended_data.logging import Logging @@ -215,7 +216,7 @@ class AnthropicConnector(VendorConnectorBase): ... max_tokens=1024, ... messages=[{"role": "user", "content": "Hello"}] ... ) - >>> print(response.text) + >>> print(response["content"][0]["text"]) """ API_KEY_ENV = "ANTHROPIC_API_KEY" @@ -292,6 +293,20 @@ def _handle_error(self, response: httpx.Response) -> None: raise AnthropicRateLimitError(message, status_code=status_code, error_type=error_type) raise AnthropicAPIError(message, status_code=status_code, error_type=error_type) + @staticmethod + def _model_payload(model: BaseModel) -> dict[str, Any]: + """Serialize an Anthropic model into JSON-compatible API field names.""" + return model.model_dump(mode="json") + + @staticmethod + def _message_text(message: dict[str, Any]) -> str: + """Extract concatenated text blocks from an extended message payload.""" + return "".join( + str(block.get("text", "")) + for block in message.get("content", []) + if block.get("type") == "text" and block.get("text") + ) + # ========================================================================= # Message Operations # ========================================================================= @@ -309,7 +324,7 @@ def create_message( tools: list[dict[str, Any]] | None = None, tool_choice: dict[str, Any] | None = None, metadata: dict[str, Any] | None = None, - ) -> Message: + ) -> dict[str, Any]: """Create a message using 
Claude. Args: @@ -326,7 +341,7 @@ def create_message( metadata: Optional metadata for the request. Returns: - Message object with response. + Message response payload. Raises: AnthropicError: If the API request fails. @@ -356,12 +371,12 @@ def create_message( if metadata: body["metadata"] = metadata - response = self.post("/v1/messages", json=body) + response = self.post("/v1/messages", json=to_builtin(body)) if not response.is_success: self._handle_error(response) - return Message.model_validate(response.json()) + return self.extend_result(self._model_payload(Message.model_validate(response.json()))) def count_tokens( self, @@ -396,7 +411,7 @@ def count_tokens( if tools: body["tools"] = tools - response = self.post("/v1/messages/count_tokens", json=body) + response = self.post("/v1/messages/count_tokens", json=to_builtin(body)) if not response.is_success: self._handle_error(response) @@ -408,11 +423,11 @@ def count_tokens( # Model Operations # ========================================================================= - def list_models(self) -> list[Model]: + def list_models(self) -> list[dict[str, Any]]: """List available models from the API. Returns: - List of Model objects. + List of model payload dictionaries. Raises: AnthropicError: If the API request fails. @@ -426,16 +441,16 @@ def list_models(self) -> list[Model]: data = response.json() models_data = data.get("data", []) - return [Model.model_validate(m) for m in models_data] + return self.extend_result([self._model_payload(Model.model_validate(m)) for m in models_data]) - def get_model(self, model_id: str) -> Model: + def get_model(self, model_id: str) -> dict[str, Any]: """Get information about a specific model. Args: model_id: Model identifier. Returns: - Model object with details. + Model payload dictionary with details. Raises: AnthropicError: If the API request fails. 
@@ -447,7 +462,7 @@ def get_model(self, model_id: str) -> Model: if not response.is_success: self._handle_error(response) - return Model.model_validate(response.json()) + return self.extend_result(self._model_payload(Model.model_validate(response.json()))) # ========================================================================= # Agent Execution (Sandbox Mode) @@ -504,11 +519,12 @@ def execute_agent_task( ) duration = time.time() - start_time - total_tokens = response.usage.input_tokens + response.usage.output_tokens + usage = response.get("usage", {}) + total_tokens = int(usage.get("input_tokens", 0)) + int(usage.get("output_tokens", 0)) return AgentExecutionResult( success=True, - output=response.text, + output=self._message_text(response), duration_seconds=duration, tokens_used=total_tokens, ) @@ -553,4 +569,4 @@ def get_recommended_model(self, use_case: str = "general") -> str: "fast": "claude-haiku-4-5-20251001", # Claude Haiku 4.5 - fastest "powerful": "claude-opus-4-5-20251101", # Claude Opus 4.5 - most capable } - return recommendations.get(use_case, recommendations["general"]) + return self.extend_result(recommendations.get(use_case, recommendations["general"])) diff --git a/src/extended_data/connectors/anthropic/tools.py b/src/extended_data/connectors/anthropic/tools.py index 04f3707..a725c82 100644 --- a/src/extended_data/connectors/anthropic/tools.py +++ b/src/extended_data/connectors/anthropic/tools.py @@ -13,6 +13,15 @@ from extended_data.containers import extend_data +def _message_text(message: dict[str, Any]) -> str: + """Extract concatenated text blocks from a message payload.""" + return "".join( + str(block.get("text", "")) + for block in message.get("content", []) + if block.get("type") == "text" and block.get("text") + ) + + class CreateMessageSchema(BaseModel): """Pydantic schema for the anthropic_create_message tool.""" @@ -55,12 +64,12 @@ def anthropic_create_message( return extend_data( { - "id": response.id, - "text": response.text, - 
"model": response.model, + "id": response.get("id", ""), + "text": _message_text(response), + "model": response.get("model", ""), "usage": { - "input_tokens": response.usage.input_tokens, - "output_tokens": response.usage.output_tokens, + "input_tokens": response.get("usage", {}).get("input_tokens", 0), + "output_tokens": response.get("usage", {}).get("output_tokens", 0), }, } ) @@ -77,7 +86,7 @@ def anthropic_list_models() -> list[dict[str, Any]]: connector = AnthropicConnector() models = connector.list_models() - return extend_data([{"id": m.id, "display_name": m.display_name} for m in models]) + return extend_data([{"id": m.get("id", ""), "display_name": m.get("display_name", "")} for m in models]) TOOL_DEFINITIONS = [ diff --git a/tests/connectors/test_anthropic.py b/tests/connectors/test_anthropic.py index 5332a81..e878e73 100644 --- a/tests/connectors/test_anthropic.py +++ b/tests/connectors/test_anthropic.py @@ -18,6 +18,7 @@ Model, Usage, ) +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data class TestModels: @@ -123,6 +124,7 @@ def test_get_recommended_model(self): with patch.object(httpx, "Client"): connector = AnthropicConnector(api_key="test-key") # Using verified model IDs from https://docs.anthropic.com/en/docs/about-claude/models + assert isinstance(connector.get_recommended_model("general"), ExtendedString) assert connector.get_recommended_model("general") == "claude-sonnet-4-5-20250929" assert connector.get_recommended_model("fast") == "claude-haiku-4-5-20251001" assert connector.get_recommended_model("powerful") == "claude-opus-4-5-20251101" @@ -152,14 +154,18 @@ def test_create_message(self): message = connector.create_message( model="claude-sonnet-4-20250514", max_tokens=1024, - messages=[{"role": "user", "content": "Hi"}], + messages=extend_data([{"role": "user", "content": "Hi"}]), ) - assert message.id == "msg_123" - assert message.role == MessageRole.ASSISTANT - assert message.text == "Hello!" 
- assert message.usage.input_tokens == 10 - assert message.usage.output_tokens == 5 + assert isinstance(message, ExtendedDict) + assert isinstance(message["content"], ExtendedList) + assert isinstance(message["content"][0], ExtendedDict) + assert isinstance(message["id"], ExtendedString) + assert message["id"] == "msg_123" + assert message["role"] == "assistant" + assert message["content"][0]["text"] == "Hello!" + assert message["usage"]["input_tokens"] == 10 + assert message["usage"]["output_tokens"] == 5 # Verify request call_args = mock_client.request.call_args @@ -167,6 +173,8 @@ def test_create_message(self): assert "/v1/messages" in call_args.args[1] assert call_args.kwargs["json"]["model"] == "claude-sonnet-4-20250514" assert call_args.kwargs["json"]["max_tokens"] == 1024 + assert isinstance(call_args.kwargs["json"]["messages"], list) + assert isinstance(call_args.kwargs["json"]["messages"][0], dict) def test_create_message_with_system(self): """create_message should include system prompt.""" @@ -220,8 +228,31 @@ def test_list_models(self): connector = AnthropicConnector(api_key="test-key") models = connector.list_models() + assert isinstance(models, ExtendedList) + assert isinstance(models[0], ExtendedDict) + assert isinstance(models[0]["id"], ExtendedString) assert len(models) == 2 - assert models[0].id == "claude-sonnet-4-20250514" + assert models[0]["id"] == "claude-sonnet-4-20250514" + + def test_get_model(self): + """get_model should return an extended model payload.""" + import httpx + + mock_client = MagicMock() + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_success = True + mock_response.json.return_value = {"id": "claude-sonnet-4-20250514", "display_name": "Claude Sonnet 4"} + mock_client.request.return_value = mock_response + + with patch.object(httpx, "Client", return_value=mock_client): + connector = AnthropicConnector(api_key="test-key") + model = connector.get_model("claude-sonnet-4-20250514") + + assert 
isinstance(model, ExtendedDict) + assert isinstance(model["display_name"], ExtendedString) + assert model["display_name"] == "Claude Sonnet 4" class TestClaudeModels: diff --git a/tests/connectors/test_anthropic_tools.py b/tests/connectors/test_anthropic_tools.py index fcb6204..40309c3 100644 --- a/tests/connectors/test_anthropic_tools.py +++ b/tests/connectors/test_anthropic_tools.py @@ -4,7 +4,7 @@ from unittest.mock import MagicMock, patch -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data def test_anthropic_list_models(): @@ -13,10 +13,9 @@ def test_anthropic_list_models(): with patch("extended_data.connectors.anthropic.AnthropicConnector") as mock_connector_class: mock_connector = MagicMock() - mock_model = MagicMock() - mock_model.id = "claude-3-opus" - mock_model.display_name = "Claude 3 Opus" - mock_connector.list_models.return_value = [mock_model] + mock_connector.list_models.return_value = extend_data( + [{"id": "claude-3-opus", "display_name": "Claude 3 Opus"}] + ) mock_connector_class.return_value = mock_connector result = anthropic_list_models() @@ -33,13 +32,14 @@ def test_anthropic_create_message(): with patch("extended_data.connectors.anthropic.AnthropicConnector") as mock_connector_class: mock_connector = MagicMock() - mock_response = MagicMock() - mock_response.id = "msg_123" - mock_response.text = "Hello!" 
- mock_response.model = "claude-3-opus" - mock_response.usage.input_tokens = 10 - mock_response.usage.output_tokens = 5 - mock_connector.create_message.return_value = mock_response + mock_connector.create_message.return_value = extend_data( + { + "id": "msg_123", + "content": [{"type": "text", "text": "Hello!"}], + "model": "claude-3-opus", + "usage": {"input_tokens": 10, "output_tokens": 5}, + } + ) mock_connector_class.return_value = mock_connector result = anthropic_create_message(model="claude-3-opus", prompt="Hi") From c667f770c9ab326039ca838bf2244d054f641aa6 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:28:34 -0500 Subject: [PATCH 071/287] feat: promote secrets tool payloads --- src/extended_data/connectors/secrets/tools.py | 26 ++--- tests/connectors/test_secrets.py | 95 ++++++++++++++++++- 2 files changed, 108 insertions(+), 13 deletions(-) diff --git a/src/extended_data/connectors/secrets/tools.py b/src/extended_data/connectors/secrets/tools.py index 3a8fe88..c6e2f5c 100644 --- a/src/extended_data/connectors/secrets/tools.py +++ b/src/extended_data/connectors/secrets/tools.py @@ -10,6 +10,8 @@ from pydantic import BaseModel, Field +from extended_data.containers import extend_data + # ============================================================================= # Input Schemas @@ -66,11 +68,11 @@ def validate_config(config_path: str) -> dict[str, Any]: connector = SecretsConnector() is_valid, message = connector.validate_config(config_path) - return { + return extend_data({ "valid": is_valid, "message": message, "config_path": config_path, - } + }) def run_pipeline( @@ -126,7 +128,7 @@ def run_pipeline( result = connector.run_pipeline(config_path, options) - return { + return extend_data({ "success": result.success, "target_count": result.target_count, "secrets_processed": result.secrets_processed, @@ -137,7 +139,7 @@ def run_pipeline( "duration_ms": result.duration_ms, "error_message": result.error_message, "diff_output": 
result.diff_output if dry_run else "", - } + }) def dry_run(config_path: str) -> dict[str, Any]: @@ -154,7 +156,7 @@ def dry_run(config_path: str) -> dict[str, Any]: connector = SecretsConnector() result = connector.dry_run(config_path) - return { + return extend_data({ "success": result.success, "target_count": result.target_count, "secrets_would_add": result.secrets_added, @@ -163,7 +165,7 @@ def dry_run(config_path: str) -> dict[str, Any]: "secrets_unchanged": result.secrets_unchanged, "diff_output": result.diff_output, "error_message": result.error_message, - } + }) def get_config_info(config_path: str) -> dict[str, Any]: @@ -180,7 +182,7 @@ def get_config_info(config_path: str) -> dict[str, Any]: connector = SecretsConnector() info = connector.get_config_info(config_path) - return { + return extend_data({ "valid": info.valid, "error_message": info.error_message, "source_count": info.source_count, @@ -190,7 +192,7 @@ def get_config_info(config_path: str) -> dict[str, Any]: "has_merge_store": info.has_merge_store, "vault_address": info.vault_address, "aws_region": info.aws_region, - } + }) def get_targets(config_path: str) -> dict[str, Any]: @@ -207,11 +209,11 @@ def get_targets(config_path: str) -> dict[str, Any]: connector = SecretsConnector() targets, error = connector.get_targets(config_path) - return { + return extend_data({ "targets": targets, "count": len(targets), "error_message": error, - } + }) def get_sources(config_path: str) -> dict[str, Any]: @@ -228,11 +230,11 @@ def get_sources(config_path: str) -> dict[str, Any]: connector = SecretsConnector() sources, error = connector.get_sources(config_path) - return { + return extend_data({ "sources": sources, "count": len(sources), "error_message": error, - } + }) # ============================================================================= diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index 22af25d..2d2cbb1 100644 --- a/tests/connectors/test_secrets.py +++ 
b/tests/connectors/test_secrets.py @@ -7,13 +7,23 @@ import yaml from extended_data.connectors.secrets import ( + ConfigInfo, OutputFormat, SecretsConnector, SyncOperation, SyncOptions, SyncResult, ) -from extended_data.connectors.secrets.tools import RunPipelineSchema, run_pipeline +from extended_data.connectors.secrets.tools import ( + RunPipelineSchema, + dry_run, + get_config_info, + get_sources, + get_targets, + run_pipeline, + validate_config, +) +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString @pytest.fixture @@ -313,6 +323,8 @@ def test_run_pipeline_tool_default_continue_on_error_matches_cli(mock_connector_ options = mock_connector.run_pipeline.call_args.args[1] assert isinstance(options, SyncOptions) assert options.continue_on_error is True + assert isinstance(result, ExtendedDict) + assert isinstance(result["secrets_processed"], int) assert result["success"] is True assert result["secrets_processed"] == 3 @@ -333,3 +345,84 @@ def test_run_pipeline_schema_default_continue_on_error_matches_cli() -> None: schema = RunPipelineSchema(config_path="config.yaml") assert schema.continue_on_error is True + + +@patch("extended_data.connectors.secrets.SecretsConnector") +def test_validate_config_tool_returns_extended_payload(mock_connector_class: MagicMock) -> None: + mock_connector = mock_connector_class.return_value + mock_connector.validate_config.return_value = (True, "valid config") + + result = validate_config("config.yaml") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["message"], ExtendedString) + assert result["valid"] is True + assert result["config_path"] == "config.yaml" + + +@patch("extended_data.connectors.secrets.SecretsConnector") +def test_dry_run_tool_returns_extended_payload(mock_connector_class: MagicMock) -> None: + mock_connector = mock_connector_class.return_value + mock_connector.dry_run.return_value = SyncResult( + success=True, + target_count=2, + secrets_added=1, + secrets_modified=2, 
+ secrets_removed=0, + secrets_unchanged=3, + diff_output="diff", + ) + + result = dry_run("config.yaml") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["diff_output"], ExtendedString) + assert result["secrets_would_add"] == 1 + + +@patch("extended_data.connectors.secrets.SecretsConnector") +def test_get_config_info_tool_returns_extended_payload(mock_connector_class: MagicMock) -> None: + mock_connector = mock_connector_class.return_value + mock_connector.get_config_info.return_value = ConfigInfo( + valid=True, + source_count=1, + target_count=1, + sources=["vault/prod"], + targets=["aws/prod"], + has_merge_store=True, + vault_address="https://vault.example.com", + aws_region="us-east-1", + ) + + result = get_config_info("config.yaml") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["sources"], ExtendedList) + assert isinstance(result["sources"][0], ExtendedString) + assert result["targets"] == ["aws/prod"] + + +@patch("extended_data.connectors.secrets.SecretsConnector") +def test_get_targets_tool_returns_extended_payload(mock_connector_class: MagicMock) -> None: + mock_connector = mock_connector_class.return_value + mock_connector.get_targets.return_value = (["prod", "dev"], "") + + result = get_targets("config.yaml") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["targets"], ExtendedList) + assert isinstance(result["targets"][0], ExtendedString) + assert result["count"] == 2 + + +@patch("extended_data.connectors.secrets.SecretsConnector") +def test_get_sources_tool_returns_extended_payload(mock_connector_class: MagicMock) -> None: + mock_connector = mock_connector_class.return_value + mock_connector.get_sources.return_value = (["vault/prod"], "") + + result = get_sources("config.yaml") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["sources"], ExtendedList) + assert isinstance(result["sources"][0], ExtendedString) + assert result["count"] == 1 From 
5bfdb7a2bbe9ef0c18a661d6640e7a9e93869f01 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:31:07 -0500 Subject: [PATCH 072/287] feat: promote meshy tool payloads --- src/extended_data/connectors/meshy/tools.py | 58 +++++++++++---------- tests/connectors/meshy/test_tools.py | 27 +++++++++- 2 files changed, 56 insertions(+), 29 deletions(-) diff --git a/src/extended_data/connectors/meshy/tools.py b/src/extended_data/connectors/meshy/tools.py index ac9511b..0320fef 100644 --- a/src/extended_data/connectors/meshy/tools.py +++ b/src/extended_data/connectors/meshy/tools.py @@ -12,6 +12,8 @@ from pydantic import BaseModel, Field +from extended_data.containers import extend_data + # ============================================================================= # Pydantic Schemas for Tool Inputs @@ -118,11 +120,11 @@ def _extract_result_fields(result: object) -> dict[str, object]: # Extract thumbnail_url thumbnail_url = getattr(result, "thumbnail_url", None) - return { + return extend_data({ "status": status, "model_url": model_url, "thumbnail_url": thumbnail_url, - } + }) def text3d_generate( @@ -158,17 +160,17 @@ def text3d_generate( ) if isinstance(result, str): - return { + return extend_data({ "task_id": result, "status": "pending", "message": "Text-to-3D task submitted", - } + }) fields = _extract_result_fields(result) - return { + return extend_data({ "task_id": result.id, **fields, - } + }) def image3d_generate( @@ -199,17 +201,17 @@ def image3d_generate( ) if isinstance(result, str): - return { + return extend_data({ "task_id": result, "status": "pending", "message": "Image-to-3D task submitted", - } + }) fields = _extract_result_fields(result) - return { + return extend_data({ "task_id": result.id, **fields, - } + }) def rig_model(model_id: str, wait: bool = True) -> dict[str, Any]: @@ -227,18 +229,18 @@ def rig_model(model_id: str, wait: bool = True) -> dict[str, Any]: result = rigging.rig(model_id, wait=wait) if isinstance(result, str): - return 
{ + return extend_data({ "task_id": result, "status": "pending", "message": "Rigging task submitted", - } + }) if wait: - return { + return extend_data({ "task_id": result.id, "status": result.status.value if hasattr(result.status, "value") else str(result.status), "message": "Rigging completed", - } + }) msg = "Expected rigging task id when wait=False" raise TypeError(msg) @@ -260,19 +262,19 @@ def apply_animation(model_id: str, animation_id: int, wait: bool = True) -> dict result = animate.apply(model_id, int(animation_id), wait=wait) if isinstance(result, str): - return { + return extend_data({ "task_id": result, "status": "pending", "message": "Animation task submitted", - } + }) if wait: - return { + return extend_data({ "task_id": result.id, "status": result.status.value if hasattr(result.status, "value") else str(result.status), "message": "Animation completed", "glb_url": result.animation_glb_url, - } + }) msg = "Expected animation task id when wait=False" raise TypeError(msg) @@ -305,19 +307,19 @@ def retexture_model( ) if isinstance(result, str): - return { + return extend_data({ "task_id": result, "status": "pending", "message": "Retexture task submitted", - } + }) if wait: - return { + return extend_data({ "task_id": result.id, "status": result.status.value if hasattr(result.status, "value") else str(result.status), "message": "Retexture completed", "model_url": getattr(result, "model_url", None), - } + }) msg = "Expected retexture task id when wait=False" raise TypeError(msg) @@ -351,11 +353,11 @@ def list_animations(category: str = "", limit: int = 50) -> dict[str, Any]: } ) - return { + return extend_data({ "count": len(results), "total": len(animations), "animations": results, - } + }) def check_task_status(task_id: str, task_type: str = "text-to-3d") -> dict[str, Any]: @@ -393,12 +395,12 @@ def check_task_status(task_id: str, task_type: str = "text-to-3d") -> dict[str, elif hasattr(result, "glb_url"): model_url = result.glb_url - return { + return 
extend_data({ "task_id": task_id, "status": status, "progress": getattr(result, "progress", None), "model_url": model_url, - } + }) def get_animation(animation_id: int) -> dict[str, Any]: @@ -417,13 +419,13 @@ def get_animation(animation_id: int) -> dict[str, Any]: anim = ANIMATIONS[animation_id] - return { + return extend_data({ "id": anim.id, "name": anim.name, "category": anim.category, "subcategory": anim.subcategory, "preview_url": anim.preview_url, - } + }) # ============================================================================= diff --git a/tests/connectors/meshy/test_tools.py b/tests/connectors/meshy/test_tools.py index 170f999..f600801 100644 --- a/tests/connectors/meshy/test_tools.py +++ b/tests/connectors/meshy/test_tools.py @@ -7,10 +7,14 @@ from __future__ import annotations +import importlib.util + from unittest.mock import MagicMock, patch import pytest +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString + # Expected tools list - canonical reference for all Meshy tools EXPECTED_MESHY_TOOLS = { @@ -80,6 +84,8 @@ def test_successful_generation(self): art_style="realistic", ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["task_id"], ExtendedString) assert result["task_id"] == "task_123" assert result["status"] == "SUCCEEDED" assert result["model_url"] == "https://example.com/model.glb" @@ -114,6 +120,7 @@ def test_generation_with_defaults(self): wait=True, ) + assert isinstance(result, ExtendedDict) assert result["task_id"] == "task_456" @@ -137,6 +144,8 @@ def test_successful_image_to_3d(self): topology="quad", ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["model_url"], ExtendedString) assert result["task_id"] == "img_task_456" assert result["status"] == "SUCCEEDED" assert result["model_url"] == "https://example.com/img_model.glb" @@ -156,6 +165,8 @@ def test_successful_rigging_with_wait(self): with patch("extended_data.connectors.meshy.rigging.rig", 
return_value=mock_result): result = rig_model(model_id="model_123", wait=True) + assert isinstance(result, ExtendedDict) + assert isinstance(result["message"], ExtendedString) assert result["task_id"] == "rig_789" assert result["status"] == "SUCCEEDED" assert "Rigging completed" in result["message"] @@ -168,6 +179,7 @@ def test_rigging_without_wait(self): with patch("extended_data.connectors.meshy.rigging.rig", return_value="pending_rig_task"): result = rig_model(model_id="model_123", wait=False) + assert isinstance(result, ExtendedDict) assert result["task_id"] == "pending_rig_task" assert result["status"] == "pending" @@ -191,6 +203,8 @@ def test_successful_animation(self): wait=True, ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["glb_url"], ExtendedString) assert result["task_id"] == "anim_task_123" assert result["status"] == "SUCCEEDED" assert result["glb_url"] == "https://example.com/animated.glb" @@ -206,6 +220,7 @@ def test_animation_without_wait(self): wait=False, ) + assert isinstance(result, ExtendedDict) assert result["task_id"] == "anim_pending" assert result["status"] == "pending" @@ -228,6 +243,7 @@ def test_successful_retexture(self): texture_prompt="golden metallic finish", ) + assert isinstance(result, ExtendedDict) assert result["task_id"] == "retex_123" assert result["status"] == "SUCCEEDED" @@ -257,6 +273,9 @@ def test_list_all_animations(self): with patch("extended_data.connectors.meshy.animations.ANIMATIONS", mock_animations): result = list_animations() + assert isinstance(result, ExtendedDict) + assert isinstance(result["animations"], ExtendedList) + assert isinstance(result["animations"][0], ExtendedDict) assert result["count"] == 2 assert result["total"] == 2 assert len(result["animations"]) == 2 @@ -282,6 +301,8 @@ def test_list_animations_with_category_filter(self): with patch("extended_data.connectors.meshy.animations.ANIMATIONS", mock_animations): result = list_animations(category="Fighting") + assert 
isinstance(result, ExtendedDict) + assert isinstance(result["animations"][0]["name"], ExtendedString) assert result["count"] == 1 assert result["animations"][0]["name"] == "Punch" @@ -324,6 +345,8 @@ def test_check_text3d_status(self): task_type="text-to-3d", ) + assert isinstance(result, ExtendedDict) + assert isinstance(result["task_id"], ExtendedString) assert result["task_id"] == "task_123" assert result["status"] == "SUCCEEDED" assert result["progress"] == 100 @@ -357,6 +380,8 @@ def test_get_existing_animation(self): with patch("extended_data.connectors.meshy.animations.ANIMATIONS", {42: mock_anim}): result = get_animation(animation_id=42) + assert isinstance(result, ExtendedDict) + assert isinstance(result["name"], ExtendedString) assert result["id"] == 42 assert result["name"] == "Dance" assert result["preview_url"] == "https://example.com/preview.gif" @@ -374,7 +399,7 @@ class TestLangChainTools: """Tests for LangChain tools (optional dependency).""" @pytest.mark.skipif( - not pytest.importorskip("langchain_core", reason="langchain-core not installed"), + importlib.util.find_spec("langchain_core") is None, reason="langchain-core not installed", ) def test_get_langchain_tools_returns_structured_tools(self): From 84c24f0e59a46a8140d411418d3c8d8fbfeaee28 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:33:44 -0500 Subject: [PATCH 073/287] feat: promote meshy task ids --- src/extended_data/connectors/meshy/animate.py | 3 +- src/extended_data/connectors/meshy/image3d.py | 5 +- .../connectors/meshy/retexture.py | 3 +- src/extended_data/connectors/meshy/rigging.py | 3 +- src/extended_data/connectors/meshy/text3d.py | 5 +- tests/connectors/meshy/test_task_ids.py | 73 +++++++++++++++++++ 6 files changed, 85 insertions(+), 7 deletions(-) create mode 100644 tests/connectors/meshy/test_task_ids.py diff --git a/src/extended_data/connectors/meshy/animate.py b/src/extended_data/connectors/meshy/animate.py index a215e30..b292d0e 100644 --- 
a/src/extended_data/connectors/meshy/animate.py +++ b/src/extended_data/connectors/meshy/animate.py @@ -17,6 +17,7 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import AnimationRequest, AnimationResult, TaskStatus +from extended_data.containers import extend_data def create(request: AnimationRequest) -> str: @@ -27,7 +28,7 @@ def create(request: AnimationRequest) -> str: version="v1", json=request.model_dump(exclude_none=True), ) - return response.json().get("result") + return extend_data(response.json().get("result")) def get(task_id: str) -> AnimationResult: diff --git a/src/extended_data/connectors/meshy/image3d.py b/src/extended_data/connectors/meshy/image3d.py index 5cda9f3..0874349 100644 --- a/src/extended_data/connectors/meshy/image3d.py +++ b/src/extended_data/connectors/meshy/image3d.py @@ -13,6 +13,7 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import Image3DRequest, Image3DResult, TaskStatus +from extended_data.containers import extend_data def create(request: Image3DRequest) -> str: @@ -26,7 +27,7 @@ def create(request: Image3DRequest) -> str: data = response.json() if "result" not in data: raise RuntimeError(f"Unexpected API response: missing 'result' key. Response: {data}") - return data["result"] + return extend_data(data["result"]) def get(task_id: str) -> Image3DResult: @@ -46,7 +47,7 @@ def refine(task_id: str) -> str: data = response.json() if "result" not in data: raise RuntimeError(f"Unexpected API response: missing 'result' key. 
Response: {data}") - return data["result"] + return extend_data(data["result"]) def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> Image3DResult: diff --git a/src/extended_data/connectors/meshy/retexture.py b/src/extended_data/connectors/meshy/retexture.py index 422d63f..c3c6833 100644 --- a/src/extended_data/connectors/meshy/retexture.py +++ b/src/extended_data/connectors/meshy/retexture.py @@ -12,6 +12,7 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import RetextureRequest, RetextureResult, TaskStatus +from extended_data.containers import extend_data def create(request: RetextureRequest) -> str: @@ -22,7 +23,7 @@ def create(request: RetextureRequest) -> str: version="v1", json=request.model_dump(exclude_none=True), ) - return response.json().get("result") + return extend_data(response.json().get("result")) def get(task_id: str) -> RetextureResult: diff --git a/src/extended_data/connectors/meshy/rigging.py b/src/extended_data/connectors/meshy/rigging.py index bc3195a..93f458a 100644 --- a/src/extended_data/connectors/meshy/rigging.py +++ b/src/extended_data/connectors/meshy/rigging.py @@ -12,6 +12,7 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import RiggingRequest, RiggingResult, TaskStatus +from extended_data.containers import extend_data def create(request: RiggingRequest) -> str: @@ -22,7 +23,7 @@ def create(request: RiggingRequest) -> str: version="v1", json=request.model_dump(exclude_none=True), ) - return response.json().get("result") + return extend_data(response.json().get("result")) def get(task_id: str) -> RiggingResult: diff --git a/src/extended_data/connectors/meshy/text3d.py b/src/extended_data/connectors/meshy/text3d.py index 05fa743..aa52806 100644 --- a/src/extended_data/connectors/meshy/text3d.py +++ b/src/extended_data/connectors/meshy/text3d.py @@ -13,6 +13,7 @@ from extended_data.connectors.meshy import base from 
extended_data.connectors.meshy.models import ArtStyle, TaskStatus, Text3DRequest, Text3DResult +from extended_data.containers import extend_data def create(request: Text3DRequest) -> str: @@ -23,7 +24,7 @@ def create(request: Text3DRequest) -> str: version="v2", json=request.model_dump(exclude_none=True), ) - return response.json().get("result") + return extend_data(response.json().get("result")) def get(task_id: str) -> Text3DResult: @@ -40,7 +41,7 @@ def refine(task_id: str) -> str: version="v2", json={}, ) - return response.json().get("result") + return extend_data(response.json().get("result")) def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> Text3DResult: diff --git a/tests/connectors/meshy/test_task_ids.py b/tests/connectors/meshy/test_task_ids.py new file mode 100644 index 0000000..e9ee7f5 --- /dev/null +++ b/tests/connectors/meshy/test_task_ids.py @@ -0,0 +1,73 @@ +"""Tests for Meshy task-id API helpers.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from extended_data.connectors.meshy import animate, image3d, retexture, rigging, text3d +from extended_data.connectors.meshy.models import ( + AnimationRequest, + Image3DRequest, + RetextureRequest, + RiggingRequest, + Text3DRequest, +) +from extended_data.containers import ExtendedString + + +def _task_response(task_id: str) -> MagicMock: + response = MagicMock() + response.json.return_value = {"result": task_id} + return response + + +def test_text3d_task_ids_are_extended_strings() -> None: + with patch("extended_data.connectors.meshy.text3d.base.request", return_value=_task_response("text-task")): + created = text3d.create(Text3DRequest(prompt="a sword")) + refined = text3d.refine("text-task") + + assert isinstance(created, ExtendedString) + assert isinstance(refined, ExtendedString) + assert created == "text-task" + assert refined == "text-task" + + +def test_image3d_task_ids_are_extended_strings() -> None: + with 
patch("extended_data.connectors.meshy.image3d.base.request", return_value=_task_response("image-task")): + created = image3d.create(Image3DRequest(image_url="https://example.com/source.png")) + refined = image3d.refine("image-task") + + assert isinstance(created, ExtendedString) + assert isinstance(refined, ExtendedString) + assert created == "image-task" + assert refined == "image-task" + + +def test_animation_task_id_is_extended_string() -> None: + request = AnimationRequest(rig_task_id="rig-task", action_id=42) + + with patch("extended_data.connectors.meshy.animate.base.request", return_value=_task_response("animation-task")): + created = animate.create(request) + + assert isinstance(created, ExtendedString) + assert created == "animation-task" + + +def test_rigging_task_id_is_extended_string() -> None: + request = RiggingRequest(input_task_id="model-task") + + with patch("extended_data.connectors.meshy.rigging.base.request", return_value=_task_response("rig-task")): + created = rigging.create(request) + + assert isinstance(created, ExtendedString) + assert created == "rig-task" + + +def test_retexture_task_id_is_extended_string() -> None: + request = RetextureRequest(input_task_id="model-task", text_style_prompt="gold") + + with patch("extended_data.connectors.meshy.retexture.base.request", return_value=_task_response("retexture-task")): + created = retexture.create(request) + + assert isinstance(created, ExtendedString) + assert created == "retexture-task" From 08c21af1ea7c83d58e7a7a4d8b527420892b2de6 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:37:27 -0500 Subject: [PATCH 074/287] feat: promote cloud helper payloads --- src/extended_data/connectors/cloud_params.py | 17 ++++++++-------- .../connectors/cursor/__init__.py | 2 +- tests/connectors/test_cloud_params.py | 5 +++++ tests/connectors/test_cursor.py | 20 +++++++++++++++++++ 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/extended_data/connectors/cloud_params.py 
b/src/extended_data/connectors/cloud_params.py index 5718bad..f1078c0 100644 --- a/src/extended_data/connectors/cloud_params.py +++ b/src/extended_data/connectors/cloud_params.py @@ -17,6 +17,7 @@ from typing import Any from extended_data import is_nothing, lower_first_char, upper_first_char +from extended_data.containers import ExtendedDict, extend_data def get_cloud_call_params( @@ -26,7 +27,7 @@ def get_cloud_call_params( first_letter_to_lower: bool = False, first_letter_to_upper: bool = False, **kwargs: Any, -) -> dict[str, Any]: +) -> ExtendedDict: """Build a parameter dictionary for cloud API calls. This function creates properly formatted parameter dictionaries for @@ -42,7 +43,7 @@ def get_cloud_call_params( **kwargs: Additional parameters to include. Returns: - A dictionary of parameters ready for the cloud API call. + An extended dictionary of parameters ready for the cloud API call. Examples: >>> get_cloud_call_params(max_results=100, NextToken="abc123") @@ -58,7 +59,7 @@ def get_cloud_call_params( params["MaxResults"] = max_results if not first_letter_to_lower and not first_letter_to_upper: - return params + return extend_data(params) if first_letter_to_lower: params = {lower_first_char(k): v for k, v in params.items()} @@ -66,10 +67,10 @@ def get_cloud_call_params( if first_letter_to_upper: params = {upper_first_char(k): v for k, v in params.items()} - return params + return extend_data(params) -def get_aws_call_params(max_results: int | None = 100, **kwargs: Any) -> dict[str, Any]: +def get_aws_call_params(max_results: int | None = 100, **kwargs: Any) -> ExtendedDict: """Build parameters for AWS API calls. AWS APIs typically use PascalCase keys (e.g., MaxResults, NextToken). @@ -80,7 +81,7 @@ def get_aws_call_params(max_results: int | None = 100, **kwargs: Any) -> dict[st **kwargs: Additional parameters (will be PascalCased). Returns: - Parameter dictionary with PascalCase keys. + Extended parameter dictionary with PascalCase keys. 
Examples: >>> get_aws_call_params(NextToken="abc") @@ -94,7 +95,7 @@ def get_aws_call_params(max_results: int | None = 100, **kwargs: Any) -> dict[st def get_google_call_params( max_results: int | None = 200, no_max_results: bool = False, **kwargs: Any -) -> dict[str, Any]: +) -> ExtendedDict: """Build parameters for Google Cloud API calls. Google APIs typically use camelCase keys (e.g., maxResults, pageToken). @@ -106,7 +107,7 @@ def get_google_call_params( **kwargs: Additional parameters (will be camelCased). Returns: - Parameter dictionary with camelCase keys. + Extended parameter dictionary with camelCase keys. Examples: >>> get_google_call_params(pageToken="xyz") diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index 70a2542..bdaab42 100644 --- a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -623,6 +623,6 @@ def list_models(self) -> list[str]: self.logger.info("Listing models") data = self._request_api("/models") if not data: - return [] + return self.extend_result([]) return self.extend_result(data.get("models", [])) diff --git a/tests/connectors/test_cloud_params.py b/tests/connectors/test_cloud_params.py index a590af0..e0067fc 100644 --- a/tests/connectors/test_cloud_params.py +++ b/tests/connectors/test_cloud_params.py @@ -7,6 +7,7 @@ get_cloud_call_params, get_google_call_params, ) +from extended_data.containers import ExtendedDict, ExtendedString class TestGetCloudCallParams: @@ -15,6 +16,7 @@ class TestGetCloudCallParams: def test_default_max_results(self): """Default max_results is 10.""" params = get_cloud_call_params() + assert isinstance(params, ExtendedDict) assert params == {"MaxResults": 10} def test_custom_max_results(self): @@ -35,6 +37,7 @@ def test_max_results_zero(self): def test_kwargs_included(self): """Additional kwargs are included.""" params = get_cloud_call_params(NextToken="abc123") + assert 
isinstance(params["NextToken"], ExtendedString) assert params == {"MaxResults": 10, "NextToken": "abc123"} def test_reject_null_values(self): @@ -64,6 +67,7 @@ class TestGetAwsCallParams: def test_default_max_results(self): """AWS default max_results is 100.""" params = get_aws_call_params() + assert isinstance(params, ExtendedDict) assert params == {"MaxResults": 100} def test_first_letter_upper(self): @@ -91,6 +95,7 @@ class TestGetGoogleCallParams: def test_default_max_results(self): """Google default max_results is 200.""" params = get_google_call_params() + assert isinstance(params, ExtendedDict) assert params == {"maxResults": 200} def test_first_letter_lower(self): diff --git a/tests/connectors/test_cursor.py b/tests/connectors/test_cursor.py index c03ee32..a7a929b 100644 --- a/tests/connectors/test_cursor.py +++ b/tests/connectors/test_cursor.py @@ -344,3 +344,23 @@ def test_list_models_returns_extended_list(self, mock_client_class): assert isinstance(models, ExtendedList) assert isinstance(models[0], ExtendedString) assert models[0].to_snake_case() == "cursor_small" + + @patch("extended_data.connectors.cursor.httpx.Client") + def test_list_models_empty_response_returns_extended_list(self, mock_client_class): + """list_models should extend the empty response path too.""" + mock_client = MagicMock() + mock_client_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_success = True + mock_response.headers = {"content-type": "application/json"} + mock_response.text = "{}" + mock_response.json.return_value = {} + mock_client.request.return_value = mock_response + + connector = CursorConnector(api_key="test-key") + models = connector.list_models() + + assert isinstance(models, ExtendedList) + assert models == [] From e5ea1bdfe9d8e01173cd19a017330207d3a5ef47 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:41:59 -0500 Subject: [PATCH 075/287] feat: promote connector metadata 
payloads --- src/extended_data/connectors/_optional.py | 22 ++++++++------- src/extended_data/connectors/cli.py | 16 ++++++----- src/extended_data/connectors/connectors.py | 5 ++-- src/extended_data/connectors/registry.py | 17 +++++------ tests/connectors/test_connectors.py | 14 +++++++++- .../connectors/test_optional_dependencies.py | 28 +++++++++++++++++++ 6 files changed, 74 insertions(+), 28 deletions(-) diff --git a/src/extended_data/connectors/_optional.py b/src/extended_data/connectors/_optional.py index 7da4b40..54ac540 100644 --- a/src/extended_data/connectors/_optional.py +++ b/src/extended_data/connectors/_optional.py @@ -20,6 +20,8 @@ from typing import Any +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data + # Mapping of package names to their extras PACKAGE_TO_EXTRA: dict[str, str] = { @@ -162,27 +164,27 @@ def require_any(*packages: str, extra: str) -> Any: # === Framework Detection === -def detect_ai_frameworks() -> dict[str, bool]: +def detect_ai_frameworks() -> ExtendedDict: """Detect which AI frameworks are available. Returns: - Dict mapping framework name to availability + Extended dict mapping framework name to availability. """ - return { + return extend_data({ "langchain": is_available("langchain_core"), "crewai": is_available("crewai"), "strands": is_available("strands"), "mcp": is_available("mcp"), - } + }) -def get_available_ai_frameworks() -> list[str]: +def get_available_ai_frameworks() -> ExtendedList[ExtendedString]: """Get list of available AI frameworks. Returns: - List of framework names that are installed + Extended list of framework names that are installed. 
""" - return [name for name, available in detect_ai_frameworks().items() if available] + return extend_data([name for name, available in detect_ai_frameworks().items() if available]) # === Connector Availability === @@ -264,13 +266,13 @@ def is_connector_available(connector: str) -> bool: return not get_missing_connector_requirements(connector) -def get_available_connectors() -> list[str]: +def get_available_connectors() -> ExtendedList[ExtendedString]: """Get list of connectors with available dependencies. Returns: - List of connector names that can be used + Extended list of connector names that can be used. """ - return [name for name in CONNECTOR_REQUIREMENTS if is_connector_available(name)] + return extend_data([name for name in CONNECTOR_REQUIREMENTS if is_connector_available(name)]) def require_connector(connector: str) -> None: diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index fe8407c..56580a3 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -29,6 +29,7 @@ get_connector_info, list_connector_info, ) +from extended_data.containers import ExtendedList from extended_data.containers.factory import to_builtin @@ -71,11 +72,11 @@ def _parse_arg_value(value: str) -> Any: return value -def _format_list(values: list[str] | tuple[str, ...] | None) -> str: +def _format_list(values: list[Any] | tuple[Any, ...] 
| ExtendedList[Any] | None) -> str: """Format a list-like metadata field for CLI output.""" if not values: return "-" - return ", ".join(values) + return ", ".join(str(value) for value in values) def _write_stdout(message: str) -> None: @@ -104,10 +105,11 @@ def cmd_list(args: argparse.Namespace) -> int: _write_stdout(f"{'name':<18} {'status':<11} {'extra':<10} {'class':<28} install") for c in info: status = "available" if c["available"] else "missing" - extra = c.get("extra") or "-" - class_name = c.get("class") or "-" - install = c.get("install") or "-" - _write_stdout(f"{c['name']:<18} {status:<11} {extra:<10} {class_name:<28} {install}") + name = str(c["name"]) + extra = str(c.get("extra") or "-") + class_name = str(c.get("class") or "-") + install = str(c.get("install") or "-") + _write_stdout(f"{name:<18} {status:<11} {extra:<10} {class_name:<28} {install}") return 0 @@ -214,7 +216,7 @@ def cmd_info(args: argparse.Namespace) -> int: "error", ): value = info.get(key) - if isinstance(value, list): + if isinstance(value, list | tuple | ExtendedList): value = _format_list(value) _write_stdout(f"{key}: {value if value is not None else '-'}") return 0 diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index 63f8d92..831af14 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -22,6 +22,7 @@ list_connectors as list_registered_connectors, ) from extended_data.connectors.zoom import ZoomConnector +from extended_data.containers import ExtendedDict, ExtendedList from extended_data.inputs import InputProvider from extended_data.logging import Logging @@ -94,11 +95,11 @@ def list_connectors(self) -> dict[str, Any]: """List connector classes available in the current environment.""" return list_registered_connectors() - def list_connector_info(self, *, include_unavailable: bool = True) -> list[dict[str, Any]]: + def list_connector_info(self, *, include_unavailable: bool 
= True) -> ExtendedList[ExtendedDict]: """List connector catalog metadata.""" return list_registered_connector_info(include_unavailable=include_unavailable) - def get_connector_info(self, name: str, *, include_unavailable: bool = True) -> dict[str, Any]: + def get_connector_info(self, name: str, *, include_unavailable: bool = True) -> ExtendedDict: """Get catalog metadata for one connector.""" return get_registered_connector_info(name, include_unavailable=include_unavailable) diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 98cfab0..6b420d1 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -41,6 +41,7 @@ get_extra_for_connector, get_missing_connector_requirements, ) +from extended_data.containers import ExtendedDict, ExtendedList, extend_data if TYPE_CHECKING: @@ -73,9 +74,9 @@ class ConnectorInfo: description: str | None error: str | None - def as_dict(self) -> dict[str, Any]: - """Return JSON-friendly connector metadata.""" - return { + def as_dict(self) -> ExtendedDict: + """Return extended JSON-friendly connector metadata.""" + return extend_data({ "name": self.name, "available": self.available, "source": self.source, @@ -88,7 +89,7 @@ def as_dict(self) -> dict[str, Any]: "base_url": self.base_url, "description": self.description, "error": self.error, - } + }) BUILTIN_CONNECTORS: dict[str, BuiltinConnectorSpec] = { @@ -316,7 +317,7 @@ def _missing_builtin_connector_info(name: str, error: ImportError | None) -> Con # ============================================================================= -def get_connector_info(name: str, *, include_unavailable: bool = True) -> dict[str, Any]: +def get_connector_info(name: str, *, include_unavailable: bool = True) -> ExtendedDict: """Get registry metadata about a connector.""" connector_name = _normalize_connector_name(name) connectors = _discover_connectors() @@ -336,7 +337,7 @@ def get_connector_info(name: str, *, 
include_unavailable: bool = True) -> dict[s raise ValueError(f"Unknown connector: {name}. Available: {available}") -def list_connector_info(*, include_unavailable: bool = True) -> list[dict[str, Any]]: +def list_connector_info(*, include_unavailable: bool = True) -> ExtendedList[ExtendedDict]: """Get registry metadata for known connectors.""" connectors = _discover_connectors() names = set(connectors) @@ -345,5 +346,5 @@ def list_connector_info(*, include_unavailable: bool = True) -> list[dict[str, A names.update(_missing_builtin_connectors) info = [get_connector_info(name, include_unavailable=include_unavailable) for name in sorted(names)] if not include_unavailable: - return [connector for connector in info if connector["available"]] - return info + return extend_data([connector for connector in info if connector["available"]]) + return extend_data(info) diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index ca2f692..3456c64 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -8,6 +8,7 @@ from extended_data.connectors import registry from extended_data.connectors.connectors import ConnectorFabric +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString # Helper to check if optional dependencies are available @@ -138,9 +139,14 @@ def test_connector_fabric_exposes_catalog_info(self): info = vc.list_connector_info() names = {connector["name"] for connector in info} + assert isinstance(info, ExtendedList) + assert isinstance(info[0], ExtendedDict) + assert isinstance(info[0]["name"], ExtendedString) assert "cursor" in names assert "github" in names - assert vc.get_connector_info(" github ")["name"] == "github" + github_info = vc.get_connector_info(" github ") + assert isinstance(github_info, ExtendedDict) + assert github_info["name"] == "github" assert isinstance(vc.list_connectors(), dict) @requires_boto3 @@ -350,6 +356,8 @@ def 
test_get_connector_info_includes_known_missing_builtin(self, monkeypatch): info = registry.get_connector_info(" github ") + assert isinstance(info, ExtendedDict) + assert isinstance(info["name"], ExtendedString) assert info["name"] == "github" assert info["available"] is False assert info["extra"] == "github" @@ -371,6 +379,7 @@ def test_get_connector_info_reports_unregistered_builtin_entry_point(self, monke info = registry.get_connector_info(" github ") + assert isinstance(info, ExtendedDict) assert info["name"] == "github" assert info["available"] is False assert info["extra"] == "github" @@ -383,6 +392,8 @@ def test_builtin_with_missing_requirements_is_unavailable(self): if not _has_module("boto3"): info = registry.get_connector_info("aws") + assert isinstance(info, ExtendedDict) + assert isinstance(info["missing"], ExtendedList) assert info["available"] is False assert info["missing"] == ["boto3"] @@ -395,4 +406,5 @@ def test_available_only_catalog_filters_missing_builtins(self): info = registry.list_connector_info(include_unavailable=False) + assert isinstance(info, ExtendedList) assert all(connector["available"] for connector in info) diff --git a/tests/connectors/test_optional_dependencies.py b/tests/connectors/test_optional_dependencies.py index 5e63a1e..38b5910 100644 --- a/tests/connectors/test_optional_dependencies.py +++ b/tests/connectors/test_optional_dependencies.py @@ -9,6 +9,7 @@ import tomlkit from extended_data.connectors import _optional, registry +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString REPO_ROOT = Path(__file__).resolve().parents[2] @@ -91,3 +92,30 @@ def test_get_crewai_tool_decorator_rejects_incompatible_crewai(monkeypatch) -> N with pytest.raises(ImportError, match="does not expose it"): _optional.get_crewai_tool_decorator() + + +def test_framework_detection_returns_extended_metadata(monkeypatch) -> None: + """AI framework availability helpers return first-class extended values.""" + available = 
{"langchain_core": True, "crewai": False, "strands": True, "mcp": False} + monkeypatch.setattr(_optional, "is_available", lambda package: available[package]) + + detected = _optional.detect_ai_frameworks() + frameworks = _optional.get_available_ai_frameworks() + + assert isinstance(detected, ExtendedDict) + assert detected == {"langchain": True, "crewai": False, "strands": True, "mcp": False} + assert isinstance(frameworks, ExtendedList) + assert frameworks == ["langchain", "strands"] + assert isinstance(frameworks[0], ExtendedString) + + +def test_available_connectors_returns_extended_names(monkeypatch) -> None: + """Connector availability helper returns first-class extended names.""" + monkeypatch.setattr(_optional, "is_connector_available", lambda connector: connector in {"cursor", "meshy"}) + + connectors = _optional.get_available_connectors() + + assert isinstance(connectors, ExtendedList) + assert "cursor" in connectors + assert "meshy" in connectors + assert isinstance(connectors[0], ExtendedString) From d9ec996faf9d597348de60743746167d071236e6 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:44:17 -0500 Subject: [PATCH 076/287] feat: promote slack block payloads --- .../connectors/slack/__init__.py | 39 +++++++------- tests/connectors/test_slack_connector.py | 53 ++++++++++++++++++- 2 files changed, 71 insertions(+), 21 deletions(-) diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index 83200d2..66849a9 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -25,6 +25,7 @@ def batched(iterable: Iterable[Any], n: int) -> Iterator[tuple[Any, ...]]: from extended_data import is_nothing, wrap_raw_data_for_export from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import ExtendedDict, ExtendedList, extend_data, to_builtin 
from extended_data.logging import Logging @@ -64,31 +65,31 @@ def __init__(self, response: Any) -> None: super().__init__(f"Slack API error: {response}") -def get_divider() -> dict[str, str]: +def get_divider() -> ExtendedDict: """Return a Slack divider block. Returns: - dict[str, str]: Slack block definition for a divider element. + Extended Slack block definition for a divider element. """ - return {"type": "divider"} + return extend_data({"type": "divider"}) -def get_header_block(field_title: str) -> list[dict[str, Any]]: +def get_header_block(field_title: str) -> ExtendedList[ExtendedDict]: """Return header and divider blocks for a section title. Args: field_title: Title text to render in the header block. Returns: - list[dict[str, Any]]: Header block followed by a divider. + Extended Slack blocks containing a header followed by a divider. """ - return [ + return extend_data([ {"type": "header", "text": {"type": "plain_text", "text": field_title}}, get_divider(), - ] + ]) -def get_field_context_message_blocks(field_name: str, context_data: Mapping[str, Any]) -> list[dict[str, Any]]: +def get_field_context_message_blocks(field_name: str, context_data: Mapping[str, Any]) -> ExtendedList[ExtendedDict]: """Build header and context blocks for detailed field data. Args: @@ -96,10 +97,10 @@ def get_field_context_message_blocks(field_name: str, context_data: Mapping[str, context_data: Mapping of key/value pairs rendered inside context blocks. Returns: - list[dict[str, Any]]: Blocks describing the field data. + Extended Slack blocks describing the field data. 
""" field_title = field_name.title() - blocks: list[dict[str, Any]] = [ + blocks: list[Any] = [ {"type": "header", "text": {"type": "plain_text", "text": field_title}}, get_divider(), ] @@ -117,10 +118,10 @@ def get_field_context_message_blocks(field_name: str, context_data: Mapping[str, blocks.extend([{"type": "context", "elements": context_elements}, get_divider()]) - return blocks + return extend_data(blocks) -def get_key_value_blocks(k: str, v: Any) -> list[dict[str, Any]]: +def get_key_value_blocks(k: str, v: Any) -> ExtendedList[ExtendedDict]: """Format a key/value pair into Slack section blocks. Args: @@ -128,7 +129,7 @@ def get_key_value_blocks(k: str, v: Any) -> list[dict[str, Any]]: v: Value to render. Mappings are encoded to Slack-safe text. Returns: - list[dict[str, Any]]: Section block followed by a divider. + Extended Slack section block followed by a divider. """ k = k.title() if isinstance(v, Mapping): @@ -136,7 +137,7 @@ def get_key_value_blocks(k: str, v: Any) -> list[dict[str, Any]]: if not isinstance(v, str): v = str(v) - return [{"type": "section", "text": {"type": "mrkdwn", "text": f"*{k}*: {v}"}}, get_divider()] + return extend_data([{"type": "section", "text": {"type": "mrkdwn", "text": f"*{k}*: {v}"}}, get_divider()]) def get_rich_text_blocks( @@ -144,7 +145,7 @@ def get_rich_text_blocks( bold: bool = False, italic: bool = False, strike: bool = False, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """Build a rich text block for multiline messages. Args: @@ -154,7 +155,7 @@ def get_rich_text_blocks( strike: Whether to strike through the text. Returns: - list[dict[str, Any]]: Rich-text block followed by a divider. + Extended rich-text block followed by a divider. 
""" style: dict[str, bool] = {} if bold: @@ -171,7 +172,7 @@ def get_rich_text_blocks( element["style"] = style elements.append(element) - return [{"type": "rich_text", "elements": elements}, get_divider()] + return extend_data([{"type": "rich_text", "elements": elements}, get_divider()]) class SlackConnector(VendorConnectorBase): @@ -228,7 +229,7 @@ def send_message( self, channel_name: str, text: str, - blocks: list[dict[str, Any]] | None = None, + blocks: list[Any] | ExtendedList[ExtendedDict] | None = None, lines: list[str] | None = None, bold: bool = False, italic: bool = False, @@ -277,7 +278,7 @@ def send_message( opts["thread_ts"] = thread_id try: - return self.extend_result(self.bot_web_client.chat_postMessage(**opts).get("ts")) + return self.extend_result(self.bot_web_client.chat_postMessage(**to_builtin(opts)).get("ts")) except SlackApiError as exc: if raise_on_api_error: raise SlackAPIError(exc.response) from exc diff --git a/tests/connectors/test_slack_connector.py b/tests/connectors/test_slack_connector.py index fe60a6b..7a521a6 100644 --- a/tests/connectors/test_slack_connector.py +++ b/tests/connectors/test_slack_connector.py @@ -8,8 +8,15 @@ import pytest -from extended_data.connectors.slack import SlackConnector -from extended_data.containers import ExtendedDict, ExtendedString +from extended_data.connectors.slack import ( + SlackConnector, + get_divider, + get_field_context_message_blocks, + get_header_block, + get_key_value_blocks, + get_rich_text_blocks, +) +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString def test_slack_connector_requires_slack_sdk_when_constructed_without_extra(): @@ -21,6 +28,27 @@ def test_slack_connector_requires_slack_sdk_when_constructed_without_extra(): SlackConnector(token="xoxp-test", bot_token="xoxb-test", from_environment=False) +def test_slack_block_helpers_return_extended_payloads(): + """Slack block helper payloads are first-class extended containers.""" + divider = get_divider() + 
header = get_header_block("Deploys") + context = get_field_context_message_blocks("deploy", {"service": "api"}) + key_value = get_key_value_blocks("service", {"name": "api"}) + rich = get_rich_text_blocks(["hello"], bold=True) + + assert isinstance(divider, ExtendedDict) + assert isinstance(divider["type"], ExtendedString) + assert isinstance(header, ExtendedList) + assert isinstance(header[0], ExtendedDict) + assert isinstance(header[0]["text"], ExtendedDict) + assert isinstance(context, ExtendedList) + assert isinstance(context[0], ExtendedDict) + assert isinstance(key_value, ExtendedList) + assert isinstance(key_value[0]["text"], ExtendedDict) + assert isinstance(rich, ExtendedList) + assert isinstance(rich[0]["elements"], ExtendedList) + + class TestSlackConnector: """Test suite for SlackConnector.""" @@ -73,6 +101,27 @@ def test_send_message(self, mock_webclient_class, base_connector_kwargs): assert ts == "1234567890.123456" mock_bot_client.chat_postMessage.assert_called_once() + @patch("extended_data.connectors.slack.WebClient") + def test_send_message_converts_extended_blocks_for_sdk(self, mock_webclient_class, base_connector_kwargs): + """Slack SDK calls should receive builtin payloads even when helpers are extended.""" + mock_bot_client = MagicMock() + mock_bot_client.users_conversations.return_value = {"channels": [{"name": "general", "id": "C12345"}]} + mock_bot_client.chat_postMessage.return_value = {"ts": "1234567890.123456"} + + mock_user_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + connector.send_message(channel_name="general", text="Test message", lines=["hello"], bold=True) + + kwargs = mock_bot_client.chat_postMessage.call_args.kwargs + assert isinstance(kwargs["blocks"], list) + assert not isinstance(kwargs["blocks"], ExtendedList) + assert isinstance(kwargs["blocks"][0], dict) + assert not 
isinstance(kwargs["blocks"][0], ExtendedDict) + assert isinstance(kwargs["channel"], str) + @patch("extended_data.connectors.slack.SlackConnector._call_api") @patch("extended_data.connectors.slack.WebClient") def test_list_users_filters_deleted( From d1d0334e1afbab1a06e034289744e7a840816d37 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:47:17 -0500 Subject: [PATCH 077/287] feat: promote meshy operational payloads --- src/extended_data/connectors/meshy/README.md | 2 +- src/extended_data/connectors/meshy/jobs.py | 22 ++++++++------- .../connectors/meshy/webhooks/handler.py | 28 +++++++++---------- tests/connectors/meshy/test_jobs.py | 27 ++++++++++++------ tests/connectors/meshy/test_webhooks.py | 10 +++++++ 5 files changed, 55 insertions(+), 34 deletions(-) diff --git a/src/extended_data/connectors/meshy/README.md b/src/extended_data/connectors/meshy/README.md index 95cb259..3519fad 100644 --- a/src/extended_data/connectors/meshy/README.md +++ b/src/extended_data/connectors/meshy/README.md @@ -58,7 +58,7 @@ from extended_data.connectors.meshy.jobs import AssetGenerator, example_characte generator = AssetGenerator(output_root="client/public") manifest = generator.generate_model(example_character_spec(), wait=True) -print(manifest.model_path) +print(manifest["model_path"]) ``` Built-in example specs are available as: diff --git a/src/extended_data/connectors/meshy/jobs.py b/src/extended_data/connectors/meshy/jobs.py index c6db90b..8efa138 100644 --- a/src/extended_data/connectors/meshy/jobs.py +++ b/src/extended_data/connectors/meshy/jobs.py @@ -15,6 +15,7 @@ from extended_data.connectors.meshy import base, text3d from extended_data.connectors.meshy.models import ArtStyle, AssetIntent, AssetSpec, Text3DRequest +from extended_data.containers import ExtendedDict, ExtendedList, extend_data, to_builtin @dataclass @@ -37,8 +38,9 @@ def __post_init__(self) -> None: if self.metadata is None: self.metadata = {} - def to_dict(self) -> dict[str, Any]: - 
return asdict(self) + def to_dict(self) -> ExtendedDict: + """Return an extended manifest payload.""" + return extend_data(asdict(self)) class AssetGenerator: @@ -58,8 +60,8 @@ def _generate_asset_id(self, spec: AssetSpec) -> str: desc_hash = hashlib.sha256(spec.description.encode()).hexdigest()[:8] return f"{spec.intent.value}_{desc_hash}" - def generate_model(self, spec: AssetSpec, wait: bool = True, poll_interval: float = 5.0) -> AssetManifest: - """Generate 3D model from spec.""" + def generate_model(self, spec: AssetSpec, wait: bool = True, poll_interval: float = 5.0) -> ExtendedDict: + """Generate 3D model from spec and return an extended manifest payload.""" asset_id = self._generate_asset_id(spec) # Create task using text3d module @@ -85,7 +87,7 @@ def generate_model(self, spec: AssetSpec, wait: bool = True, poll_interval: floa ) if not wait: - return manifest + return manifest.to_dict() # Poll until complete result = text3d.poll(task_id, interval=poll_interval) @@ -119,12 +121,12 @@ def generate_model(self, spec: AssetSpec, wait: bool = True, poll_interval: floa # Save manifest manifest_path = output_dir / f"{asset_id}_manifest.json" with open(manifest_path, "w") as f: - json.dump(manifest.to_dict(), f, indent=2) + json.dump(to_builtin(manifest.to_dict()), f, indent=2) - return manifest + return manifest.to_dict() - def batch_generate(self, specs: list[AssetSpec], max_concurrent: int = 3) -> list[AssetManifest]: - """Generate multiple assets (respecting rate limits).""" + def batch_generate(self, specs: list[AssetSpec], max_concurrent: int = 3) -> ExtendedList[ExtendedDict]: + """Generate multiple assets and return extended manifest payloads.""" manifests = [] for spec in specs: @@ -134,7 +136,7 @@ def batch_generate(self, specs: list[AssetSpec], max_concurrent: int = 3) -> lis except Exception: # noqa: S112 - batch continues on individual failures continue - return manifests + return extend_data(manifests) # Example specs diff --git 
a/src/extended_data/connectors/meshy/webhooks/handler.py b/src/extended_data/connectors/meshy/webhooks/handler.py index f77b1bd..da6dab6 100644 --- a/src/extended_data/connectors/meshy/webhooks/handler.py +++ b/src/extended_data/connectors/meshy/webhooks/handler.py @@ -7,10 +7,10 @@ import hmac from datetime import datetime, timezone -from typing import Any from extended_data.connectors.meshy import base from extended_data.connectors.meshy.webhooks.schemas import MeshyWebhookPayload +from extended_data.containers import ExtendedDict, extend_data from ..persistence.repository import TaskRepository from ..persistence.schemas import ArtifactRecord @@ -51,28 +51,28 @@ def handle_signed_webhook( signature: str, project: str | None = None, spec_hash: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Verify a raw webhook payload before parsing and processing it.""" if not self.verify_signature(payload, signature): - return { + return extend_data({ "status": "error", "message": "Invalid webhook signature", - } + }) try: parsed_payload = MeshyWebhookPayload.model_validate_json(payload) except ValueError as exc: - return { + return extend_data({ "status": "error", "message": "Invalid webhook payload", "error": str(exc), - } + }) return self.handle_webhook(parsed_payload, project=project, spec_hash=spec_hash) def handle_webhook( self, payload: MeshyWebhookPayload, project: str | None = None, spec_hash: str | None = None - ) -> dict[str, Any]: + ) -> ExtendedDict: """Process webhook payload and update repository. Args: @@ -81,16 +81,16 @@ def handle_webhook( spec_hash: Optional spec hash (will search if not provided) Returns: - Dict with status and details + Extended dict with status and details. 
""" task_lookup = self.repository.find_task_by_id(task_id=payload.id, project=project) if not task_lookup: - return { + return extend_data({ "status": "error", "message": f"Task {payload.id} not found in repository", "task_id": payload.id, - } + }) found_project, found_spec_hash, asset_manifest = task_lookup @@ -101,11 +101,11 @@ def handle_webhook( break if not service_name: - return { + return extend_data({ "status": "error", "message": f"Task {payload.id} not found in task graph", "task_id": payload.id, - } + }) error_message = None if payload.status == "FAILED": @@ -137,7 +137,7 @@ def handle_webhook( error=error_message, ) - return { + return extend_data({ "status": "success", "task_id": payload.id, "project": found_project, @@ -145,7 +145,7 @@ def handle_webhook( "service": service_name, "task_status": payload.status, "artifacts_downloaded": len(artifacts), - } + }) def _download_glb_artifact(self, project: str, spec_hash: str, service: str, glb_url: str) -> ArtifactRecord | None: """Download GLB artifact and create record.""" diff --git a/tests/connectors/meshy/test_jobs.py b/tests/connectors/meshy/test_jobs.py index dc76db7..9129093 100644 --- a/tests/connectors/meshy/test_jobs.py +++ b/tests/connectors/meshy/test_jobs.py @@ -22,6 +22,7 @@ Text3DResult, TextureUrls, ) +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString class TestAssetManifest: @@ -50,6 +51,8 @@ def test_manifest_to_dict(self): model_path="models/test.glb", ) data = manifest.to_dict() + assert isinstance(data, ExtendedDict) + assert isinstance(data["asset_id"], ExtendedString) assert data["asset_id"] == "test-001" assert data["model_path"] == "models/test.glb" @@ -125,9 +128,11 @@ def test_generate_model_no_wait(self, temp_dir): manifest = generator.generate_model(spec, wait=False) - assert manifest.asset_id == "project1-001" - assert manifest.task_id == "task-12345" - assert manifest.model_path is None # Not downloaded yet + assert isinstance(manifest, 
ExtendedDict) + assert isinstance(manifest["asset_id"], ExtendedString) + assert manifest["asset_id"] == "project1-001" + assert manifest["task_id"] == "task-12345" + assert manifest["model_path"] is None # Not downloaded yet mock_text3d.create.assert_called_once() def test_generate_model_with_wait(self, temp_dir): @@ -159,9 +164,10 @@ def test_generate_model_with_wait(self, temp_dir): manifest = generator.generate_model(spec, wait=True, poll_interval=0.01) - assert manifest.asset_id == "project1-001" - assert manifest.model_path is not None - assert "project1-001.glb" in manifest.model_path + assert isinstance(manifest, ExtendedDict) + assert manifest["asset_id"] == "project1-001" + assert manifest["model_path"] is not None + assert "project1-001.glb" in manifest["model_path"] mock_base.download.assert_called() def test_generate_model_saves_manifest_json(self, temp_dir): @@ -233,9 +239,11 @@ def test_batch_generate(self, temp_dir): manifests = generator.batch_generate(specs) + assert isinstance(manifests, ExtendedList) + assert isinstance(manifests[0], ExtendedDict) assert len(manifests) == 2 - assert manifests[0].asset_id == "item-001" - assert manifests[1].asset_id == "item-002" + assert manifests[0]["asset_id"] == "item-001" + assert manifests[1]["asset_id"] == "item-002" def test_batch_generate_continues_on_failure(self, temp_dir): """Test that batch generation continues if one fails.""" @@ -282,8 +290,9 @@ def create_side_effect(*args, **kwargs): manifests = generator.batch_generate(specs) # Only the successful one should be in results + assert isinstance(manifests, ExtendedList) assert len(manifests) == 1 - assert manifests[0].asset_id == "success-001" + assert manifests[0]["asset_id"] == "success-001" class TestExampleSpecs: diff --git a/tests/connectors/meshy/test_webhooks.py b/tests/connectors/meshy/test_webhooks.py index 327ed23..6d250bf 100644 --- a/tests/connectors/meshy/test_webhooks.py +++ b/tests/connectors/meshy/test_webhooks.py @@ -22,6 +22,7 @@ 
WebhookModelUrls, WebhookRiggingResult, ) +from extended_data.containers import ExtendedDict, ExtendedString class TestMeshyWebhookPayload: @@ -149,6 +150,8 @@ def test_handle_webhook_success(self, webhook_handler, mock_repository, webhook_ payload = MeshyWebhookPayload(**webhook_payload_succeeded) result = webhook_handler.handle_webhook(payload) + assert isinstance(result, ExtendedDict) + assert isinstance(result["status"], ExtendedString) assert result["status"] == "success" assert result["task_id"] == "task-12345-abcde" assert result["project"] == "project1" @@ -168,6 +171,8 @@ def test_handle_webhook_task_not_found(self, webhook_handler, mock_repository): ) result = webhook_handler.handle_webhook(payload) + assert isinstance(result, ExtendedDict) + assert isinstance(result["message"], ExtendedString) assert result["status"] == "error" assert "not found" in result["message"] @@ -194,6 +199,7 @@ def test_handle_webhook_failed_task(self, webhook_handler, mock_repository, webh payload = MeshyWebhookPayload(**webhook_payload_failed) result = webhook_handler.handle_webhook(payload) + assert isinstance(result, ExtendedDict) assert result["status"] == "success" # Handler succeeded assert result["task_status"] == "FAILED" # Task failed @@ -248,6 +254,7 @@ def mock_download(url, output_path): payload = MeshyWebhookPayload(**webhook_payload_succeeded) result = handler.handle_webhook(payload) + assert isinstance(result, ExtendedDict) assert result["artifacts_downloaded"] == 1 mock_base.download.assert_called_once() @@ -262,6 +269,7 @@ def test_handle_webhook_no_download_when_disabled(self, mock_repository, webhook payload = MeshyWebhookPayload(**webhook_payload_succeeded) result = handler.handle_webhook(payload) + assert isinstance(result, ExtendedDict) assert result["artifacts_downloaded"] == 0 mock_base.download.assert_not_called() @@ -301,6 +309,7 @@ def test_handle_signed_webhook_rejects_invalid_signature( result = handler.handle_signed_webhook(payload, "invalid") + 
assert isinstance(result, ExtendedDict) assert result == { "status": "error", "message": "Invalid webhook signature", @@ -322,6 +331,7 @@ def test_handle_signed_webhook_processes_valid_signature( result = handler.handle_signed_webhook(payload, signature) + assert isinstance(result, ExtendedDict) assert result["status"] == "success" assert result["task_id"] == "task-12345-abcde" mock_repository.record_task_update.assert_called_once() From 233c7a3473576fd63f178f5417e0b77fcbe9ec9e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:53:35 -0500 Subject: [PATCH 078/287] feat: promote meshy result payloads --- src/extended_data/connectors/meshy/README.md | 2 +- .../connectors/meshy/__init__.py | 6 +- src/extended_data/connectors/meshy/animate.py | 26 +++--- src/extended_data/connectors/meshy/image3d.py | 32 +++---- src/extended_data/connectors/meshy/jobs.py | 24 ++--- .../connectors/meshy/retexture.py | 32 +++---- src/extended_data/connectors/meshy/rigging.py | 32 +++---- src/extended_data/connectors/meshy/text3d.py | 30 ++++--- src/extended_data/connectors/meshy/tools.py | 61 ++++++++----- tests/connectors/meshy/test_jobs.py | 22 +++-- tests/connectors/meshy/test_task_ids.py | 89 ++++++++++++++++++- tests/connectors/meshy/test_tools.py | 39 +++++++- 12 files changed, 278 insertions(+), 117 deletions(-) diff --git a/src/extended_data/connectors/meshy/README.md b/src/extended_data/connectors/meshy/README.md index 3519fad..5038948 100644 --- a/src/extended_data/connectors/meshy/README.md +++ b/src/extended_data/connectors/meshy/README.md @@ -35,7 +35,7 @@ task_id = text3d.create( ) result = text3d.poll(task_id) -print(result.status) +print(result["status"]) ``` The package also exposes `image3d`, `rigging`, `animate`, and `retexture` diff --git a/src/extended_data/connectors/meshy/__init__.py b/src/extended_data/connectors/meshy/__init__.py index 9b466a3..988830d 100644 --- a/src/extended_data/connectors/meshy/__init__.py +++ 
b/src/extended_data/connectors/meshy/__init__.py @@ -13,13 +13,13 @@ model = image3d.generate("https://example.com/image.png") # Rig for animation - rigged = rigging.rig(model.id) + rigged = rigging.rig(model["id"]) # Apply animation - animated = animate.apply(rigged.id, animation_id=0) + animated = animate.apply(rigged["id"], animation_id=0) # Retexture - retextured = retexture.apply(model.id, "golden with gems") + retextured = retexture.apply(model["id"], "golden with gems") # LangChain tools from extended_data.connectors.meshy.tools import get_tools diff --git a/src/extended_data/connectors/meshy/animate.py b/src/extended_data/connectors/meshy/animate.py index b292d0e..0605830 100644 --- a/src/extended_data/connectors/meshy/animate.py +++ b/src/extended_data/connectors/meshy/animate.py @@ -17,10 +17,10 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import AnimationRequest, AnimationResult, TaskStatus -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedString, extend_data -def create(request: AnimationRequest) -> str: +def create(request: AnimationRequest) -> ExtendedString: """Create animation task. 
Returns task_id.""" response = base.request( "POST", @@ -31,25 +31,27 @@ def create(request: AnimationRequest) -> str: return extend_data(response.json().get("result")) -def get(task_id: str) -> AnimationResult: +def get(task_id: str) -> ExtendedDict: """Get task status.""" response = base.request("GET", f"animations/{task_id}", version="v1") - return AnimationResult(**response.json()) + result = AnimationResult(**response.json()) + return extend_data(result.model_dump(mode="json")) -def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> AnimationResult: +def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> ExtendedDict: """Poll until complete or failed.""" start = time.time() while True: result = get(task_id) - if result.status == TaskStatus.SUCCEEDED: + status = result.get("status") + if status == TaskStatus.SUCCEEDED: return result - if result.status == TaskStatus.FAILED: - error = getattr(result, "task_error", {}) + if status == TaskStatus.FAILED: + error = result.get("task_error", {}) msg = error.get("message", "Unknown error") if isinstance(error, dict) else str(error) msg = f"Task failed: {msg}" raise RuntimeError(msg) - if result.status == TaskStatus.EXPIRED: + if status == TaskStatus.EXPIRED: msg = "Task expired" raise RuntimeError(msg) if time.time() - start > timeout: @@ -65,7 +67,7 @@ def apply( loop: bool = True, frame_rate: int = 30, wait: bool = True, -) -> AnimationResult | str: +) -> ExtendedDict | ExtendedString: """Apply animation to a rigged model. Args: @@ -76,7 +78,7 @@ def apply( wait: Wait for completion (default True) Returns: - AnimationResult if wait=True, task_id if wait=False + Extended result payload if wait=True, extended task_id if wait=False. 
""" request = AnimationRequest( rig_task_id=rigged_task_id, @@ -90,4 +92,4 @@ def apply( if not wait: return task_id - return poll(task_id) + return poll(str(task_id)) diff --git a/src/extended_data/connectors/meshy/image3d.py b/src/extended_data/connectors/meshy/image3d.py index 0874349..8260ae9 100644 --- a/src/extended_data/connectors/meshy/image3d.py +++ b/src/extended_data/connectors/meshy/image3d.py @@ -4,7 +4,7 @@ from extended_data.connectors.meshy import image3d result = image3d.generate("https://example.com/image.png") - print(result.model_urls.glb) + print(result["model_urls"]["glb"]) """ from __future__ import annotations @@ -13,10 +13,10 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import Image3DRequest, Image3DResult, TaskStatus -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedString, extend_data -def create(request: Image3DRequest) -> str: +def create(request: Image3DRequest) -> ExtendedString: """Create image-to-3d task. Returns task_id.""" response = base.request( "POST", @@ -30,13 +30,14 @@ def create(request: Image3DRequest) -> str: return extend_data(data["result"]) -def get(task_id: str) -> Image3DResult: +def get(task_id: str) -> ExtendedDict: """Get task status.""" response = base.request("GET", f"image-to-3d/{task_id}", version="v2") - return Image3DResult(**response.json()) + result = Image3DResult(**response.json()) + return extend_data(result.model_dump(mode="json")) -def refine(task_id: str) -> str: +def refine(task_id: str) -> ExtendedString: """Refine preview to full quality. 
Returns new task_id.""" response = base.request( "POST", @@ -50,7 +51,7 @@ def refine(task_id: str) -> str: return extend_data(data["result"]) -def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> Image3DResult: +def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> ExtendedDict: """Polls the status of an image-to-3D task until it completes, fails, expires, or times out. Args: @@ -59,7 +60,7 @@ def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> Image3D timeout: Maximum time in seconds to wait for task completion (default: 600.0). Returns: - Image3DResult: The result of the completed task. + Extended payload for the completed task. Raises: RuntimeError: If the task fails or expires. @@ -68,12 +69,13 @@ def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> Image3D start = time.time() while True: result = get(task_id) - if result.status == TaskStatus.SUCCEEDED: + status = result.get("status") + if status == TaskStatus.SUCCEEDED: return result - if result.status == TaskStatus.FAILED: - msg = f"Task failed: {result.error or 'Unknown error'}" + if status == TaskStatus.FAILED: + msg = f"Task failed: {result.get('error') or 'Unknown error'}" raise RuntimeError(msg) - if result.status == TaskStatus.EXPIRED: + if status == TaskStatus.EXPIRED: msg = "Task expired" raise RuntimeError(msg) if time.time() - start > timeout: @@ -89,7 +91,7 @@ def generate( target_polycount: int | None = None, enable_pbr: bool = True, wait: bool = True, -) -> Image3DResult | str: +) -> ExtendedDict | ExtendedString: """Generate a 3D model from an image. Args: @@ -100,7 +102,7 @@ def generate( wait: Wait for completion (default True) Returns: - Image3DResult if wait=True, task_id if wait=False + Extended result payload if wait=True, extended task_id if wait=False. 
""" request = Image3DRequest( mode="preview", @@ -115,4 +117,4 @@ def generate( if not wait: return task_id - return poll(task_id) + return poll(str(task_id)) diff --git a/src/extended_data/connectors/meshy/jobs.py b/src/extended_data/connectors/meshy/jobs.py index 8efa138..069ba1a 100644 --- a/src/extended_data/connectors/meshy/jobs.py +++ b/src/extended_data/connectors/meshy/jobs.py @@ -81,7 +81,7 @@ def generate_model(self, spec: AssetSpec, wait: bool = True, poll_interval: floa intent=spec.intent.value, description=spec.description, art_style=spec.art_style.value, - task_id=task_id, + task_id=str(task_id), polycount_target=spec.target_polycount, metadata=spec.metadata.copy() if spec.metadata else {}, ) @@ -90,32 +90,36 @@ def generate_model(self, spec: AssetSpec, wait: bool = True, poll_interval: floa return manifest.to_dict() # Poll until complete - result = text3d.poll(task_id, interval=poll_interval) + result = text3d.poll(str(task_id), interval=poll_interval) # Download assets output_dir = self.output_root / spec.output_path output_dir.mkdir(parents=True, exist_ok=True) - if result.model_urls and result.model_urls.glb: + model_urls = result.get("model_urls") or {} + glb_url = model_urls.get("glb") + if glb_url: glb_path = output_dir / f"{asset_id}.glb" - base.download(result.model_urls.glb, str(glb_path)) + base.download(str(glb_url), str(glb_path)) manifest.model_path = str(glb_path.relative_to(self.output_root)) - if result.texture_urls and len(result.texture_urls) > 0: - textures = result.texture_urls[0] + texture_urls = result.get("texture_urls") or [] + if texture_urls and len(texture_urls) > 0: + textures = texture_urls[0] texture_paths = {} - for map_type, url in textures.model_dump(exclude_none=True).items(): + for map_type, url in textures.items(): if url: tex_path = output_dir / f"{asset_id}_{map_type}.png" - base.download(url, str(tex_path)) + base.download(str(url), str(tex_path)) texture_paths[map_type] = 
str(tex_path.relative_to(self.output_root)) manifest.texture_paths = texture_paths - if result.thumbnail_url: + thumbnail_url = result.get("thumbnail_url") + if thumbnail_url: thumb_path = output_dir / f"{asset_id}_thumb.png" - base.download(result.thumbnail_url, str(thumb_path)) + base.download(str(thumbnail_url), str(thumb_path)) manifest.thumbnail_path = str(thumb_path.relative_to(self.output_root)) # Save manifest diff --git a/src/extended_data/connectors/meshy/retexture.py b/src/extended_data/connectors/meshy/retexture.py index c3c6833..afa0694 100644 --- a/src/extended_data/connectors/meshy/retexture.py +++ b/src/extended_data/connectors/meshy/retexture.py @@ -12,10 +12,10 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import RetextureRequest, RetextureResult, TaskStatus -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedString, extend_data -def create(request: RetextureRequest) -> str: +def create(request: RetextureRequest) -> ExtendedString: """Create retexture task. 
Returns task_id.""" response = base.request( "POST", @@ -26,25 +26,27 @@ def create(request: RetextureRequest) -> str: return extend_data(response.json().get("result")) -def get(task_id: str) -> RetextureResult: +def get(task_id: str) -> ExtendedDict: """Get task status.""" response = base.request("GET", f"retexture/{task_id}", version="v1") - return RetextureResult(**response.json()) + result = RetextureResult(**response.json()) + return extend_data(result.model_dump(mode="json")) -def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> RetextureResult: +def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> ExtendedDict: """Poll until complete or failed.""" start = time.time() while True: result = get(task_id) - if result.status == TaskStatus.SUCCEEDED: + status = result.get("status") + if status == TaskStatus.SUCCEEDED: return result - if result.status == TaskStatus.FAILED: - error = getattr(result, "task_error", {}) + if status == TaskStatus.FAILED: + error = result.get("task_error", {}) msg = error.get("message", "Unknown error") if isinstance(error, dict) else str(error) msg = f"Task failed: {msg}" raise RuntimeError(msg) - if result.status == TaskStatus.EXPIRED: + if status == TaskStatus.EXPIRED: msg = "Task expired" raise RuntimeError(msg) if time.time() - start > timeout: @@ -60,7 +62,7 @@ def apply( enable_original_uv: bool = True, enable_pbr: bool = True, wait: bool = True, -) -> RetextureResult | str: +) -> ExtendedDict | ExtendedString: """Apply new textures to a model. Args: @@ -71,7 +73,7 @@ def apply( wait: Wait for completion (default True) Returns: - RetextureResult if wait=True, task_id if wait=False + Extended result payload if wait=True, extended task_id if wait=False. 
""" request = RetextureRequest( input_task_id=model_task_id, @@ -85,7 +87,7 @@ def apply( if not wait: return task_id - return poll(task_id) + return poll(str(task_id)) def apply_from_image( @@ -95,7 +97,7 @@ def apply_from_image( enable_original_uv: bool = True, enable_pbr: bool = True, wait: bool = True, -) -> RetextureResult | str: +) -> ExtendedDict | ExtendedString: """Apply textures based on reference image. Args: @@ -106,7 +108,7 @@ def apply_from_image( wait: Wait for completion (default True) Returns: - RetextureResult if wait=True, task_id if wait=False + Extended result payload if wait=True, extended task_id if wait=False. """ request = RetextureRequest( input_task_id=model_task_id, @@ -120,4 +122,4 @@ def apply_from_image( if not wait: return task_id - return poll(task_id) + return poll(str(task_id)) diff --git a/src/extended_data/connectors/meshy/rigging.py b/src/extended_data/connectors/meshy/rigging.py index 93f458a..bcdbdb7 100644 --- a/src/extended_data/connectors/meshy/rigging.py +++ b/src/extended_data/connectors/meshy/rigging.py @@ -12,10 +12,10 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import RiggingRequest, RiggingResult, TaskStatus -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedString, extend_data -def create(request: RiggingRequest) -> str: +def create(request: RiggingRequest) -> ExtendedString: """Create rigging task. 
Returns task_id.""" response = base.request( "POST", @@ -26,25 +26,27 @@ def create(request: RiggingRequest) -> str: return extend_data(response.json().get("result")) -def get(task_id: str) -> RiggingResult: +def get(task_id: str) -> ExtendedDict: """Get task status.""" response = base.request("GET", f"rigging/{task_id}", version="v1") - return RiggingResult(**response.json()) + result = RiggingResult(**response.json()) + return extend_data(result.model_dump(mode="json")) -def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> RiggingResult: +def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> ExtendedDict: """Poll until complete or failed.""" start = time.time() while True: result = get(task_id) - if result.status == TaskStatus.SUCCEEDED: + status = result.get("status") + if status == TaskStatus.SUCCEEDED: return result - if result.status == TaskStatus.FAILED: - error = getattr(result, "task_error", {}) + if status == TaskStatus.FAILED: + error = result.get("task_error", {}) msg = error.get("message", "Unknown error") if isinstance(error, dict) else str(error) msg = f"Task failed: {msg}" raise RuntimeError(msg) - if result.status == TaskStatus.EXPIRED: + if status == TaskStatus.EXPIRED: msg = "Task expired" raise RuntimeError(msg) if time.time() - start > timeout: @@ -58,7 +60,7 @@ def rig( *, height_meters: float = 1.7, wait: bool = True, -) -> RiggingResult | str: +) -> ExtendedDict | ExtendedString: """Rig a model for animation. Args: @@ -67,7 +69,7 @@ def rig( wait: Wait for completion (default True) Returns: - RiggingResult if wait=True, task_id if wait=False + Extended result payload if wait=True, extended task_id if wait=False. 
""" request = RiggingRequest( input_task_id=model_task_id, @@ -79,7 +81,7 @@ def rig( if not wait: return task_id - return poll(task_id) + return poll(str(task_id)) def rig_from_url( @@ -88,7 +90,7 @@ def rig_from_url( height_meters: float = 1.7, texture_url: str | None = None, wait: bool = True, -) -> RiggingResult | str: +) -> ExtendedDict | ExtendedString: """Rig a model from URL. Args: @@ -98,7 +100,7 @@ def rig_from_url( wait: Wait for completion (default True) Returns: - RiggingResult if wait=True, task_id if wait=False + Extended result payload if wait=True, extended task_id if wait=False. """ request = RiggingRequest( model_url=model_url, @@ -111,4 +113,4 @@ def rig_from_url( if not wait: return task_id - return poll(task_id) + return poll(str(task_id)) diff --git a/src/extended_data/connectors/meshy/text3d.py b/src/extended_data/connectors/meshy/text3d.py index aa52806..26c1829 100644 --- a/src/extended_data/connectors/meshy/text3d.py +++ b/src/extended_data/connectors/meshy/text3d.py @@ -4,7 +4,7 @@ from extended_data.connectors.meshy import text3d result = text3d.generate("a medieval sword") - print(result.model_urls.glb) + print(result["model_urls"]["glb"]) """ from __future__ import annotations @@ -13,10 +13,10 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import ArtStyle, TaskStatus, Text3DRequest, Text3DResult -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedString, extend_data -def create(request: Text3DRequest) -> str: +def create(request: Text3DRequest) -> ExtendedString: """Create text-to-3d task. 
Returns task_id.""" response = base.request( "POST", @@ -27,13 +27,14 @@ def create(request: Text3DRequest) -> str: return extend_data(response.json().get("result")) -def get(task_id: str) -> Text3DResult: +def get(task_id: str) -> ExtendedDict: """Get task status.""" response = base.request("GET", f"text-to-3d/{task_id}", version="v2") - return Text3DResult(**response.json()) + result = Text3DResult(**response.json()) + return extend_data(result.model_dump(mode="json")) -def refine(task_id: str) -> str: +def refine(task_id: str) -> ExtendedString: """Refine preview to full quality. Returns new task_id.""" response = base.request( "POST", @@ -44,19 +45,20 @@ def refine(task_id: str) -> str: return extend_data(response.json().get("result")) -def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> Text3DResult: +def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> ExtendedDict: """Poll until complete or failed.""" start = time.time() while True: result = get(task_id) - if result.status == TaskStatus.SUCCEEDED: + status = result.get("status") + if status == TaskStatus.SUCCEEDED: return result - if result.status == TaskStatus.FAILED: - error = getattr(result, "task_error", {}) + if status == TaskStatus.FAILED: + error = result.get("task_error", {}) msg = error.get("message", "Unknown error") if isinstance(error, dict) else str(error) msg = f"Task failed: {msg}" raise RuntimeError(msg) - if result.status == TaskStatus.EXPIRED: + if status == TaskStatus.EXPIRED: msg = "Task expired" raise RuntimeError(msg) if time.time() - start > timeout: @@ -73,7 +75,7 @@ def generate( target_polycount: int = 15000, enable_pbr: bool = True, wait: bool = True, -) -> Text3DResult | str: +) -> ExtendedDict | ExtendedString: """Generate a 3D model from text. 
Args: @@ -85,7 +87,7 @@ def generate( wait: Wait for completion (default True) Returns: - Text3DResult if wait=True, task_id if wait=False + Extended result payload if wait=True, extended task_id if wait=False. """ if isinstance(art_style, str): art_style = ArtStyle(art_style) @@ -104,4 +106,4 @@ def generate( if not wait: return task_id - return poll(task_id) + return poll(str(task_id)) diff --git a/src/extended_data/connectors/meshy/tools.py b/src/extended_data/connectors/meshy/tools.py index 0320fef..2e6a63e 100644 --- a/src/extended_data/connectors/meshy/tools.py +++ b/src/extended_data/connectors/meshy/tools.py @@ -7,7 +7,7 @@ from __future__ import annotations -from collections.abc import Callable +from collections.abc import Callable, Mapping from typing import Any from pydantic import BaseModel, Field @@ -97,6 +97,19 @@ class GetAnimationSchema(BaseModel): # ============================================================================= +def _result_get(result: object, field: str, default: object = None) -> object: + """Read a field from an extended payload or a model-like test double.""" + if isinstance(result, Mapping): + return result.get(field, default) + return getattr(result, field, default) + + +def _result_status(result: object) -> str: + """Read a task status from an extended payload or model-like object.""" + status = _result_get(result, "status", "unknown") + return str(status.value) if hasattr(status, "value") else str(status) + + def _extract_result_fields(result: object) -> dict[str, object]: """Extract common fields from Meshy API result objects. 
@@ -110,15 +123,18 @@ def _extract_result_fields(result: object) -> dict[str, object]: Dict with status, model_url, and thumbnail_url fields """ # Extract status - prefer .value if it's an enum, otherwise str() - status = getattr(result.status, "value", str(result.status)) if hasattr(result, "status") else "unknown" + status = _result_status(result) # Extract model_url from model_urls.glb if available model_url = None - if hasattr(result, "model_urls") and result.model_urls: - model_url = result.model_urls.glb + model_urls = _result_get(result, "model_urls") + if isinstance(model_urls, Mapping): + model_url = model_urls.get("glb") + elif model_urls: + model_url = getattr(model_urls, "glb", None) # Extract thumbnail_url - thumbnail_url = getattr(result, "thumbnail_url", None) + thumbnail_url = _result_get(result, "thumbnail_url") return extend_data({ "status": status, @@ -168,7 +184,7 @@ def text3d_generate( fields = _extract_result_fields(result) return extend_data({ - "task_id": result.id, + "task_id": _result_get(result, "id"), **fields, }) @@ -209,7 +225,7 @@ def image3d_generate( fields = _extract_result_fields(result) return extend_data({ - "task_id": result.id, + "task_id": _result_get(result, "id"), **fields, }) @@ -237,8 +253,8 @@ def rig_model(model_id: str, wait: bool = True) -> dict[str, Any]: if wait: return extend_data({ - "task_id": result.id, - "status": result.status.value if hasattr(result.status, "value") else str(result.status), + "task_id": _result_get(result, "id"), + "status": _result_status(result), "message": "Rigging completed", }) @@ -270,10 +286,10 @@ def apply_animation(model_id: str, animation_id: int, wait: bool = True) -> dict if wait: return extend_data({ - "task_id": result.id, - "status": result.status.value if hasattr(result.status, "value") else str(result.status), + "task_id": _result_get(result, "id"), + "status": _result_status(result), "message": "Animation completed", - "glb_url": result.animation_glb_url, + "glb_url": 
_result_get(result, "animation_glb_url"), }) msg = "Expected animation task id when wait=False" @@ -315,10 +331,10 @@ def retexture_model( if wait: return extend_data({ - "task_id": result.id, - "status": result.status.value if hasattr(result.status, "value") else str(result.status), + "task_id": _result_get(result, "id"), + "status": _result_status(result), "message": "Retexture completed", - "model_url": getattr(result, "model_url", None), + "model_url": _result_get(result, "model_url"), }) msg = "Expected retexture task id when wait=False" @@ -386,19 +402,22 @@ def check_task_status(task_id: str, task_type: str = "text-to-3d") -> dict[str, raise ValueError(f"Unknown task type: {task_type}") result = get_func(task_id) - status = result.status.value if hasattr(result.status, "value") else str(result.status) + status = _result_status(result) # Get model URL if available model_url = None - if hasattr(result, "model_urls") and result.model_urls: - model_url = result.model_urls.glb - elif hasattr(result, "glb_url"): - model_url = result.glb_url + model_urls = _result_get(result, "model_urls") + if isinstance(model_urls, Mapping): + model_url = model_urls.get("glb") + elif model_urls: + model_url = getattr(model_urls, "glb", None) + if model_url is None: + model_url = _result_get(result, "glb_url") return extend_data({ "task_id": task_id, "status": status, - "progress": getattr(result, "progress", None), + "progress": _result_get(result, "progress"), "model_url": model_url, }) diff --git a/tests/connectors/meshy/test_jobs.py b/tests/connectors/meshy/test_jobs.py index 9129093..2bde941 100644 --- a/tests/connectors/meshy/test_jobs.py +++ b/tests/connectors/meshy/test_jobs.py @@ -22,7 +22,11 @@ Text3DResult, TextureUrls, ) -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data + + +def _extended_result(result: Text3DResult) -> ExtendedDict: + return 
extend_data(result.model_dump(mode="json")) class TestAssetManifest: @@ -142,7 +146,7 @@ def test_generate_model_with_wait(self, temp_dir): patch("extended_data.connectors.meshy.jobs.base") as mock_base, ): mock_text3d.create.return_value = "task-12345" - mock_text3d.poll.return_value = Text3DResult( + mock_text3d.poll.return_value = _extended_result(Text3DResult( id="task-12345", status=TaskStatus.SUCCEEDED, progress=100, @@ -150,7 +154,7 @@ def test_generate_model_with_wait(self, temp_dir): model_urls=ModelUrls(glb="https://example.com/model.glb"), texture_urls=[TextureUrls(base_color="https://example.com/base.png")], thumbnail_url="https://example.com/thumb.png", - ) + )) mock_base.download.return_value = 1000 generator = AssetGenerator(output_root=str(temp_dir)) @@ -177,13 +181,13 @@ def test_generate_model_saves_manifest_json(self, temp_dir): patch("extended_data.connectors.meshy.jobs.base") as mock_base, ): mock_text3d.create.return_value = "task-12345" - mock_text3d.poll.return_value = Text3DResult( + mock_text3d.poll.return_value = _extended_result(Text3DResult( id="task-12345", status=TaskStatus.SUCCEEDED, progress=100, created_at=1700000000, model_urls=ModelUrls(glb="https://example.com/model.glb"), - ) + )) mock_base.download.return_value = 1000 generator = AssetGenerator(output_root=str(temp_dir)) @@ -211,13 +215,13 @@ def test_batch_generate(self, temp_dir): patch("extended_data.connectors.meshy.jobs.base") as mock_base, ): mock_text3d.create.return_value = "task-12345" - mock_text3d.poll.return_value = Text3DResult( + mock_text3d.poll.return_value = _extended_result(Text3DResult( id="task-12345", status=TaskStatus.SUCCEEDED, progress=100, created_at=1700000000, model_urls=ModelUrls(glb="https://example.com/model.glb"), - ) + )) mock_base.download.return_value = 1000 generator = AssetGenerator(output_root=str(temp_dir)) @@ -261,13 +265,13 @@ def create_side_effect(*args, **kwargs): return "task-success" mock_text3d.create.side_effect = 
create_side_effect - mock_text3d.poll.return_value = Text3DResult( + mock_text3d.poll.return_value = _extended_result(Text3DResult( id="task-success", status=TaskStatus.SUCCEEDED, progress=100, created_at=1700000000, model_urls=ModelUrls(glb="https://example.com/model.glb"), - ) + )) mock_base.download.return_value = 1000 generator = AssetGenerator(output_root=str(temp_dir)) diff --git a/tests/connectors/meshy/test_task_ids.py b/tests/connectors/meshy/test_task_ids.py index e9ee7f5..7d78169 100644 --- a/tests/connectors/meshy/test_task_ids.py +++ b/tests/connectors/meshy/test_task_ids.py @@ -12,7 +12,7 @@ RiggingRequest, Text3DRequest, ) -from extended_data.containers import ExtendedString +from extended_data.containers import ExtendedDict, ExtendedString def _task_response(task_id: str) -> MagicMock: @@ -21,6 +21,12 @@ def _task_response(task_id: str) -> MagicMock: return response +def _json_response(payload: dict[str, object]) -> MagicMock: + response = MagicMock() + response.json.return_value = payload + return response + + def test_text3d_task_ids_are_extended_strings() -> None: with patch("extended_data.connectors.meshy.text3d.base.request", return_value=_task_response("text-task")): created = text3d.create(Text3DRequest(prompt="a sword")) @@ -71,3 +77,84 @@ def test_retexture_task_id_is_extended_string() -> None: assert isinstance(created, ExtendedString) assert created == "retexture-task" + + +def test_text3d_get_returns_extended_payload() -> None: + payload = { + "id": "text-task", + "status": "SUCCEEDED", + "progress": 100, + "created_at": 1700000000, + "model_urls": {"glb": "https://example.com/model.glb"}, + } + with patch("extended_data.connectors.meshy.text3d.base.request", return_value=_json_response(payload)): + result = text3d.get("text-task") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["id"], ExtendedString) + assert isinstance(result["model_urls"], ExtendedDict) + assert result["model_urls"]["glb"] == 
"https://example.com/model.glb" + + +def test_image3d_get_returns_extended_payload() -> None: + payload = { + "id": "image-task", + "status": "SUCCEEDED", + "progress": 100, + "created_at": 1700000000, + "model_urls": {"glb": "https://example.com/image.glb"}, + } + with patch("extended_data.connectors.meshy.image3d.base.request", return_value=_json_response(payload)): + result = image3d.get("image-task") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["model_urls"], ExtendedDict) + assert result["model_urls"]["glb"] == "https://example.com/image.glb" + + +def test_animation_get_returns_extended_payload() -> None: + payload = { + "id": "animation-task", + "status": "SUCCEEDED", + "progress": 100, + "created_at": 1700000000, + "animation_glb_url": "https://example.com/animation.glb", + } + with patch("extended_data.connectors.meshy.animate.base.request", return_value=_json_response(payload)): + result = animate.get("animation-task") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["animation_glb_url"], ExtendedString) + assert result["animation_glb_url"] == "https://example.com/animation.glb" + + +def test_rigging_get_returns_extended_payload() -> None: + payload = { + "id": "rig-task", + "status": "SUCCEEDED", + "progress": 100, + "created_at": 1700000000, + "result": {"rigged_character_glb_url": "https://example.com/rig.glb"}, + } + with patch("extended_data.connectors.meshy.rigging.base.request", return_value=_json_response(payload)): + result = rigging.get("rig-task") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["result"], ExtendedDict) + assert result["result"]["rigged_character_glb_url"] == "https://example.com/rig.glb" + + +def test_retexture_get_returns_extended_payload() -> None: + payload = { + "id": "retexture-task", + "status": "SUCCEEDED", + "progress": 100, + "created_at": 1700000000, + "model_urls": {"glb": "https://example.com/retexture.glb"}, + } + with 
patch("extended_data.connectors.meshy.retexture.base.request", return_value=_json_response(payload)): + result = retexture.get("retexture-task") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["model_urls"], ExtendedDict) + assert result["model_urls"]["glb"] == "https://example.com/retexture.glb" diff --git a/tests/connectors/meshy/test_tools.py b/tests/connectors/meshy/test_tools.py index f600801..da7ebcc 100644 --- a/tests/connectors/meshy/test_tools.py +++ b/tests/connectors/meshy/test_tools.py @@ -13,7 +13,7 @@ import pytest -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data # Expected tools list - canonical reference for all Meshy tools @@ -91,6 +91,25 @@ def test_successful_generation(self): assert result["model_url"] == "https://example.com/model.glb" assert result["thumbnail_url"] == "https://example.com/thumb.png" + def test_successful_generation_accepts_extended_payload(self): + """Tool wrapper should consume the real extended result payload shape.""" + from extended_data.connectors.meshy.tools import text3d_generate + + mock_result = extend_data({ + "id": "task_123", + "status": "SUCCEEDED", + "model_urls": {"glb": "https://example.com/model.glb"}, + "thumbnail_url": "https://example.com/thumb.png", + }) + + with patch("extended_data.connectors.meshy.text3d.generate", return_value=mock_result): + result = text3d_generate(prompt="a medieval sword") + + assert isinstance(result, ExtendedDict) + assert result["task_id"] == "task_123" + assert result["status"] == "SUCCEEDED" + assert result["model_url"] == "https://example.com/model.glb" + def test_generation_with_defaults(self): """Test generation with default parameters. 
@@ -352,6 +371,24 @@ def test_check_text3d_status(self): assert result["progress"] == 100 assert result["model_url"] == "https://example.com/model.glb" + def test_check_text3d_status_accepts_extended_payload(self): + """Task status wrapper should consume the real extended get() payload.""" + from extended_data.connectors.meshy.tools import check_task_status + + mock_result = extend_data({ + "status": "SUCCEEDED", + "progress": 100, + "model_urls": {"glb": "https://example.com/model.glb"}, + }) + + with patch("extended_data.connectors.meshy.text3d.get", return_value=mock_result): + result = check_task_status(task_id="task_123", task_type="text-to-3d") + + assert isinstance(result, ExtendedDict) + assert result["status"] == "SUCCEEDED" + assert result["progress"] == 100 + assert result["model_url"] == "https://example.com/model.glb" + def test_check_unknown_task_type(self): """Test checking unknown task type.""" from extended_data.connectors.meshy.tools import check_task_status From 5c860872029bc7c3404266eb90a2dad70ca851fa Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 05:57:41 -0500 Subject: [PATCH 079/287] feat: promote secrets connector payloads --- .../connectors/secrets/__init__.py | 103 +++++++----- src/extended_data/connectors/secrets/tools.py | 74 +++------ tests/connectors/test_secrets.py | 153 ++++++++++++------ 3 files changed, 188 insertions(+), 142 deletions(-) diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 9a5a941..2449398 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -16,16 +16,16 @@ connector = SecretsConnector() # Validate a configuration - is_valid, message = connector.validate_config("pipeline.yaml") + validation = connector.validate_config("pipeline.yaml") # Run a dry-run to see what would change result = connector.dry_run("pipeline.yaml") - print(f"Would sync {result.secrets_processed} 
secrets") + print(f"Would sync {result['secrets_processed']} secrets") # Execute the full pipeline result = connector.run_pipeline("pipeline.yaml") - if result.success: - print(f"Synced {result.secrets_added} secrets") + if result["success"]: + print(f"Synced {result['secrets_added']} secrets") """ from __future__ import annotations @@ -34,12 +34,13 @@ import shutil import subprocess -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from enum import Enum from pathlib import Path from typing import Any from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import ExtendedDict, extend_data from extended_data.logging import Logging @@ -134,6 +135,10 @@ def from_cli_output(cls, output: dict[str, Any]) -> SyncResult: diff_output=output.get("diff_output", ""), ) + def to_dict(self) -> ExtendedDict: + """Return an extended sync result payload.""" + return extend_data(asdict(self)) + @dataclass class ConfigInfo: @@ -164,6 +169,10 @@ def from_native(cls, native_info: Any) -> ConfigInfo: aws_region=native_info.AWSRegion, ) + def to_dict(self) -> ExtendedDict: + """Return an extended config info payload.""" + return extend_data(asdict(self)) + class SecretsConnector(VendorConnectorBase): """Enterprise-grade secret synchronization connector. @@ -233,19 +242,25 @@ def cli_available(self) -> bool: """Check if CLI is available.""" return self._cli_path is not None - def validate_config(self, config_path: str) -> tuple[bool, str]: + def validate_config(self, config_path: str) -> ExtendedDict: """Validate a pipeline configuration file. Args: config_path: Path to YAML configuration file Returns: - Tuple of (is_valid, message) + Extended validation payload. 
""" if self._prefer_native: - return _native.ValidateConfig(config_path) + is_valid, message = _native.ValidateConfig(config_path) + else: + is_valid, message = self._cli_validate_config(config_path) - return self._cli_validate_config(config_path) + return extend_data({ + "valid": is_valid, + "message": message, + "config_path": config_path, + }) def _cli_validate_config(self, config_path: str) -> tuple[bool, str]: """Validate config via CLI.""" @@ -268,20 +283,20 @@ def _cli_validate_config(self, config_path: str) -> tuple[bool, str]: except Exception as e: return False, str(e) - def get_config_info(self, config_path: str) -> ConfigInfo: + def get_config_info(self, config_path: str) -> ExtendedDict: """Get detailed information about a configuration. Args: config_path: Path to YAML configuration file Returns: - ConfigInfo with configuration details + Extended configuration details payload. """ if self._prefer_native: native_info = _native.GetConfigInfo(config_path) - return ConfigInfo.from_native(native_info) + return ConfigInfo.from_native(native_info).to_dict() - return self._cli_get_config_info(config_path) + return self._cli_get_config_info(config_path).to_dict() def _cli_get_config_info(self, config_path: str) -> ConfigInfo: """Get config info via CLI.""" @@ -317,7 +332,7 @@ def run_pipeline( self, config_path: str, options: SyncOptions | None = None, - ) -> SyncResult: + ) -> ExtendedDict: """Execute the secrets synchronization pipeline. Args: @@ -325,14 +340,14 @@ def run_pipeline( options: Execution options (defaults to full pipeline) Returns: - SyncResult with operation details + Extended sync result payload. 
""" options = options or SyncOptions() if self._prefer_native: - return self._native_run_pipeline(config_path, options) + return self._native_run_pipeline(config_path, options).to_dict() - return self._cli_run_pipeline(config_path, options) + return self._cli_run_pipeline(config_path, options).to_dict() def _native_run_pipeline( self, @@ -450,23 +465,23 @@ def _cli_run_pipeline( error_message=str(e), ) - def dry_run(self, config_path: str) -> SyncResult: + def dry_run(self, config_path: str) -> ExtendedDict: """Perform a dry run of the pipeline. Args: config_path: Path to YAML configuration file Returns: - SyncResult with what would be changed + Extended dry-run result payload. """ if self._prefer_native: native_result = _native.DryRun(config_path) - return SyncResult.from_native(native_result) + return SyncResult.from_native(native_result).to_dict() options = SyncOptions(dry_run=True, compute_diff=True) - return self._cli_run_pipeline(config_path, options) + return self._cli_run_pipeline(config_path, options).to_dict() - def merge(self, config_path: str, dry_run: bool = False) -> SyncResult: + def merge(self, config_path: str, dry_run: bool = False) -> ExtendedDict: """Run only the merge phase of the pipeline. Args: @@ -474,20 +489,20 @@ def merge(self, config_path: str, dry_run: bool = False) -> SyncResult: dry_run: If True, don't make actual changes Returns: - SyncResult with merge operation details + Extended merge result payload. 
""" if self._prefer_native: native_result = _native.Merge(config_path, dry_run) - return SyncResult.from_native(native_result) + return SyncResult.from_native(native_result).to_dict() options = SyncOptions( operation=SyncOperation.MERGE, dry_run=dry_run, compute_diff=dry_run, ) - return self._cli_run_pipeline(config_path, options) + return self._cli_run_pipeline(config_path, options).to_dict() - def sync(self, config_path: str, dry_run: bool = False) -> SyncResult: + def sync(self, config_path: str, dry_run: bool = False) -> ExtendedDict: """Run only the sync phase of the pipeline. Args: @@ -495,50 +510,62 @@ def sync(self, config_path: str, dry_run: bool = False) -> SyncResult: dry_run: If True, don't make actual changes Returns: - SyncResult with sync operation details + Extended sync result payload. """ if self._prefer_native: native_result = _native.Sync(config_path, dry_run) - return SyncResult.from_native(native_result) + return SyncResult.from_native(native_result).to_dict() options = SyncOptions( operation=SyncOperation.SYNC, dry_run=dry_run, compute_diff=dry_run, ) - return self._cli_run_pipeline(config_path, options) + return self._cli_run_pipeline(config_path, options).to_dict() - def get_targets(self, config_path: str) -> tuple[list[str], str]: + def get_targets(self, config_path: str) -> ExtendedDict: """Get the list of targets from a configuration. Args: config_path: Path to YAML configuration file Returns: - Tuple of (targets, error_message) + Extended targets payload. 
""" if self._prefer_native: targets, err = _native.GetTargets(config_path) - return list(targets) if targets else [], err + target_list = list(targets) if targets else [] + return extend_data({"targets": target_list, "count": len(target_list), "error_message": err}) info = self.get_config_info(config_path) - return info.targets, info.error_message - - def get_sources(self, config_path: str) -> tuple[list[str], str]: + targets = info.get("targets", []) + return extend_data({ + "targets": targets, + "count": len(targets), + "error_message": info.get("error_message", ""), + }) + + def get_sources(self, config_path: str) -> ExtendedDict: """Get the list of sources from a configuration. Args: config_path: Path to YAML configuration file Returns: - Tuple of (sources, error_message) + Extended sources payload. """ if self._prefer_native: sources, err = _native.GetSources(config_path) - return list(sources) if sources else [], err + source_list = list(sources) if sources else [] + return extend_data({"sources": source_list, "count": len(source_list), "error_message": err}) info = self.get_config_info(config_path) - return info.sources, info.error_message + sources = info.get("sources", []) + return extend_data({ + "sources": sources, + "count": len(sources), + "error_message": info.get("error_message", ""), + }) # Import tools for AI framework integration diff --git a/src/extended_data/connectors/secrets/tools.py b/src/extended_data/connectors/secrets/tools.py index c6e2f5c..fd4b0d9 100644 --- a/src/extended_data/connectors/secrets/tools.py +++ b/src/extended_data/connectors/secrets/tools.py @@ -66,13 +66,7 @@ def validate_config(config_path: str) -> dict[str, Any]: from extended_data.connectors.secrets import SecretsConnector connector = SecretsConnector() - is_valid, message = connector.validate_config(config_path) - - return extend_data({ - "valid": is_valid, - "message": message, - "config_path": config_path, - }) + return 
extend_data(connector.validate_config(config_path)) def run_pipeline( @@ -129,16 +123,16 @@ def run_pipeline( result = connector.run_pipeline(config_path, options) return extend_data({ - "success": result.success, - "target_count": result.target_count, - "secrets_processed": result.secrets_processed, - "secrets_added": result.secrets_added, - "secrets_modified": result.secrets_modified, - "secrets_removed": result.secrets_removed, - "secrets_unchanged": result.secrets_unchanged, - "duration_ms": result.duration_ms, - "error_message": result.error_message, - "diff_output": result.diff_output if dry_run else "", + "success": result.get("success", False), + "target_count": result.get("target_count", 0), + "secrets_processed": result.get("secrets_processed", 0), + "secrets_added": result.get("secrets_added", 0), + "secrets_modified": result.get("secrets_modified", 0), + "secrets_removed": result.get("secrets_removed", 0), + "secrets_unchanged": result.get("secrets_unchanged", 0), + "duration_ms": result.get("duration_ms", 0), + "error_message": result.get("error_message", ""), + "diff_output": result.get("diff_output", "") if dry_run else "", }) @@ -157,14 +151,14 @@ def dry_run(config_path: str) -> dict[str, Any]: result = connector.dry_run(config_path) return extend_data({ - "success": result.success, - "target_count": result.target_count, - "secrets_would_add": result.secrets_added, - "secrets_would_modify": result.secrets_modified, - "secrets_would_remove": result.secrets_removed, - "secrets_unchanged": result.secrets_unchanged, - "diff_output": result.diff_output, - "error_message": result.error_message, + "success": result.get("success", False), + "target_count": result.get("target_count", 0), + "secrets_would_add": result.get("secrets_added", 0), + "secrets_would_modify": result.get("secrets_modified", 0), + "secrets_would_remove": result.get("secrets_removed", 0), + "secrets_unchanged": result.get("secrets_unchanged", 0), + "diff_output": 
result.get("diff_output", ""), + "error_message": result.get("error_message", ""), }) @@ -180,19 +174,7 @@ def get_config_info(config_path: str) -> dict[str, Any]: from extended_data.connectors.secrets import SecretsConnector connector = SecretsConnector() - info = connector.get_config_info(config_path) - - return extend_data({ - "valid": info.valid, - "error_message": info.error_message, - "source_count": info.source_count, - "target_count": info.target_count, - "sources": info.sources, - "targets": info.targets, - "has_merge_store": info.has_merge_store, - "vault_address": info.vault_address, - "aws_region": info.aws_region, - }) + return extend_data(connector.get_config_info(config_path)) def get_targets(config_path: str) -> dict[str, Any]: @@ -207,13 +189,7 @@ def get_targets(config_path: str) -> dict[str, Any]: from extended_data.connectors.secrets import SecretsConnector connector = SecretsConnector() - targets, error = connector.get_targets(config_path) - - return extend_data({ - "targets": targets, - "count": len(targets), - "error_message": error, - }) + return extend_data(connector.get_targets(config_path)) def get_sources(config_path: str) -> dict[str, Any]: @@ -228,13 +204,7 @@ def get_sources(config_path: str) -> dict[str, Any]: from extended_data.connectors.secrets import SecretsConnector connector = SecretsConnector() - sources, error = connector.get_sources(config_path) - - return extend_data({ - "sources": sources, - "count": len(sources), - "error_message": error, - }) + return extend_data(connector.get_sources(config_path)) # ============================================================================= diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index 2d2cbb1..98b9233 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -23,7 +23,7 @@ run_pipeline, validate_config, ) -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString +from 
extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data @pytest.fixture @@ -50,21 +50,24 @@ def test_cli_get_config_info_valid(connector: SecretsConnector, tmp_path: Path) info = connector.get_config_info(str(config_file)) - assert info.valid is True - assert info.source_count == 2 - assert info.target_count == 1 - assert "src1" in info.sources - assert "src2" in info.sources - assert "tgt1" in info.targets - assert info.has_merge_store is True - assert info.vault_address == "http://vault:8200" - assert info.aws_region == "us-east-1" + assert isinstance(info, ExtendedDict) + assert isinstance(info["sources"], ExtendedList) + assert info["valid"] is True + assert info["source_count"] == 2 + assert info["target_count"] == 1 + assert "src1" in info["sources"] + assert "src2" in info["sources"] + assert "tgt1" in info["targets"] + assert info["has_merge_store"] is True + assert info["vault_address"] == "http://vault:8200" + assert info["aws_region"] == "us-east-1" def test_cli_get_config_info_not_found(connector: SecretsConnector) -> None: info = connector.get_config_info("/non/existent/path.yaml") - assert info.valid is False - assert "Configuration file not found" in info.error_message + assert isinstance(info, ExtendedDict) + assert info["valid"] is False + assert "Configuration file not found" in info["error_message"] def test_cli_get_config_info_invalid_yaml(connector: SecretsConnector, tmp_path: Path) -> None: @@ -72,8 +75,9 @@ def test_cli_get_config_info_invalid_yaml(connector: SecretsConnector, tmp_path: config_file.write_text("invalid: yaml: :") info = connector.get_config_info(str(config_file)) - assert info.valid is False - assert "Error parsing YAML file" in info.error_message + assert isinstance(info, ExtendedDict) + assert info["valid"] is False + assert "Error parsing YAML file" in info["error_message"] def test_cli_get_config_info_empty_file(connector: SecretsConnector, tmp_path: Path) -> None: @@ -81,8 +85,31 @@ def 
test_cli_get_config_info_empty_file(connector: SecretsConnector, tmp_path: P config_file.write_text("") info = connector.get_config_info(str(config_file)) - assert info.valid is True - assert info.source_count == 0 + assert isinstance(info, ExtendedDict) + assert info["valid"] is True + assert info["source_count"] == 0 + + +def test_cli_get_targets_and_sources_return_extended_payloads(connector: SecretsConnector, tmp_path: Path) -> None: + config_file = tmp_path / "config.yaml" + config_file.write_text( + yaml.dump({ + "sources": {"vault/prod": {}, "vault/dev": {}}, + "targets": {"prod": {}, "dev": {}}, + }) + ) + + targets = connector.get_targets(str(config_file)) + sources = connector.get_sources(str(config_file)) + + assert isinstance(targets, ExtendedDict) + assert isinstance(targets["targets"], ExtendedList) + assert isinstance(targets["targets"][0], ExtendedString) + assert targets["count"] == 2 + assert set(targets["targets"]) == {"prod", "dev"} + assert isinstance(sources, ExtendedDict) + assert isinstance(sources["sources"], ExtendedList) + assert set(sources["sources"]) == {"vault/prod", "vault/dev"} @patch("subprocess.run") @@ -96,8 +123,9 @@ def test_cli_run_pipeline_operation(mock_run: MagicMock, connector: SecretsConne options = SyncOptions(operation=SyncOperation.MERGE) result = connector.run_pipeline("config.yaml", options) - assert result.success is True - assert result.secrets_processed == 5 + assert isinstance(result, ExtendedDict) + assert result["success"] is True + assert result["secrets_processed"] == 5 # Check that it uses "pipeline" command with "--merge-only" flag args = mock_run.call_args[0][0] @@ -121,7 +149,8 @@ def test_cli_run_pipeline_diff_and_format(mock_run: MagicMock, connector: Secret ) result = connector.run_pipeline("config.yaml", options) - assert result.success is True + assert isinstance(result, ExtendedDict) + assert result["success"] is True args = mock_run.call_args[0][0] assert "--diff" in args @@ -139,7 +168,8 @@ def 
test_cli_run_pipeline_default_output_is_json(mock_run: MagicMock, connector: result = connector.run_pipeline("config.yaml") - assert result.success is True + assert isinstance(result, ExtendedDict) + assert result["success"] is True args = mock_run.call_args[0][0] assert args.count("--output") == 1 assert args[args.index("--output") + 1] == "json" @@ -173,15 +203,16 @@ def test_cli_run_pipeline_parses_result_envelope(mock_run: MagicMock, connector: result = connector.run_pipeline("config.yaml") - assert result.success is True - assert result.target_count == 2 - assert result.secrets_processed == 5 - assert result.secrets_added == 1 - assert result.secrets_modified == 2 - assert result.secrets_unchanged == 2 - assert result.duration_ms == 321 - assert json.loads(result.results_json) == output["results"] - assert result.diff_output == '{"summary":{"added":1}}' + assert isinstance(result, ExtendedDict) + assert result["success"] is True + assert result["target_count"] == 2 + assert result["secrets_processed"] == 5 + assert result["secrets_added"] == 1 + assert result["secrets_modified"] == 2 + assert result["secrets_unchanged"] == 2 + assert result["duration_ms"] == 321 + assert json.loads(str(result["results_json"])) == output["results"] + assert result["diff_output"] == '{"summary":{"added":1}}' @patch("subprocess.run") @@ -200,8 +231,9 @@ def test_cli_run_pipeline_rejects_legacy_raw_diff_json(mock_run: MagicMock, conn result = connector.run_pipeline("config.yaml", SyncOptions(dry_run=True, compute_diff=True)) - assert result.success is False - assert "expected pipeline result envelope" in result.error_message + assert isinstance(result, ExtendedDict) + assert result["success"] is False + assert "expected pipeline result envelope" in result["error_message"] @patch("subprocess.run") @@ -222,11 +254,12 @@ def test_cli_run_pipeline_parses_failure_result_envelope(mock_run: MagicMock, co result = connector.run_pipeline("config.yaml") - assert result.success is False - 
assert result.target_count == 1 - assert result.secrets_processed == 2 - assert result.error_message == "pipeline completed with errors" - assert json.loads(result.results_json)[0]["error"] == "denied" + assert isinstance(result, ExtendedDict) + assert result["success"] is False + assert result["target_count"] == 1 + assert result["secrets_processed"] == 2 + assert result["error_message"] == "pipeline completed with errors" + assert json.loads(str(result["results_json"]))[0]["error"] == "denied" @patch("subprocess.run") @@ -242,8 +275,9 @@ def test_cli_run_pipeline_failure_envelope_uses_stderr_when_error_message_missin result = connector.run_pipeline("config.yaml") - assert result.success is False - assert result.error_message == "Error: boom\n" + assert isinstance(result, ExtendedDict) + assert result["success"] is False + assert result["error_message"] == "Error: boom\n" @patch("subprocess.run") @@ -256,8 +290,9 @@ def test_cli_run_pipeline_success_without_json_is_error(mock_run: MagicMock, con result = connector.run_pipeline("config.yaml") - assert result.success is False - assert "produced no JSON output" in result.error_message + assert isinstance(result, ExtendedDict) + assert result["success"] is False + assert "produced no JSON output" in result["error_message"] @patch("subprocess.run") @@ -270,8 +305,9 @@ def test_cli_run_pipeline_non_json_failure_uses_cli_output(mock_run: MagicMock, result = connector.run_pipeline("config.yaml") - assert result.success is False - assert result.error_message == "not json" + assert isinstance(result, ExtendedDict) + assert result["success"] is False + assert result["error_message"] == "not json" @patch("subprocess.run") @@ -305,9 +341,10 @@ def test_cli_validate_config(mock_run: MagicMock, connector: SecretsConnector) - stderr="", ) - is_valid, message = connector.validate_config("config.yaml") - assert is_valid is True - assert "valid" in message.lower() + validation = connector.validate_config("config.yaml") + assert 
isinstance(validation, ExtendedDict) + assert validation["valid"] is True + assert "valid" in validation["message"].lower() args = mock_run.call_args[0][0] assert "validate" in args @@ -316,7 +353,7 @@ def test_cli_validate_config(mock_run: MagicMock, connector: SecretsConnector) - @patch("extended_data.connectors.secrets.SecretsConnector") def test_run_pipeline_tool_default_continue_on_error_matches_cli(mock_connector_class: MagicMock) -> None: mock_connector = mock_connector_class.return_value - mock_connector.run_pipeline.return_value = SyncResult(success=True, secrets_processed=3) + mock_connector.run_pipeline.return_value = SyncResult(success=True, secrets_processed=3).to_dict() result = run_pipeline("config.yaml") @@ -332,7 +369,7 @@ def test_run_pipeline_tool_default_continue_on_error_matches_cli(mock_connector_ @patch("extended_data.connectors.secrets.SecretsConnector") def test_run_pipeline_tool_can_disable_continue_on_error(mock_connector_class: MagicMock) -> None: mock_connector = mock_connector_class.return_value - mock_connector.run_pipeline.return_value = SyncResult(success=True) + mock_connector.run_pipeline.return_value = SyncResult(success=True).to_dict() run_pipeline("config.yaml", continue_on_error=False) @@ -350,7 +387,11 @@ def test_run_pipeline_schema_default_continue_on_error_matches_cli() -> None: @patch("extended_data.connectors.secrets.SecretsConnector") def test_validate_config_tool_returns_extended_payload(mock_connector_class: MagicMock) -> None: mock_connector = mock_connector_class.return_value - mock_connector.validate_config.return_value = (True, "valid config") + mock_connector.validate_config.return_value = extend_data({ + "valid": True, + "message": "valid config", + "config_path": "config.yaml", + }) result = validate_config("config.yaml") @@ -371,7 +412,7 @@ def test_dry_run_tool_returns_extended_payload(mock_connector_class: MagicMock) secrets_removed=0, secrets_unchanged=3, diff_output="diff", - ) + ).to_dict() result = 
dry_run("config.yaml") @@ -392,7 +433,7 @@ def test_get_config_info_tool_returns_extended_payload(mock_connector_class: Mag has_merge_store=True, vault_address="https://vault.example.com", aws_region="us-east-1", - ) + ).to_dict() result = get_config_info("config.yaml") @@ -405,7 +446,11 @@ def test_get_config_info_tool_returns_extended_payload(mock_connector_class: Mag @patch("extended_data.connectors.secrets.SecretsConnector") def test_get_targets_tool_returns_extended_payload(mock_connector_class: MagicMock) -> None: mock_connector = mock_connector_class.return_value - mock_connector.get_targets.return_value = (["prod", "dev"], "") + mock_connector.get_targets.return_value = extend_data({ + "targets": ["prod", "dev"], + "count": 2, + "error_message": "", + }) result = get_targets("config.yaml") @@ -418,7 +463,11 @@ def test_get_targets_tool_returns_extended_payload(mock_connector_class: MagicMo @patch("extended_data.connectors.secrets.SecretsConnector") def test_get_sources_tool_returns_extended_payload(mock_connector_class: MagicMock) -> None: mock_connector = mock_connector_class.return_value - mock_connector.get_sources.return_value = (["vault/prod"], "") + mock_connector.get_sources.return_value = extend_data({ + "sources": ["vault/prod"], + "count": 1, + "error_message": "", + }) result = get_sources("config.yaml") From d7a3c4c3743ecd5fad6694e9ff01d27419082184 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:01:33 -0500 Subject: [PATCH 080/287] feat: promote meshy animation catalog payloads --- .../connectors/meshy/animations.py | 23 ++++--- tests/connectors/meshy/test_animations.py | 60 +++++++++++++++++++ 2 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 tests/connectors/meshy/test_animations.py diff --git a/src/extended_data/connectors/meshy/animations.py b/src/extended_data/connectors/meshy/animations.py index be9d2ff..5a2dae6 100644 --- a/src/extended_data/connectors/meshy/animations.py +++ 
b/src/extended_data/connectors/meshy/animations.py @@ -2,8 +2,11 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import asdict, dataclass from enum import Enum +from typing import cast + +from extended_data.containers import ExtendedDict, ExtendedList, extend_data # This file is auto-generated by scripts/sync_animations.py @@ -4866,21 +4869,27 @@ def _populate_animation_sets() -> None: _populate_animation_sets() -def get_animations_by_category(category: AnimationCategory) -> list[AnimationMeta]: +def get_animations_by_category(category: AnimationCategory) -> ExtendedList[ExtendedDict]: """Get all animations in a category.""" - return [anim for anim in ANIMATIONS.values() if anim.category == category.value] + return cast( + ExtendedList[ExtendedDict], + extend_data([asdict(anim) for anim in ANIMATIONS.values() if anim.category == category.value]), + ) def get_animations_by_subcategory( subcategory: AnimationSubcategory, -) -> list[AnimationMeta]: +) -> ExtendedList[ExtendedDict]: """Get all animations in a subcategory.""" - return [anim for anim in ANIMATIONS.values() if anim.subcategory == subcategory.value] + return cast( + ExtendedList[ExtendedDict], + extend_data([asdict(anim) for anim in ANIMATIONS.values() if anim.subcategory == subcategory.value]), + ) -def get_animation(action_id: int) -> AnimationMeta: +def get_animation(action_id: int) -> ExtendedDict: """Get animation by ID.""" if action_id not in ANIMATIONS: msg = f"Animation ID {action_id} not found" raise ValueError(msg) - return ANIMATIONS[action_id] + return cast(ExtendedDict, extend_data(asdict(ANIMATIONS[action_id]))) diff --git a/tests/connectors/meshy/test_animations.py b/tests/connectors/meshy/test_animations.py new file mode 100644 index 0000000..a3bfcb8 --- /dev/null +++ b/tests/connectors/meshy/test_animations.py @@ -0,0 +1,60 @@ +"""Tests for Meshy animation catalog helpers.""" + +from __future__ import annotations + +import pytest + +from 
extended_data.connectors.meshy.animations import ( + ANIMATIONS, + AnimationCategory, + AnimationSubcategory, + get_animation, + get_animations_by_category, + get_animations_by_subcategory, +) +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString + + +def test_get_animation_returns_extended_payload() -> None: + """Single animation lookup should expose extended mapping payloads.""" + action_id, raw_animation = next(iter(ANIMATIONS.items())) + + result = get_animation(action_id) + + assert isinstance(result, ExtendedDict) + assert result["id"] == raw_animation.id + assert result["name"] == raw_animation.name + assert isinstance(result["name"], ExtendedString) + assert result["preview_url"] == raw_animation.preview_url + + +def test_get_animations_by_category_returns_extended_payloads() -> None: + """Category lookup should expose an extended list of extended mappings.""" + raw_animation = next(iter(ANIMATIONS.values())) + + result = get_animations_by_category(AnimationCategory(raw_animation.category)) + + assert isinstance(result, ExtendedList) + assert result + assert all(isinstance(animation, ExtendedDict) for animation in result) + assert all(animation["category"] == raw_animation.category for animation in result) + assert isinstance(result[0]["subcategory"], ExtendedString) + + +def test_get_animations_by_subcategory_returns_extended_payloads() -> None: + """Subcategory lookup should expose an extended list of extended mappings.""" + raw_animation = next(iter(ANIMATIONS.values())) + + result = get_animations_by_subcategory(AnimationSubcategory(raw_animation.subcategory)) + + assert isinstance(result, ExtendedList) + assert result + assert all(isinstance(animation, ExtendedDict) for animation in result) + assert all(animation["subcategory"] == raw_animation.subcategory for animation in result) + assert isinstance(result[0]["name"], ExtendedString) + + +def test_get_animation_rejects_unknown_id() -> None: + """Missing animations should 
remain explicit errors.""" + with pytest.raises(ValueError, match="Animation ID -1 not found"): + get_animation(-1) From 9e3f316aa1a243df5f4ffbf33cb76f9e66271a7c Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:04:39 -0500 Subject: [PATCH 081/287] feat: promote meshy vector store payloads --- .../meshy/persistence/vector_store.py | 86 +++++++++----- tests/connectors/meshy/test_vector_store.py | 105 ++++++++++++++++++ 2 files changed, 164 insertions(+), 27 deletions(-) create mode 100644 tests/connectors/meshy/test_vector_store.py diff --git a/src/extended_data/connectors/meshy/persistence/vector_store.py b/src/extended_data/connectors/meshy/persistence/vector_store.py index 8ef9829..1f457bf 100644 --- a/src/extended_data/connectors/meshy/persistence/vector_store.py +++ b/src/extended_data/connectors/meshy/persistence/vector_store.py @@ -41,13 +41,15 @@ import sqlite3 from contextlib import contextmanager, suppress -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from datetime import datetime, timezone from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from typing_extensions import Self +from extended_data.containers import ExtendedDict, ExtendedList, extend_data + if TYPE_CHECKING: from collections.abc import Iterator @@ -94,6 +96,23 @@ class SimilarityResult: score: float # 1 - distance (higher = more similar) +def _record_payload(record: GenerationRecord) -> dict[str, Any]: + """Convert an internal generation record to a JSON-friendly payload.""" + payload = asdict(record) + payload["created_at"] = record.created_at.isoformat() + payload["updated_at"] = record.updated_at.isoformat() + return payload + + +def _similarity_payload(result: SimilarityResult) -> dict[str, Any]: + """Convert an internal similarity result to a JSON-friendly payload.""" + return { + "record": _record_payload(result.record), + "distance": result.distance, + "score": 
result.score, + } + + class VectorStore: """SQLite vector store for asset generation tracking and RAG. @@ -220,7 +239,7 @@ def record_generation( task_id: str | None = None, embedding: list[float] | None = None, metadata: dict[str, Any] | None = None, - ) -> GenerationRecord: + ) -> ExtendedDict: """Record a new generation (idempotent by spec_hash). If a record with the same spec_hash exists, returns existing. @@ -235,7 +254,7 @@ def record_generation( metadata: Additional metadata dict Returns: - GenerationRecord (existing or newly created) + Extended generation record payload (existing or newly created) """ now = _utc_now().isoformat() @@ -245,7 +264,7 @@ def record_generation( row = cursor.fetchone() if row: - return self._row_to_record(row) + return cast(ExtendedDict, extend_data(_record_payload(self._row_to_record(row)))) # Insert new record metadata_json = json.dumps(metadata) if metadata else None @@ -270,7 +289,7 @@ def record_generation( (record_id, embedding_blob), ) - return GenerationRecord( + record = GenerationRecord( id=record_id, spec_hash=spec_hash, project=project, @@ -283,6 +302,7 @@ def record_generation( created_at=datetime.fromisoformat(now), updated_at=datetime.fromisoformat(now), ) + return cast(ExtendedDict, extend_data(_record_payload(record))) def update_status( self, @@ -325,40 +345,44 @@ def update_status( return cursor.rowcount > 0 - def get_by_spec_hash(self, spec_hash: str) -> GenerationRecord | None: + def get_by_spec_hash(self, spec_hash: str) -> ExtendedDict | None: """Get generation record by spec hash. 
Args: spec_hash: Generation spec hash Returns: - GenerationRecord or None + Extended generation record payload or None """ conn = self._get_conn() cursor = conn.execute("SELECT * FROM generations WHERE spec_hash = ?", (spec_hash,)) row = cursor.fetchone() - return self._row_to_record(row) if row else None + if not row: + return None + return cast(ExtendedDict, extend_data(_record_payload(self._row_to_record(row)))) - def get_by_task_id(self, task_id: str) -> GenerationRecord | None: + def get_by_task_id(self, task_id: str) -> ExtendedDict | None: """Get generation record by Meshy task ID. Args: task_id: Meshy task ID Returns: - GenerationRecord or None + Extended generation record payload or None """ conn = self._get_conn() cursor = conn.execute("SELECT * FROM generations WHERE task_id = ?", (task_id,)) row = cursor.fetchone() - return self._row_to_record(row) if row else None + if not row: + return None + return cast(ExtendedDict, extend_data(_record_payload(self._row_to_record(row)))) def search_similar( self, query_embedding: list[float], limit: int = 10, project: str | None = None, - ) -> list[SimilarityResult]: + ) -> ExtendedList[ExtendedDict]: """Search for similar generations using vector similarity. 
Args: @@ -367,10 +391,10 @@ def search_similar( project: Optional project filter Returns: - List of SimilarityResult ordered by similarity (highest first) + Extended similarity result payloads ordered by similarity (highest first) """ if not _HAS_VECTOR: - return [] + return cast(ExtendedList[ExtendedDict], extend_data([])) conn = self._get_conn() query_blob = self._serialize_embedding(query_embedding) @@ -399,26 +423,28 @@ def search_similar( (query_blob, limit), ) - results = [] + results: list[dict[str, Any]] = [] for row in cursor: record = self._row_to_record(row) distance = row["distance"] results.append( - SimilarityResult( - record=record, - distance=distance, - score=1.0 - min(distance, 1.0), + _similarity_payload( + SimilarityResult( + record=record, + distance=distance, + score=1.0 - min(distance, 1.0), + ) ) ) - return results + return cast(ExtendedList[ExtendedDict], extend_data(results)) def search_text( self, query: str, limit: int = 10, project: str | None = None, - ) -> list[GenerationRecord]: + ) -> ExtendedList[ExtendedDict]: """Full-text search for prompts. Falls back to this when vector search is unavailable. @@ -429,7 +455,7 @@ def search_text( project: Optional project filter Returns: - List of matching GenerationRecords + Extended generation record payloads """ conn = self._get_conn() @@ -456,16 +482,19 @@ def search_text( (query, limit), ) - return [self._row_to_record(row) for row in cursor] + return cast( + ExtendedList[ExtendedDict], + extend_data([_record_payload(self._row_to_record(row)) for row in cursor]), + ) - def list_pending(self, project: str | None = None) -> list[GenerationRecord]: + def list_pending(self, project: str | None = None) -> ExtendedList[ExtendedDict]: """List all pending/in-progress generations. 
Args: project: Optional project filter Returns: - List of pending GenerationRecords + Extended pending generation record payloads """ conn = self._get_conn() @@ -477,7 +506,10 @@ def list_pending(self, project: str | None = None) -> list[GenerationRecord]: else: cursor = conn.execute("SELECT * FROM generations WHERE status IN ('pending', 'in_progress')") - return [self._row_to_record(row) for row in cursor] + return cast( + ExtendedList[ExtendedDict], + extend_data([_record_payload(self._row_to_record(row)) for row in cursor]), + ) def compute_spec_hash(self, spec: dict[str, Any]) -> str: """Compute deterministic hash for a generation spec. diff --git a/tests/connectors/meshy/test_vector_store.py b/tests/connectors/meshy/test_vector_store.py new file mode 100644 index 0000000..baac662 --- /dev/null +++ b/tests/connectors/meshy/test_vector_store.py @@ -0,0 +1,105 @@ +"""Tests for Meshy vector store persistence helpers.""" + +from __future__ import annotations + +from extended_data.connectors.meshy.persistence import vector_store as vector_store_module +from extended_data.connectors.meshy.persistence.vector_store import VectorStore +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString + + +def test_record_generation_returns_extended_payload(temp_dir) -> None: + """Recording a generation should expose an extended mapping payload.""" + with VectorStore(temp_dir / "assets.db") as store: + result = store.record_generation( + spec_hash="hash-abc", + prompt="cute otter character", + project="project1", + task_id="task-123", + metadata={"source": "test"}, + ) + + assert isinstance(result, ExtendedDict) + assert result["spec_hash"] == "hash-abc" + assert result["prompt"] == "cute otter character" + assert isinstance(result["prompt"], ExtendedString) + assert isinstance(result["metadata"], ExtendedDict) + assert result["metadata"]["source"] == "test" + assert isinstance(result["created_at"], ExtendedString) + + +def 
test_record_generation_is_idempotent_with_extended_payload(temp_dir) -> None: + """Duplicate spec hashes should return the existing extended payload.""" + with VectorStore(temp_dir / "assets.db") as store: + first = store.record_generation( + spec_hash="hash-abc", + prompt="first prompt", + project="project1", + ) + second = store.record_generation( + spec_hash="hash-abc", + prompt="second prompt", + project="project1", + ) + + assert isinstance(second, ExtendedDict) + assert second["id"] == first["id"] + assert second["prompt"] == "first prompt" + + +def test_get_record_methods_return_extended_payloads(temp_dir) -> None: + """Spec hash and task ID lookups should return extended mapping payloads.""" + with VectorStore(temp_dir / "assets.db") as store: + store.record_generation( + spec_hash="hash-abc", + prompt="cute otter character", + project="project1", + task_id="task-123", + ) + + by_hash = store.get_by_spec_hash("hash-abc") + by_task = store.get_by_task_id("task-123") + + assert isinstance(by_hash, ExtendedDict) + assert by_hash["spec_hash"] == "hash-abc" + assert isinstance(by_task, ExtendedDict) + assert by_task["task_id"] == "task-123" + + +def test_search_text_and_list_pending_return_extended_payloads(temp_dir) -> None: + """Search and pending queries should return extended lists of mappings.""" + with VectorStore(temp_dir / "assets.db") as store: + store.record_generation( + spec_hash="hash-otter", + prompt="cute otter character", + project="project1", + ) + store.record_generation( + spec_hash="hash-badger", + prompt="armored badger character", + project="project2", + ) + store.update_status("hash-badger", "SUCCEEDED") + + search_results = store.search_text("otter") + pending_results = store.list_pending(project="project1") + + assert isinstance(search_results, ExtendedList) + assert len(search_results) == 1 + assert isinstance(search_results[0], ExtendedDict) + assert search_results[0]["spec_hash"] == "hash-otter" + + assert isinstance(pending_results, 
ExtendedList) + assert len(pending_results) == 1 + assert isinstance(pending_results[0]["prompt"], ExtendedString) + assert pending_results[0]["project"] == "project1" + + +def test_search_similar_without_vector_extension_returns_extended_list(temp_dir, monkeypatch) -> None: + """The no-vector fallback should still expose an extended list.""" + monkeypatch.setattr(vector_store_module, "_HAS_VECTOR", False) + + with VectorStore(temp_dir / "assets.db") as store: + result = store.search_similar([0.0] * store.embedding_dim) + + assert isinstance(result, ExtendedList) + assert result == [] From 71e2e34c1eaaf19c4f1b5c0b539279626a3613e8 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:07:39 -0500 Subject: [PATCH 082/287] feat: promote meshy repository payloads --- .../meshy/persistence/repository.py | 79 +++++++++++------- .../connectors/meshy/webhooks/handler.py | 10 ++- tests/connectors/meshy/test_repository.py | 80 +++++++++++-------- tests/connectors/meshy/test_webhooks.py | 17 +++- 4 files changed, 119 insertions(+), 67 deletions(-) diff --git a/src/extended_data/connectors/meshy/persistence/repository.py b/src/extended_data/connectors/meshy/persistence/repository.py index 0400a1e..fbaf752 100644 --- a/src/extended_data/connectors/meshy/persistence/repository.py +++ b/src/extended_data/connectors/meshy/persistence/repository.py @@ -8,7 +8,7 @@ from datetime import datetime, timezone from pathlib import Path -from typing import Any +from typing import Any, cast from extended_data.connectors.meshy.persistence.schemas import ( ArtifactRecord, @@ -19,6 +19,7 @@ TaskSubmission, ) from extended_data.connectors.meshy.persistence.utils import compute_spec_hash as util_compute_spec_hash +from extended_data.containers import ExtendedDict, ExtendedList, extend_data def _utc_now() -> datetime: @@ -26,6 +27,16 @@ def _utc_now() -> datetime: return datetime.now(timezone.utc) +def _manifest_payload(manifest: ProjectManifest) -> dict[str, Any]: + """Convert an 
internal project manifest model to a JSON-friendly payload.""" + return manifest.model_dump(mode="json") + + +def _asset_payload(asset: AssetManifest) -> dict[str, Any]: + """Convert an internal asset manifest model to a JSON-friendly payload.""" + return asset.model_dump(mode="json") + + class TaskRepository: """File-backed repository for task manifests with atomic operations.""" @@ -37,15 +48,8 @@ def _manifest_path(self, project: str) -> Path: """Get path to project manifest file.""" return self.base_path / project / "manifest.json" - def load_project_manifest(self, project: str) -> ProjectManifest: - """Load manifest for a project, creating empty one if missing. - - Args: - project: Project name (e.g., "otter", "beaver") - - Returns: - ProjectManifest instance - """ + def _load_project_manifest_model(self, project: str) -> ProjectManifest: + """Load the internal project manifest model, creating an empty one if missing.""" manifest_path = self._manifest_path(project) if not manifest_path.exists(): @@ -58,6 +62,17 @@ def load_project_manifest(self, project: str) -> ProjectManifest: data = json.load(f) return ProjectManifest(**data) + def load_project_manifest(self, project: str) -> ExtendedDict: + """Load manifest for a project, creating empty one if missing. + + Args: + project: Project name (e.g., "otter", "beaver") + + Returns: + Extended project manifest payload. + """ + return cast(ExtendedDict, extend_data(_manifest_payload(self._load_project_manifest_model(project)))) + def save_project_manifest(self, manifest: ProjectManifest) -> None: """Atomically save project manifest to disk. @@ -79,7 +94,7 @@ def save_project_manifest(self, manifest: ProjectManifest) -> None: # Atomic rename os.replace(tmp_path, manifest_path) - def get_asset_record(self, project: str, spec_hash: str) -> AssetManifest | None: + def get_asset_record(self, project: str, spec_hash: str) -> ExtendedDict | None: """Get asset manifest by spec hash. 
Args: @@ -87,10 +102,13 @@ def get_asset_record(self, project: str, spec_hash: str) -> AssetManifest | None spec_hash: Asset spec hash Returns: - AssetManifest if found, None otherwise + Extended asset manifest payload if found, None otherwise """ - manifest = self.load_project_manifest(project) - return manifest.asset_specs.get(spec_hash) + manifest = self._load_project_manifest_model(project) + asset = manifest.asset_specs.get(spec_hash) + if asset is None: + return None + return cast(ExtendedDict, extend_data(_asset_payload(asset))) def upsert_asset_record(self, project: str, asset_manifest: AssetManifest) -> None: """Insert or update asset manifest. @@ -99,7 +117,7 @@ def upsert_asset_record(self, project: str, asset_manifest: AssetManifest) -> No project: Project name asset_manifest: AssetManifest to save """ - manifest = self.load_project_manifest(project) + manifest = self._load_project_manifest_model(project) asset_manifest.updated_at = _utc_now() manifest.asset_specs[asset_manifest.asset_spec_hash] = asset_manifest self.save_project_manifest(manifest) @@ -131,7 +149,7 @@ def record_task_update( source: Update source (orchestrator, webhook, manual) error: Error message if failed """ - manifest = self.load_project_manifest(project) + manifest = self._load_project_manifest_model(project) asset_record = manifest.asset_specs.get(spec_hash) if not asset_record: @@ -200,28 +218,28 @@ def record_task_update( # Save updated manifest self.save_project_manifest(manifest) - def list_pending_assets(self, project: str) -> list[AssetManifest]: + def list_pending_assets(self, project: str) -> ExtendedList[ExtendedDict]: """List all assets with pending/in-progress tasks. 
Args: project: Project name Returns: - List of AssetManifest with non-terminal tasks + Extended asset manifest payloads with non-terminal tasks """ - manifest = self.load_project_manifest(project) - pending = [] + manifest = self._load_project_manifest_model(project) + pending: list[dict[str, Any]] = [] terminal_statuses = {"SUCCEEDED", "FAILED", "EXPIRED", "CANCELED"} for asset_record in manifest.asset_specs.values(): has_pending = any(task.status not in terminal_statuses for task in asset_record.task_graph) if has_pending: - pending.append(asset_record) + pending.append(_asset_payload(asset_record)) - return pending + return cast(ExtendedList[ExtendedDict], extend_data(pending)) - def find_task_by_id(self, task_id: str, project: str | None = None) -> tuple[str, str, AssetManifest] | None: + def find_task_by_id(self, task_id: str, project: str | None = None) -> ExtendedDict | None: """Find asset by task ID (for webhook lookups). Args: @@ -229,7 +247,7 @@ def find_task_by_id(self, task_id: str, project: str | None = None) -> tuple[str project: Optional project to narrow search Returns: - Tuple of (project, spec_hash, AssetManifest) if found + Extended payload with project, spec_hash, and asset if found """ # Determine which project to search if project: @@ -239,11 +257,18 @@ def find_task_by_id(self, task_id: str, project: str | None = None) -> tuple[str project_list = [d.name for d in self.base_path.iterdir() if d.is_dir() and (d / "manifest.json").exists()] for sp in project_list: - manifest = self.load_project_manifest(sp) + manifest = self._load_project_manifest_model(sp) for spec_hash, asset_record in manifest.asset_specs.items(): for task in asset_record.task_graph: if task.task_id == task_id: - return (sp, spec_hash, asset_record) + return cast( + ExtendedDict, + extend_data({ + "project": sp, + "spec_hash": spec_hash, + "asset": _asset_payload(asset_record), + }), + ) return None @@ -280,7 +305,7 @@ def record_task_submission(self, submission: 
TaskSubmission) -> None: msg = "spec_hash cannot be empty" raise ValueError(msg) - manifest = self.load_project_manifest(submission.project) + manifest = self._load_project_manifest_model(submission.project) asset_record = manifest.asset_specs.get(submission.spec_hash) if not asset_record: diff --git a/src/extended_data/connectors/meshy/webhooks/handler.py b/src/extended_data/connectors/meshy/webhooks/handler.py index da6dab6..59fc987 100644 --- a/src/extended_data/connectors/meshy/webhooks/handler.py +++ b/src/extended_data/connectors/meshy/webhooks/handler.py @@ -92,12 +92,14 @@ def handle_webhook( "task_id": payload.id, }) - found_project, found_spec_hash, asset_manifest = task_lookup + found_project = str(task_lookup["project"]) + found_spec_hash = str(task_lookup["spec_hash"]) + asset_manifest = task_lookup["asset"] service_name = None - for task_entry in asset_manifest.task_graph: - if task_entry.task_id == payload.id: - service_name = task_entry.service + for task_entry in asset_manifest.get("task_graph", []): + if task_entry["task_id"] == payload.id: + service_name = str(task_entry["service"]) break if not service_name: diff --git a/tests/connectors/meshy/test_repository.py b/tests/connectors/meshy/test_repository.py index e8ad2f9..ceb3905 100644 --- a/tests/connectors/meshy/test_repository.py +++ b/tests/connectors/meshy/test_repository.py @@ -12,9 +12,11 @@ from extended_data.connectors.meshy.persistence.schemas import ( ArtifactRecord, AssetManifest, + ProjectManifest, TaskStatus, TaskSubmission, ) +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString class TestTaskRepositoryInit: @@ -40,9 +42,11 @@ def test_load_creates_new_manifest(self, task_repository): """Test loading non-existent project creates new manifest.""" manifest = task_repository.load_project_manifest("project1") - assert manifest.project == "project1" - assert manifest.asset_specs == {} - assert manifest.version == "1.0" + assert isinstance(manifest, 
ExtendedDict) + assert manifest["project"] == "project1" + assert manifest["asset_specs"] == {} + assert manifest["version"] == "1.0" + assert isinstance(manifest["last_updated"], ExtendedString) def test_load_existing_manifest(self, task_repository, temp_dir): """Test loading existing manifest.""" @@ -62,11 +66,12 @@ def test_load_existing_manifest(self, task_repository, temp_dir): json.dump(manifest_data, f) manifest = task_repository.load_project_manifest("project2") - assert manifest.project == "project2" + assert isinstance(manifest, ExtendedDict) + assert manifest["project"] == "project2" def test_save_and_load_manifest(self, task_repository): """Test saving and reloading manifest.""" - manifest = task_repository.load_project_manifest("project1") + manifest = ProjectManifest(project="project1") # Add an asset record asset = AssetManifest( @@ -81,8 +86,9 @@ def test_save_and_load_manifest(self, task_repository): # Reload and verify reloaded = task_repository.load_project_manifest("project1") - assert "hash-123" in reloaded.asset_specs - assert reloaded.asset_specs["hash-123"].project == "project1" + assert isinstance(reloaded, ExtendedDict) + assert "hash-123" in reloaded["asset_specs"] + assert reloaded["asset_specs"]["hash-123"]["project"] == "project1" class TestAssetRecordOperations: @@ -107,8 +113,10 @@ def test_upsert_and_get_asset_record(self, task_repository): retrieved = task_repository.get_asset_record("project1", "hash-abc") assert retrieved is not None - assert retrieved.asset_spec_hash == "hash-abc" - assert retrieved.prompts["text3d"] == "An project1 character" + assert isinstance(retrieved, ExtendedDict) + assert retrieved["asset_spec_hash"] == "hash-abc" + assert retrieved["prompts"]["text3d"] == "An project1 character" + assert isinstance(retrieved["prompts"]["text3d"], ExtendedString) def test_upsert_updates_existing(self, task_repository): """Test that upsert updates existing record.""" @@ -125,7 +133,8 @@ def 
test_upsert_updates_existing(self, task_repository): task_repository.upsert_asset_record("project1", asset) retrieved = task_repository.get_asset_record("project1", "hash-abc") - assert retrieved.prompts["text3d"] == "Updated prompt" + assert retrieved is not None + assert retrieved["prompts"]["text3d"] == "Updated prompt" class TestTaskSubmission: @@ -147,9 +156,9 @@ def test_record_task_submission(self, task_repository): # Verify it was saved asset = task_repository.get_asset_record("project1", "hash-abc") assert asset is not None - assert len(asset.task_graph) == 1 - assert asset.task_graph[0].task_id == "task-12345" - assert asset.task_graph[0].service == "text3d" + assert len(asset["task_graph"]) == 1 + assert asset["task_graph"][0]["task_id"] == "task-12345" + assert asset["task_graph"][0]["service"] == "text3d" def test_record_duplicate_submission_idempotent(self, task_repository): """Test that duplicate submissions are idempotent.""" @@ -166,7 +175,8 @@ def test_record_duplicate_submission_idempotent(self, task_repository): task_repository.record_task_submission(submission) # Duplicate asset = task_repository.get_asset_record("project1", "hash-abc") - assert len(asset.task_graph) == 1 # Still just one task + assert asset is not None + assert len(asset["task_graph"]) == 1 # Still just one task def test_record_submission_validates_fields(self, task_repository): """Test that submission validation works.""" @@ -210,9 +220,10 @@ def test_record_task_update(self, repo_with_task): ) asset = repo_with_task.get_asset_record("project1", "hash-abc") - task = asset.task_graph[0] - assert task.status == "SUCCEEDED" - assert task.result_paths["glb"] == "https://example.com/model.glb" + assert asset is not None + task = asset["task_graph"][0] + assert task["status"] == "SUCCEEDED" + assert task["result_paths"]["glb"] == "https://example.com/model.glb" def test_record_task_update_with_error(self, repo_with_task): """Test updating task with error.""" @@ -225,9 +236,10 @@ 
def test_record_task_update_with_error(self, repo_with_task): ) asset = repo_with_task.get_asset_record("project1", "hash-abc") - task = asset.task_graph[0] - assert task.status == "FAILED" - assert task.error == "Generation failed" + assert asset is not None + task = asset["task_graph"][0] + assert task["status"] == "FAILED" + assert task["error"] == "Generation failed" def test_record_task_update_adds_history(self, repo_with_task): """Test that updates add history entries.""" @@ -240,17 +252,18 @@ def test_record_task_update_adds_history(self, repo_with_task): ) asset = repo_with_task.get_asset_record("project1", "hash-abc") - assert len(asset.history) >= 1 + assert asset is not None + assert len(asset["history"]) >= 1 # Find the update entry update_entry = None - for entry in asset.history: - if entry.new_status == "SUCCEEDED": + for entry in asset["history"]: + if entry["new_status"] == "SUCCEEDED": update_entry = entry break assert update_entry is not None - assert update_entry.source == "webhook" + assert update_entry["source"] == "webhook" def test_record_task_update_not_found_raises(self, task_repository): """Test that updating non-existent asset raises.""" @@ -281,8 +294,9 @@ def test_record_task_update_with_artifacts(self, repo_with_task): ) asset = repo_with_task.get_asset_record("project1", "hash-abc") - assert len(asset.artifacts) == 1 - assert asset.artifacts[0].relative_path == "hash-abc_text3d.glb" + assert asset is not None + assert len(asset["artifacts"]) == 1 + assert asset["artifacts"][0]["relative_path"] == "hash-abc_text3d.glb" class TestTaskLookup: @@ -308,17 +322,18 @@ def test_find_task_by_id_with_project(self, repo_with_tasks): result = repo_with_tasks.find_task_by_id("task-project1-123", project="project1") assert result is not None - project, spec_hash, _asset = result - assert project == "project1" - assert spec_hash == "hash-project1" + assert isinstance(result, ExtendedDict) + assert result["project"] == "project1" + assert 
result["spec_hash"] == "hash-project1" + assert result["asset"]["asset_spec_hash"] == "hash-project1" def test_find_task_by_id_without_project(self, repo_with_tasks): """Test finding task by scanning all project.""" result = repo_with_tasks.find_task_by_id("task-project2-123") assert result is not None - project, _spec_hash, _asset = result - assert project == "project2" + assert isinstance(result, ExtendedDict) + assert result["project"] == "project2" def test_find_task_not_found(self, repo_with_tasks): """Test finding non-existent task.""" @@ -343,8 +358,9 @@ def test_list_pending_assets(self, task_repository): task_repository.record_task_submission(submission) pending = task_repository.list_pending_assets("project1") + assert isinstance(pending, ExtendedList) assert len(pending) == 1 - assert pending[0].asset_spec_hash == "hash-pending" + assert pending[0]["asset_spec_hash"] == "hash-pending" def test_list_pending_excludes_completed(self, task_repository): """Test that completed assets are not listed.""" diff --git a/tests/connectors/meshy/test_webhooks.py b/tests/connectors/meshy/test_webhooks.py index 6d250bf..6367417 100644 --- a/tests/connectors/meshy/test_webhooks.py +++ b/tests/connectors/meshy/test_webhooks.py @@ -22,7 +22,16 @@ WebhookModelUrls, WebhookRiggingResult, ) -from extended_data.containers import ExtendedDict, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedString, extend_data + + +def _task_lookup_payload(project: str, spec_hash: str, asset_manifest: AssetManifest) -> ExtendedDict: + """Build the repository task lookup payload shape.""" + return extend_data({ + "project": project, + "spec_hash": spec_hash, + "asset": asset_manifest.model_dump(mode="json"), + }) class TestMeshyWebhookPayload: @@ -128,7 +137,7 @@ def mock_repository(self, temp_dir): ], ) - repo.find_task_by_id.return_value = ("project1", "hash-abc123", asset_manifest) + repo.find_task_by_id.return_value = _task_lookup_payload("project1", "hash-abc123", 
asset_manifest) repo.record_task_update.return_value = None return repo @@ -194,7 +203,7 @@ def test_handle_webhook_failed_task(self, webhook_handler, mock_repository, webh ) ], ) - mock_repository.find_task_by_id.return_value = ("project1", "hash-xyz", asset_manifest) + mock_repository.find_task_by_id.return_value = _task_lookup_payload("project1", "hash-xyz", asset_manifest) payload = MeshyWebhookPayload(**webhook_payload_failed) result = webhook_handler.handle_webhook(payload) @@ -234,7 +243,7 @@ def test_handle_webhook_downloads_artifact(self, temp_dir, webhook_payload_succe ) ], ) - mock_repository.find_task_by_id.return_value = ("project1", "hash-abc123", asset_manifest) + mock_repository.find_task_by_id.return_value = _task_lookup_payload("project1", "hash-abc123", asset_manifest) mock_repository.record_task_update.return_value = None def mock_download(url, output_path): From 7c63a0f6b5c84979acffd4b0ab57bb17a0e61c58 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:10:02 -0500 Subject: [PATCH 083/287] feat: promote connector optional metadata --- src/extended_data/connectors/_optional.py | 38 ++++++++++++------- src/extended_data/connectors/registry.py | 22 ++++++----- .../connectors/test_optional_dependencies.py | 30 ++++++++++++++- 3 files changed, 66 insertions(+), 24 deletions(-) diff --git a/src/extended_data/connectors/_optional.py b/src/extended_data/connectors/_optional.py index 54ac540..b3d45e0 100644 --- a/src/extended_data/connectors/_optional.py +++ b/src/extended_data/connectors/_optional.py @@ -18,7 +18,7 @@ import importlib -from typing import Any +from typing import Any, cast from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data @@ -84,7 +84,7 @@ def is_available(package: str) -> bool: return False -def get_extra_for_package(package: str) -> str | None: +def get_extra_for_package(package: str) -> ExtendedString | None: """Get the extra name for a package. 
Args: @@ -93,7 +93,10 @@ def get_extra_for_package(package: str) -> str | None: Returns: Extra name or None if not mapped """ - return PACKAGE_TO_EXTRA.get(package) + extra = PACKAGE_TO_EXTRA.get(package) + if extra is None: + return None + return ExtendedString(extra) def require_extra(package: str, extra: str | None = None) -> Any: @@ -114,7 +117,7 @@ def require_extra(package: str, extra: str | None = None) -> Any: except ImportError as e: if package in PACKAGE_INSTALL_HINTS: raise ImportError(f"Package '{package}' is required but not installed.\n{PACKAGE_INSTALL_HINTS[package]}") from e - extra_name = extra or get_extra_for_package(package) or package + extra_name = str(extra or get_extra_for_package(package) or package) raise ImportError( f"Package '{package}' is required but not installed.\n" f"Install with: pip install extended-data[{extra_name}]" @@ -231,27 +234,36 @@ def _normalize_connector_name(connector: str) -> str: return connector.strip().lower() -def get_extra_for_connector(connector: str) -> str | None: +def get_extra_for_connector(connector: str) -> ExtendedString | None: """Get the optional dependency extra for a connector.""" - return CONNECTOR_EXTRAS.get(_normalize_connector_name(connector)) + extra = CONNECTOR_EXTRAS.get(_normalize_connector_name(connector)) + if extra is None: + return None + return ExtendedString(extra) -def get_connector_requirements(connector: str) -> list[str]: +def get_connector_requirements(connector: str) -> ExtendedList[ExtendedString]: """Get package imports required by a connector.""" - return list(CONNECTOR_REQUIREMENTS.get(_normalize_connector_name(connector), [])) + return cast( + ExtendedList[ExtendedString], + extend_data(list(CONNECTOR_REQUIREMENTS.get(_normalize_connector_name(connector), []))), + ) -def get_missing_connector_requirements(connector: str) -> list[str]: +def get_missing_connector_requirements(connector: str) -> ExtendedList[ExtendedString]: """Get missing package imports for a connector.""" - 
return [pkg for pkg in get_connector_requirements(connector) if not is_available(pkg)] + return cast( + ExtendedList[ExtendedString], + extend_data([str(pkg) for pkg in get_connector_requirements(connector) if not is_available(str(pkg))]), + ) -def get_connector_install_command(connector: str) -> str | None: +def get_connector_install_command(connector: str) -> ExtendedString | None: """Get the pip install command for a connector extra.""" extra = get_extra_for_connector(connector) if extra is None: return None - return f"pip install extended-data[{extra}]" + return ExtendedString(f"pip install extended-data[{extra}]") def is_connector_available(connector: str) -> bool: @@ -290,6 +302,6 @@ def require_connector(connector: str) -> None: extra = get_extra_for_connector(connector) or connector raise ImportError( f"The '{connector}' connector requires additional dependencies.\n" - f"Missing packages: {', '.join(missing)}\n" + f"Missing packages: {', '.join(str(package) for package in missing)}\n" f"Install with: pip install extended-data[{extra}]" ) diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 6b420d1..30da5b5 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -160,14 +160,14 @@ def _discover_connectors() -> dict[str, builtins.type[VendorConnectorBase]]: def _raise_missing_builtin_connector(name: str, error: ImportError) -> NoReturn: """Raise a clear install hint for a known built-in connector.""" - install = get_connector_install_command(name) or f"pip install extended-data[{BUILTIN_CONNECTORS[name].extra}]" + install = str(get_connector_install_command(name) or f"pip install extended-data[{BUILTIN_CONNECTORS[name].extra}]") missing = get_missing_connector_requirements(name) msg = ( f"The '{name}' connector is built in but its optional dependencies are not installed.\n" f"Install with: {install}" ) if missing: - msg = f"{msg}\nMissing packages: {', 
'.join(missing)}" + msg = f"{msg}\nMissing packages: {', '.join(str(package) for package in missing)}" if str(error): msg = f"{msg}\nOriginal import error: {error}" raise ImportError(msg) from error @@ -218,7 +218,7 @@ def get_connector_class(name: str) -> builtins.type[VendorConnectorBase]: if name_lower in BUILTIN_CONNECTORS: missing = get_missing_connector_requirements(name_lower) if missing: - error = ImportError(f"Missing packages: {', '.join(missing)}") + error = ImportError(f"Missing packages: {', '.join(str(package) for package in missing)}") _raise_missing_builtin_connector(name_lower, error) return connectors[name_lower] @@ -267,16 +267,18 @@ def _available_connector_info(name: str, cls: builtins.type[VendorConnectorBase] """Build metadata for a loadable connector.""" spec = BUILTIN_CONNECTORS.get(name) source = "builtin" if spec else "entry_point" - extra = spec.extra if spec else get_extra_for_connector(name) - requirements = tuple(get_connector_requirements(name)) - missing = tuple(get_missing_connector_requirements(name)) + extra_value = spec.extra if spec else get_extra_for_connector(name) + extra = str(extra_value) if extra_value is not None else None + requirements = tuple(str(requirement) for requirement in get_connector_requirements(name)) + missing = tuple(str(requirement) for requirement in get_missing_connector_requirements(name)) + install_value = get_connector_install_command(name) return ConnectorInfo( name=name, available=not missing, source=source, extra=extra, - install=get_connector_install_command(name), + install=str(install_value) if install_value is not None else None, requirements=requirements, missing=missing, class_name=cls.__name__, @@ -301,9 +303,9 @@ def _missing_builtin_connector_info(name: str, error: ImportError | None) -> Con available=False, source="builtin", extra=spec.extra, - install=get_connector_install_command(name), - requirements=tuple(get_connector_requirements(name)), - 
missing=tuple(get_missing_connector_requirements(name)), + install=str(install) if (install := get_connector_install_command(name)) is not None else None, + requirements=tuple(str(requirement) for requirement in get_connector_requirements(name)), + missing=tuple(str(requirement) for requirement in get_missing_connector_requirements(name)), class_name=spec.class_name, module=spec.module_path, base_url=None, diff --git a/tests/connectors/test_optional_dependencies.py b/tests/connectors/test_optional_dependencies.py index 38b5910..056d8b0 100644 --- a/tests/connectors/test_optional_dependencies.py +++ b/tests/connectors/test_optional_dependencies.py @@ -27,7 +27,35 @@ def test_builtin_connector_metadata_maps_stay_aligned() -> None: assert names == set(_optional.CONNECTOR_EXTRAS) for name, spec in registry.BUILTIN_CONNECTORS.items(): - assert _optional.get_extra_for_connector(name) == spec.extra + extra = _optional.get_extra_for_connector(name) + assert isinstance(extra, ExtendedString) + assert extra == spec.extra + + +def test_connector_optional_metadata_returns_extended_values(monkeypatch) -> None: + """Connector optional dependency metadata helpers return extended values.""" + monkeypatch.setattr(_optional, "is_available", lambda package: package == "present") + monkeypatch.setitem(_optional.CONNECTOR_REQUIREMENTS, "custom", ["present", "missing"]) + monkeypatch.setitem(_optional.CONNECTOR_EXTRAS, "custom", "custom-extra") + + package_extra = _optional.get_extra_for_package("boto3") + connector_extra = _optional.get_extra_for_connector("custom") + requirements = _optional.get_connector_requirements("custom") + missing = _optional.get_missing_connector_requirements("custom") + install = _optional.get_connector_install_command("custom") + + assert isinstance(package_extra, ExtendedString) + assert package_extra == "aws" + assert isinstance(connector_extra, ExtendedString) + assert connector_extra == "custom-extra" + assert isinstance(requirements, ExtendedList) + 
assert requirements == ["present", "missing"] + assert isinstance(requirements[0], ExtendedString) + assert isinstance(missing, ExtendedList) + assert missing == ["missing"] + assert isinstance(missing[0], ExtendedString) + assert isinstance(install, ExtendedString) + assert install == "pip install extended-data[custom-extra]" def test_builtin_connectors_are_registered_as_entry_points() -> None: From b0ec62cb70eb4bc734c996371a9cd20f0dbf9cc4 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:11:15 -0500 Subject: [PATCH 084/287] feat: promote anthropic model metadata --- src/extended_data/connectors/anthropic/__init__.py | 8 ++++---- tests/connectors/test_anthropic.py | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index 2dc6bb7..f24165d 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ b/src/extended_data/connectors/anthropic/__init__.py @@ -37,7 +37,7 @@ from pydantic import BaseModel, ConfigDict, Field from extended_data.connectors.base import VendorConnectorBase -from extended_data.containers import to_builtin +from extended_data.containers import ExtendedDict, extend_data, to_builtin from extended_data.logging import Logging @@ -261,13 +261,13 @@ def is_available() -> bool: return bool(os.environ.get("ANTHROPIC_API_KEY")) @staticmethod - def get_available_models() -> dict[str, str]: + def get_available_models() -> ExtendedDict: """Get dictionary of available Claude models. Returns: - Dictionary mapping model IDs to display names. + Extended dictionary mapping model IDs to display names. """ - return CLAUDE_MODELS.copy() + return extend_data(CLAUDE_MODELS.copy()) def _handle_error(self, response: httpx.Response) -> None: """Handle API error responses. 
diff --git a/tests/connectors/test_anthropic.py b/tests/connectors/test_anthropic.py index e878e73..6ea1cea 100644 --- a/tests/connectors/test_anthropic.py +++ b/tests/connectors/test_anthropic.py @@ -102,11 +102,12 @@ def test_is_available_false(self): assert AnthropicConnector.is_available() is False def test_get_available_models(self): - """get_available_models should return model dictionary.""" + """get_available_models should return extended model metadata.""" models = AnthropicConnector.get_available_models() assert "claude-sonnet-4-20250514" in models assert "claude-opus-4-20250514" in models - assert isinstance(models, dict) + assert isinstance(models, ExtendedDict) + assert isinstance(models["claude-sonnet-4-20250514"], ExtendedString) def test_validate_model(self): """validate_model should check against known models.""" From 6bbbda8620f67b65aadc91a916c02004b2220929 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:12:32 -0500 Subject: [PATCH 085/287] feat: promote meshy webhook url payloads --- src/extended_data/connectors/meshy/webhooks/handler.py | 4 ++-- src/extended_data/connectors/meshy/webhooks/schemas.py | 8 +++++--- tests/connectors/meshy/test_webhooks.py | 2 ++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/extended_data/connectors/meshy/webhooks/handler.py b/src/extended_data/connectors/meshy/webhooks/handler.py index 59fc987..5c86be6 100644 --- a/src/extended_data/connectors/meshy/webhooks/handler.py +++ b/src/extended_data/connectors/meshy/webhooks/handler.py @@ -10,7 +10,7 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.webhooks.schemas import MeshyWebhookPayload -from extended_data.containers import ExtendedDict, extend_data +from extended_data.containers import ExtendedDict, extend_data, to_builtin from ..persistence.repository import TaskRepository from ..persistence.schemas import ArtifactRecord @@ -113,7 +113,7 @@ def handle_webhook( if payload.status == 
"FAILED": error_message = payload.get_error_message() - result_paths = payload.get_all_urls() + result_paths = to_builtin(payload.get_all_urls()) artifacts = [] if payload.status == "SUCCEEDED" and self.download_artifacts: diff --git a/src/extended_data/connectors/meshy/webhooks/schemas.py b/src/extended_data/connectors/meshy/webhooks/schemas.py index f6eee24..4f0de68 100644 --- a/src/extended_data/connectors/meshy/webhooks/schemas.py +++ b/src/extended_data/connectors/meshy/webhooks/schemas.py @@ -4,6 +4,8 @@ from pydantic import BaseModel, Field +from extended_data.containers import ExtendedDict, extend_data + class WebhookModelUrls(BaseModel): """Model URLs in webhook payload.""" @@ -107,8 +109,8 @@ def get_glb_url(self) -> str | None: return None - def get_all_urls(self) -> dict[str, str]: - """Get all available URLs as a flat dict.""" + def get_all_urls(self) -> ExtendedDict: + """Get all available URLs as an extended flat dict.""" urls = {} # Model URLs @@ -143,4 +145,4 @@ def get_all_urls(self) -> dict[str, str]: if self.thumbnail_url: urls["thumbnail"] = self.thumbnail_url - return urls + return extend_data(urls) diff --git a/tests/connectors/meshy/test_webhooks.py b/tests/connectors/meshy/test_webhooks.py index 6367417..01e3daf 100644 --- a/tests/connectors/meshy/test_webhooks.py +++ b/tests/connectors/meshy/test_webhooks.py @@ -106,6 +106,8 @@ def test_get_all_urls(self): thumbnail_url="https://example.com/thumb.png", ) urls = payload.get_all_urls() + assert isinstance(urls, ExtendedDict) + assert isinstance(urls["glb"], ExtendedString) assert urls["glb"] == "https://example.com/model.glb" assert urls["fbx"] == "https://example.com/model.fbx" assert urls["thumbnail"] == "https://example.com/thumb.png" From eb27d6228fd1f58732c5acdbb2b0802ca2956c06 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:15:18 -0500 Subject: [PATCH 086/287] feat: promote ai tool schema payloads --- src/extended_data/connectors/ai_tools.py | 10 ++++++---- 
src/extended_data/connectors/base.py | 11 +++++++---- tests/connectors/test_ai_tools.py | 8 ++++++++ tests/connectors/test_base.py | 24 ++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/src/extended_data/connectors/ai_tools.py b/src/extended_data/connectors/ai_tools.py index 82435b6..660719d 100644 --- a/src/extended_data/connectors/ai_tools.py +++ b/src/extended_data/connectors/ai_tools.py @@ -8,12 +8,14 @@ import builtins -from typing import Any +from typing import cast from pydantic import BaseModel +from extended_data.containers import ExtendedDict, extend_data -def get_pydantic_schema(model: builtins.type[BaseModel]) -> dict[str, Any]: + +def get_pydantic_schema(model: builtins.type[BaseModel]) -> ExtendedDict: """Generate a Vercel AI SDK-compatible JSON schema from a Pydantic model. This function removes the top-level 'title' and 'description' fields, @@ -25,7 +27,7 @@ def get_pydantic_schema(model: builtins.type[BaseModel]) -> dict[str, Any]: model: The Pydantic model class. Returns: - A JSON schema dictionary. + An extended JSON schema dictionary. 
""" schema = model.model_json_schema() @@ -33,4 +35,4 @@ def get_pydantic_schema(model: builtins.type[BaseModel]) -> dict[str, Any]: schema.pop("title", None) schema.pop("description", None) - return schema + return cast(ExtendedDict, extend_data(schema)) diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index 6608c54..5d6a34c 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -60,6 +60,8 @@ def my_operation(self) -> dict: from langchain_core.tools import StructuredTool from pydantic import BaseModel + from extended_data.containers import ExtendedDict, ExtendedList + class RateLimitError(Exception): """Raised when API rate limit is hit - triggers retry.""" @@ -499,21 +501,22 @@ def get_tools(self) -> list[StructuredTool]: # AI Tool Definition Helpers # ------------------------------------------------------------------------- - def get_ai_tool_definitions(self) -> list[dict[str, Any]]: + def get_ai_tool_definitions(self) -> ExtendedList[ExtendedDict]: """Get tool definitions in Vercel AI SDK-compatible format. Returns: - List of AI tool definition dicts + Extended list of AI tool definition payloads. """ import inspect from extended_data.connectors.ai_tools import get_pydantic_schema + from extended_data.containers import to_builtin definitions = [] for name, func in self._tool_functions.items(): # Use Pydantic schema if available if name in self._tool_schemas: - input_schema = get_pydantic_schema(self._tool_schemas[name]) + input_schema = to_builtin(get_pydantic_schema(self._tool_schemas[name])) else: # Fallback to inspect-based schema generation sig = inspect.signature(func) @@ -550,7 +553,7 @@ def get_ai_tool_definitions(self) -> list[dict[str, Any]]: } ) - return definitions + return self.extend_result(definitions) def handle_ai_tool_call(self, name: str, arguments: dict[str, Any]) -> Any: """Handle an AI tool call. 
diff --git a/tests/connectors/test_ai_tools.py b/tests/connectors/test_ai_tools.py index 9108683..f944936 100644 --- a/tests/connectors/test_ai_tools.py +++ b/tests/connectors/test_ai_tools.py @@ -4,6 +4,8 @@ from pydantic import BaseModel, Field +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString + class TestGetPydanticSchema: """Tests for get_pydantic_schema function.""" @@ -20,6 +22,10 @@ class MyTool(BaseModel): schema = get_pydantic_schema(MyTool) + assert isinstance(schema, ExtendedDict) + assert isinstance(schema["properties"], ExtendedDict) + assert isinstance(schema["required"], ExtendedList) + assert isinstance(schema["type"], ExtendedString) assert schema == { "type": "object", "properties": { @@ -41,6 +47,7 @@ class MyTool(BaseModel): schema = get_pydantic_schema(MyTool) + assert isinstance(schema, ExtendedDict) assert schema == { "type": "object", "properties": { @@ -70,6 +77,7 @@ class MyTool(BaseModel): schema = get_pydantic_schema(MyTool) + assert isinstance(schema, ExtendedDict) assert schema == { "type": "object", "properties": { diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py index a55db58..743514b 100644 --- a/tests/connectors/test_base.py +++ b/tests/connectors/test_base.py @@ -9,6 +9,8 @@ import httpx import pytest +from pydantic import BaseModel, Field + from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.logging import Logging @@ -140,6 +142,28 @@ def test_handle_ai_tool_call_promotes_result_payloads() -> None: assert result["status"].upper_first() == "Ok" +def test_get_ai_tool_definitions_promotes_definition_payloads() -> None: + """AI tool definition export should expose extended containers.""" + + class StatusArgs(BaseModel): + verbose: bool = Field(..., description="Include detailed status.") + + def status(verbose: bool) -> dict[str, str]: + """Read service status.""" + 
return {"status": "ok" if verbose else "quiet"} + + connector = _connector() + connector.register_tool(status, name="status", schema=StatusArgs) + + definitions = connector.get_ai_tool_definitions() + + assert isinstance(definitions, ExtendedList) + assert isinstance(definitions[0], ExtendedDict) + assert definitions[0]["name"] == "status" + assert isinstance(definitions[0]["inputSchema"], ExtendedDict) + assert isinstance(definitions[0]["inputSchema"]["properties"]["verbose"]["description"], ExtendedString) + + def test_request_uses_connector_max_retries(mocker) -> None: """Connector subclasses control the retry attempt count.""" From 660d467594823b2dcbc1a74b2a9b260ede495a93 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:16:52 -0500 Subject: [PATCH 087/287] feat: promote meshy embedding payloads --- .../meshy/persistence/vector_store.py | 6 ++-- tests/connectors/meshy/test_vector_store.py | 28 ++++++++++++++++++- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/extended_data/connectors/meshy/persistence/vector_store.py b/src/extended_data/connectors/meshy/persistence/vector_store.py index 1f457bf..efa170c 100644 --- a/src/extended_data/connectors/meshy/persistence/vector_store.py +++ b/src/extended_data/connectors/meshy/persistence/vector_store.py @@ -570,7 +570,7 @@ def __exit__( # Convenience function for getting embeddings -def get_embedding(text: str, model: str = "all-MiniLM-L6-v2") -> list[float] | None: +def get_embedding(text: str, model: str = "all-MiniLM-L6-v2") -> ExtendedList[float] | None: """Get embedding for text using sentence-transformers. 
Args: @@ -578,13 +578,13 @@ def get_embedding(text: str, model: str = "all-MiniLM-L6-v2") -> list[float] | N model: Model name (default: all-MiniLM-L6-v2) Returns: - Embedding vector or None if sentence-transformers not available + Extended embedding vector or None if sentence-transformers not available """ try: from sentence_transformers import SentenceTransformer encoder = SentenceTransformer(model) embedding = encoder.encode(text) - return embedding.tolist() + return cast(ExtendedList[float], extend_data(embedding.tolist())) except ImportError: return None diff --git a/tests/connectors/meshy/test_vector_store.py b/tests/connectors/meshy/test_vector_store.py index baac662..e8d7281 100644 --- a/tests/connectors/meshy/test_vector_store.py +++ b/tests/connectors/meshy/test_vector_store.py @@ -2,8 +2,12 @@ from __future__ import annotations +import sys + +from types import ModuleType + from extended_data.connectors.meshy.persistence import vector_store as vector_store_module -from extended_data.connectors.meshy.persistence.vector_store import VectorStore +from extended_data.connectors.meshy.persistence.vector_store import VectorStore, get_embedding from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString @@ -103,3 +107,25 @@ def test_search_similar_without_vector_extension_returns_extended_list(temp_dir, assert isinstance(result, ExtendedList) assert result == [] + + +def test_get_embedding_returns_extended_vector(monkeypatch) -> None: + """Embedding helper should promote vectors when the optional encoder exists.""" + + class _FakeEmbedding: + def tolist(self) -> list[float]: + return [0.1, 0.2, 0.3] + + class _FakeEncoder: + def encode(self, text: str) -> _FakeEmbedding: + assert text == "cute otter" + return _FakeEmbedding() + + module = ModuleType("sentence_transformers") + module.SentenceTransformer = lambda model: _FakeEncoder() # type: ignore[attr-defined] + monkeypatch.setitem(sys.modules, "sentence_transformers", module) + + result = 
get_embedding("cute otter") + + assert isinstance(result, ExtendedList) + assert result == [0.1, 0.2, 0.3] From 6182ecfed7fa596e71ba4c74430c810f256a7668 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:27:11 -0500 Subject: [PATCH 088/287] feat: promote input provider snapshots --- README.md | 5 +++ docs/package-surface.md | 25 ++++++++--- examples/inputs/README.md | 1 + examples/inputs/basic_usage.py | 1 + examples/inputs/decorator_api.py | 5 ++- pyproject.toml | 1 - src/extended_data/inputs/__main__.py | 62 ++++++++++++++------------ src/extended_data/inputs/decorators.py | 28 +++++++----- tests/inputs/test_decorators.py | 20 +++++++++ tests/inputs/test_main.py | 35 +++++++++++++++ uv.lock | 11 ----- 11 files changed, 135 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 322383f..3a8c7f3 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,11 @@ decoded files, Base64 payloads, and directed inputs can immediately use `DataWorkflow` makes those compositions first-class: read or decode data, apply named transformations, write an output artifact, and keep the step trail in a `WorkflowResult`. Missing workflow inputs and empty writes fail loudly. +`InputProvider` stores its active, frozen, and merged input snapshots as +`ExtendedDict` values, so direct input-data access can use Tier 2 container +methods. `get_input()` remains the scalar coercion boundary for booleans, +numbers, paths, datetimes, and credential strings; pass `as_extended=True` when +an injected raw input value should stay in Tier 2 form. More detail lives in [`docs/package-surface.md`](docs/package-surface.md). diff --git a/docs/package-surface.md b/docs/package-surface.md index 4603978..e69e025 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -107,11 +107,26 @@ and stdin, then decodes or coerces values through the primitive layer. Its container bridge as file and Base64 decoding. 
Requested input coercions are strict, and diagnostics identify the input key and failed operation without echoing raw values from environment variables, stdin, JSON, YAML, or Base64 -payloads. `Logging` provides structured lifecycle logging for applications and -connector workflows without creating log files unless file output is explicitly -enabled. `ConnectorFabric` caches and coordinates vendor connectors while -sharing input loading, logging, data normalization, retry behavior, and -serialization. +payloads. Active, frozen, shifted, and merged input snapshots are `ExtendedDict` +values, and input decorator metadata/options are promoted the same way. The old +case-insensitive input mapping is intentionally not preserved; exact keys keep +configuration wiring explicit while still letting direct snapshots use Tier 2 +methods. + +```python +inputs = InputProvider(inputs={"service": {"name": "api"}}, from_environment=False) +assert inputs.inputs["service"]["name"].upper_first() == "Api" +assert isinstance(inputs.merge_inputs({"service": {"region": "us-east-1"}}), ExtendedDict) +``` + +`get_input()` is the scalar coercion boundary for booleans, numbers, paths, +datetimes, and credential strings. Pass `as_extended=True` when a raw injected +input value should remain in Tier 2 form. + +`Logging` provides structured lifecycle logging for applications and connector +workflows without creating log files unless file output is explicitly enabled. +`ConnectorFabric` caches and coordinates vendor connectors while sharing input +loading, logging, data normalization, retry behavior, and serialization. 
## Connector Fabric diff --git a/examples/inputs/README.md b/examples/inputs/README.md index 7556b32..c8e6439 100644 --- a/examples/inputs/README.md +++ b/examples/inputs/README.md @@ -24,6 +24,7 @@ uv run python examples/inputs/encoding_decoding.py Demonstrates the `InputProvider` API: - Loading inputs from environment variables - Environment variable prefix filtering +- Direct `ExtendedDict`/`ExtendedString` input snapshot access - Type conversion (boolean, integer, float) - Default values - Input freezing and thawing diff --git a/examples/inputs/basic_usage.py b/examples/inputs/basic_usage.py index a5507f7..c9dd423 100644 --- a/examples/inputs/basic_usage.py +++ b/examples/inputs/basic_usage.py @@ -38,6 +38,7 @@ def main() -> None: inputs.get_input("PORT", is_integer=True) inputs.get_input("TIMEOUT", is_float=True) inputs.get_input("NAME") + inputs.inputs["NAME"].to_snake_case() # Demonstrate default values inputs.get_input("LOG_LEVEL", default="INFO") diff --git a/examples/inputs/decorator_api.py b/examples/inputs/decorator_api.py index 1897e60..1b5f7f3 100644 --- a/examples/inputs/decorator_api.py +++ b/examples/inputs/decorator_api.py @@ -26,6 +26,7 @@ class UserService: """Example service demonstrating decorator-based input handling.""" + @input_config("user_id", source_name="USER_ID", required=True) def get_user(self, user_id: str) -> dict[str, str]: """Get a user by ID. @@ -43,7 +44,7 @@ def authenticated_call(self, api_key: str, endpoint: str = "/users") -> str: """ return f"Calling {endpoint} with key {api_key[:4]}..." - @input_config("config", decode_from_json=True) + @input_config("config", source_name="CONFIG", decode_from_json=True) def parse_config(self, config: dict[str, str] | None = None) -> dict[str, str]: """Parse configuration from JSON input. 
@@ -51,7 +52,7 @@ def parse_config(self, config: dict[str, str] | None = None) -> dict[str, str]: """ return config or {} - @input_config("port", is_integer=True, default=8080) + @input_config("port", source_name="PORT", is_integer=True, default=8080) def get_port(self, port: int) -> int: """Get the configured port. diff --git a/pyproject.toml b/pyproject.toml index 55cbb41..25bd2d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,6 @@ classifiers = [ "Typing :: Typed", ] dependencies = [ - "case-insensitive-dictionary>=0.2.1", "deepmerge>=2.0", "gitpython>=3.1.0", "httpx>=0.28.1", diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index c1a5668..1c7e06c 100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -16,10 +16,10 @@ from copy import deepcopy from typing import TYPE_CHECKING, Any -from case_insensitive_dict import CaseInsensitiveDict from deepmerge import Merger # type: ignore[attr-defined] -from extended_data.containers.factory import extend_data +from extended_data.containers.factory import extend_data, to_builtin +from extended_data.containers.mappings import ExtendedDict from extended_data.io.base64 import base64_decode from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.json import decode_json @@ -38,8 +38,8 @@ class InputProvider: stdin, or provided dictionaries. Attributes: - inputs (CaseInsensitiveDict): Dictionary to store inputs. - frozen_inputs (CaseInsensitiveDict): Dictionary to store frozen inputs. + inputs (ExtendedDict): Dictionary to store inputs. + frozen_inputs (ExtendedDict): Dictionary to store frozen inputs. from_stdin (bool): Flag indicating if inputs were read from stdin. merger (Merger): Object to manage deep merging of dictionaries. 
""" @@ -80,15 +80,15 @@ def __init__( current_inputs = self._merge_inputs(stdin_inputs, current_inputs) self.from_stdin = from_stdin - self.inputs: CaseInsensitiveDict[str, Any] = CaseInsensitiveDict(current_inputs) - self.frozen_inputs: CaseInsensitiveDict[str, Any] = CaseInsensitiveDict() + self.inputs: ExtendedDict = ExtendedDict(current_inputs) + self.frozen_inputs: ExtendedDict = ExtendedDict() @staticmethod def _normalize_inputs(inputs: Mapping[str, Any] | None) -> dict[str, Any]: if inputs is None or is_nothing(inputs): return {} - return dict(inputs) + return to_builtin(dict(inputs)) @staticmethod def _filtered_environment( @@ -108,10 +108,10 @@ def _filtered_environment( def _merge_inputs(self, base: Mapping[str, Any], incoming: Mapping[str, Any]) -> dict[str, Any]: if is_nothing(incoming): - return deepcopy(dict(base)) + return deepcopy(to_builtin(base)) - clean_base = deepcopy(dict(base)) - clean_incoming = deepcopy(dict(incoming)) + clean_base = deepcopy(to_builtin(base)) + clean_incoming = deepcopy(to_builtin(incoming)) return self.merger.merge(clean_base, clean_incoming) @@ -162,6 +162,7 @@ def get_input( is_float: bool = False, is_path: bool = False, is_datetime: bool = False, + as_extended: bool = False, ) -> Any: """Retrieves an input by key, with options for type conversion and default values. @@ -178,25 +179,26 @@ def get_input( is_float (bool): Whether to convert the input to a float. is_path (bool): Whether to convert the input to a Path object. is_datetime (bool): Whether to convert the input to a datetime object. + as_extended (bool): Whether to wrap the returned value in Tier 2 containers. Returns: Any: The retrieved input, potentially converted or defaulted. 
""" - inp = self.inputs.get(k, default) + inp = to_builtin(self.inputs.get(k, default)) if is_nothing(inp): inp = default if is_bool and not isinstance(inp, bool): try: - inp = strtobool(inp, raise_on_error=True) + inp = strtobool(str(inp), raise_on_error=True) except (TypeError, ValueError) as exc: message = f"Input {k} cannot be converted to boolean." raise RuntimeError(message) from exc if is_integer and inp is not None and not isinstance(inp, int): try: - inp = strtoint(inp, raise_on_error=True) + inp = strtoint(str(inp), raise_on_error=True) except (TypeError, ValueError) as exc: message = f"Input {k} cannot be converted to integer." raise RuntimeError(message) from exc @@ -227,6 +229,9 @@ def get_input( message = f"Required input {k} not passed. Available input keys: {available}." raise RuntimeError(message) + if as_extended: + return extend_data(inp) + return inp def decode_input( @@ -303,51 +308,52 @@ def decode_input( return conf - def freeze_inputs(self) -> CaseInsensitiveDict[str, Any]: + def freeze_inputs(self) -> ExtendedDict: """Freezes the current inputs, preventing further modifications until thawed. Returns: - CaseInsensitiveDict: The frozen inputs. + ExtendedDict: The frozen inputs. """ if is_nothing(self.frozen_inputs): - self.frozen_inputs = deepcopy(self.inputs) - self.inputs = CaseInsensitiveDict() + self.frozen_inputs = ExtendedDict(deepcopy(to_builtin(self.inputs))) + self.inputs = ExtendedDict() return self.frozen_inputs - def thaw_inputs(self) -> CaseInsensitiveDict[str, Any]: + def thaw_inputs(self) -> ExtendedDict: """Thaws the inputs, merging the frozen inputs back into the current inputs. Returns: - CaseInsensitiveDict: The thawed inputs. + ExtendedDict: The thawed inputs. 
""" if is_nothing(self.inputs): - self.inputs = deepcopy(self.frozen_inputs) - self.frozen_inputs = CaseInsensitiveDict() + self.inputs = ExtendedDict(deepcopy(to_builtin(self.frozen_inputs))) + self.frozen_inputs = ExtendedDict() return self.inputs - self.inputs = self.merger.merge(deepcopy(self.inputs), deepcopy(self.frozen_inputs)) - self.frozen_inputs = CaseInsensitiveDict() + merged = self._merge_inputs(self.inputs, self.frozen_inputs) + self.inputs = ExtendedDict(merged) + self.frozen_inputs = ExtendedDict() return self.inputs - def merge_inputs(self, new_inputs: Mapping[str, Any] | None) -> CaseInsensitiveDict[str, Any]: + def merge_inputs(self, new_inputs: Mapping[str, Any] | None) -> ExtendedDict: """Merge new inputs into the current inputs using deep merge semantics. Args: new_inputs (Mapping[str, Any] | None): Incoming values to merge. Returns: - CaseInsensitiveDict[str, Any]: The updated input mapping. + ExtendedDict: The updated input mapping. """ merged = self._merge_inputs(self.inputs, self._normalize_inputs(new_inputs)) - self.inputs = CaseInsensitiveDict(merged) + self.inputs = ExtendedDict(merged) return self.inputs - def shift_inputs(self) -> CaseInsensitiveDict[str, Any]: + def shift_inputs(self) -> ExtendedDict: """Shifts between frozen and thawed inputs. Returns: - CaseInsensitiveDict: The resulting inputs after the shift. + ExtendedDict: The resulting inputs after the shift. 
""" if is_nothing(self.frozen_inputs): return self.freeze_inputs() diff --git a/src/extended_data/inputs/decorators.py b/src/extended_data/inputs/decorators.py index 9fb1365..ab92297 100644 --- a/src/extended_data/inputs/decorators.py +++ b/src/extended_data/inputs/decorators.py @@ -24,6 +24,7 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any +from extended_data.containers import ExtendedDict, to_builtin from extended_data.inputs.__main__ import InputProvider @@ -93,6 +94,7 @@ def resolve(self, provider: InputProvider) -> Any | object: is_float=self.is_float, is_path=self.is_path, is_datetime=self.is_datetime, + as_extended=self.as_extended, ) if value is None and not source_present and self.default is _MISSING and not self.required: @@ -105,7 +107,7 @@ def resolve(self, provider: InputProvider) -> Any | object: class InputProviderMetadata: """Metadata exposed on decorated classes for runtime integrations.""" - options: dict[str, Any] = field(default_factory=dict) + options: ExtendedDict = field(default_factory=ExtendedDict) class InputContext: @@ -120,13 +122,15 @@ def __init__( env_prefix: str | None = None, strip_env_prefix: bool = False, ): - self._options: dict[str, Any] = { - "inputs": dict(inputs) if inputs else None, - "from_environment": from_environment, - "from_stdin": from_stdin, - "env_prefix": env_prefix, - "strip_env_prefix": strip_env_prefix, - } + self._options = ExtendedDict( + { + "inputs": dict(inputs) if inputs else None, + "from_environment": from_environment, + "from_stdin": from_stdin, + "env_prefix": env_prefix, + "strip_env_prefix": strip_env_prefix, + } + ) self._instance: InputProvider | None = None def refresh(self, **overrides: Any) -> None: @@ -135,9 +139,9 @@ def refresh(self, **overrides: Any) -> None: self._instance = None @property - def options(self) -> dict[str, Any]: + def options(self) -> ExtendedDict: """Current configuration (copy) used for instantiation.""" - return dict(self._options) + 
return ExtendedDict(to_builtin(self._options)) def resolve(self, config: InputConfig) -> Any | object: """Resolve a parameter value using the provided configuration.""" @@ -150,7 +154,7 @@ def input_provider(self) -> InputProvider: def _ensure_instance(self) -> InputProvider: if self._instance is None: - kwargs = {k: v for k, v in self._options.items() if v is not None} + kwargs = {k: v for k, v in to_builtin(self._options).items() if v is not None} self._instance = InputProvider(**kwargs) return self._instance @@ -193,7 +197,7 @@ def decorator(cls: builtins.type[Any]) -> builtins.type[Any]: if getattr(cls, "__input_provider_enabled__", False): return cls - metadata = InputProviderMetadata(options={k: v for k, v in base_options.items() if v is not None}) + metadata = InputProviderMetadata(options=ExtendedDict({k: v for k, v in base_options.items() if v is not None})) cls.__input_provider_enabled__ = True cls.__input_provider_metadata__ = metadata diff --git a/tests/inputs/test_decorators.py b/tests/inputs/test_decorators.py index 63b5dd3..ba99175 100644 --- a/tests/inputs/test_decorators.py +++ b/tests/inputs/test_decorators.py @@ -27,6 +27,10 @@ def parse_config(self, config: dict[str, str]) -> dict[str, str]: def parse_extended_config(self, extended_config: ExtendedDict) -> ExtendedDict: return extended_config + @input_config("raw_config", as_extended=True) + def parse_raw_extended_config(self, raw_config: ExtendedDict) -> ExtendedDict: + return raw_config + def greet(self, prefix: str = "hello") -> str: return prefix @@ -65,6 +69,14 @@ def test_decode_from_json_input_config_can_return_extended_containers() -> None: assert isinstance(parsed["name"], ExtendedString) +def test_plain_input_config_can_return_extended_containers() -> None: + service = ExampleService(_input_provider_config={"inputs": {"raw_config": {"name": "api"}}}) + parsed = service.parse_raw_extended_config() + + assert isinstance(parsed, ExtendedDict) + assert isinstance(parsed["name"], 
ExtendedString) + + def test_method_default_used_when_input_missing() -> None: service = ExampleService(_input_provider_config={"inputs": {"domain": "acme.io"}}) assert service.greet() == "hello" @@ -82,3 +94,11 @@ def test_decorator_exposes_input_provider_property() -> None: assert service.input_provider.get_input("domain") == "override.io" assert not hasattr(service, "directed_inputs") + + +def test_decorator_metadata_uses_extended_options() -> None: + metadata = ExampleService.__input_provider_metadata__ + + assert isinstance(metadata.options, ExtendedDict) + assert isinstance(metadata.options["inputs"], ExtendedDict) + assert isinstance(metadata.options["inputs"]["domain"], ExtendedString) diff --git a/tests/inputs/test_main.py b/tests/inputs/test_main.py index 399df58..32ee674 100644 --- a/tests/inputs/test_main.py +++ b/tests/inputs/test_main.py @@ -103,10 +103,34 @@ def test_get_input_with_default(): returning a default value if the key is not found. """ dic = InputProvider(inputs={"key1": "value1"}) + assert isinstance(dic.inputs, ExtendedDict) + assert isinstance(dic.inputs["key1"], ExtendedString) assert dic.get_input("key1", default="default_value") == "value1" + assert isinstance(dic.get_input("key1"), str) assert dic.get_input("key2", default="default_value") == "default_value" +def test_get_input_uses_exact_keys(): + """InputProvider now uses the package's exact-key ExtendedDict surface.""" + dic = InputProvider(inputs={"API_KEY": "secret"}, from_environment=False) + + assert dic.get_input("api_key", default="fallback") == "fallback" + assert dic.get_input("API_KEY") == "secret" + + +def test_get_input_can_return_extended_containers(): + """Plain input retrieval can opt into the Tier 2 container layer.""" + dic = InputProvider(inputs={"config": {"service": "api"}, "name": "gateway"}, from_environment=False) + + config = dic.get_input("config", as_extended=True) + name = dic.get_input("name", as_extended=True) + + assert isinstance(config, 
ExtendedDict) + assert isinstance(config["service"], ExtendedString) + assert isinstance(name, ExtendedString) + assert name.upper_first() == "Gateway" + + def test_get_input_required(): """Test retrieving a required input. @@ -276,7 +300,10 @@ def test_freeze_inputs(): """ dic = InputProvider(inputs={"key1": "value1"}) frozen_inputs = dic.freeze_inputs() + assert isinstance(frozen_inputs, ExtendedDict) assert frozen_inputs["key1"] == "value1" + assert isinstance(frozen_inputs["key1"], ExtendedString) + assert isinstance(dic.inputs, ExtendedDict) assert dic.inputs == {} @@ -289,7 +316,10 @@ def test_thaw_inputs(): dic = InputProvider(inputs={"key1": "value1"}) dic.freeze_inputs() dic.thaw_inputs() + assert isinstance(dic.inputs, ExtendedDict) assert dic.inputs["key1"] == "value1" + assert isinstance(dic.inputs["key1"], ExtendedString) + assert isinstance(dic.frozen_inputs, ExtendedDict) assert dic.frozen_inputs == {} @@ -301,6 +331,8 @@ def test_shift_inputs(): """ dic = InputProvider(inputs={"key1": "value1"}) dic.shift_inputs() + assert isinstance(dic.inputs, ExtendedDict) + assert isinstance(dic.frozen_inputs, ExtendedDict) assert dic.inputs == {} assert dic.frozen_inputs["key1"] == "value1" @@ -314,6 +346,8 @@ def test_merge_inputs_deep_merge(): dic = InputProvider(inputs={"nested": {"left": 1}}) merged = dic.merge_inputs({"nested": {"right": 2}}) + assert isinstance(merged, ExtendedDict) + assert isinstance(merged["nested"], ExtendedDict) assert merged["nested"] == {"left": 1, "right": 2} @@ -326,5 +360,6 @@ def test_environment_prefix_filter(monkeypatch): dic = InputProvider(from_environment=True, env_prefix="APP_", strip_env_prefix=True) assert dic.inputs["ALPHA"] == "alpha" + assert dic.inputs["ALPHA"].upper_first() == "Alpha" assert dic.inputs["BETA"] == "beta" assert "UNSCOPED" not in dic.inputs diff --git a/uv.lock b/uv.lock index 367b146..71fafe3 100644 --- a/uv.lock +++ b/uv.lock @@ -314,15 +314,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/be/e6/5a5ec1033613e7812e5b19ec8c2a1889834fde336d8812d53019eac6e04a/botocore-1.43.26-py3-none-any.whl", hash = "sha256:eeb92265bae289555182a46341c998a656ab49c0dbdb762c65b30fe354fcc9e8", size = 15183593, upload-time = "2026-06-09T19:34:03.012Z" }, ] -[[package]] -name = "case-insensitive-dictionary" -version = "0.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/01/5e/8de464e77e2bb6f0b47f2ac94f75cd46f4f14ba55529619e68ae5e81443e/case-insensitive-dictionary-0.2.1.tar.gz", hash = "sha256:7e94726f97eb2c0ceac53209971db50ffc996def663e5e5080d0a1acb4a42280", size = 5938, upload-time = "2022-10-18T10:35:30.298Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/94/2a5f43133bfd07d8c826f5fe277d54fb6c45c0c2ac5df3bc71c2ba8ff4ea/case_insensitive_dictionary-0.2.1-py3-none-any.whl", hash = "sha256:a8971780be1ba25e363db259515f0a2f003013465f82552e2f9aed08d5a9ca28", size = 6060, upload-time = "2022-10-18T10:35:31.701Z" }, -] - [[package]] name = "certifi" version = "2026.5.20" @@ -847,7 +838,6 @@ name = "extended-data" version = "7.0.0" source = { editable = "." 
} dependencies = [ - { name = "case-insensitive-dictionary" }, { name = "deepmerge" }, { name = "gitpython" }, { name = "httpx" }, @@ -987,7 +977,6 @@ requires-dist = [ { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.96.0" }, { name = "boto3", marker = "extra == 'all'", specifier = ">=1.42.92" }, { name = "boto3", marker = "extra == 'aws'", specifier = ">=1.42.92" }, - { name = "case-insensitive-dictionary", specifier = ">=0.2.1" }, { name = "coverage", extras = ["toml"], marker = "extra == 'tests'", specifier = ">=7.6.0" }, { name = "deepmerge", specifier = ">=2.0" }, { name = "extended-data", extras = ["tests", "typing"], marker = "extra == 'dev'" }, From da64571e2452e8a3d0a8217b791ee7a4e7448920 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:29:25 -0500 Subject: [PATCH 089/287] feat: promote logging storage payloads --- README.md | 2 ++ docs/package-surface.md | 4 ++++ src/extended_data/logging/logging.py | 16 +++++++++++++--- .../integration/test_lifecycle_logging.py | 3 +++ tests/logging/test_logging.py | 9 +++++++++ 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3a8c7f3..94e43a9 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,8 @@ in a `WorkflowResult`. Missing workflow inputs and empty writes fail loudly. methods. `get_input()` remains the scalar coercion boundary for booleans, numbers, paths, datetimes, and credential strings; pass `as_extended=True` when an injected raw input value should stay in Tier 2 form. +`Logging` stores marked log message collections as `ExtendedDict` and +`ExtendedSet` values while keeping Python logger and handler objects plain. More detail lives in [`docs/package-surface.md`](docs/package-surface.md). diff --git a/docs/package-surface.md b/docs/package-surface.md index e69e025..cb84257 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -125,6 +125,10 @@ input value should remain in Tier 2 form. 
`Logging` provides structured lifecycle logging for applications and connector workflows without creating log files unless file output is explicitly enabled. +Stored log message collections are exposed as `ExtendedDict` values keyed by +storage marker, with each marker containing an `ExtendedSet` of promoted +messages. + `ConnectorFabric` caches and coordinates vendor connectors while sharing input loading, logging, data normalization, retry behavior, and serialization. diff --git a/src/extended_data/logging/logging.py b/src/extended_data/logging/logging.py index 6ea8966..4cd5415 100644 --- a/src/extended_data/logging/logging.py +++ b/src/extended_data/logging/logging.py @@ -18,7 +18,6 @@ import os import sys -from collections import defaultdict from collections.abc import Callable, Mapping, Sequence from copy import deepcopy from pathlib import Path @@ -40,6 +39,7 @@ to_snake_case, wrap_raw_data_for_export, ) +from extended_data.containers import ExtendedDict, ExtendedSet from extended_data.logging.const import VERBOSITY from extended_data.logging.handlers import add_console_handler, add_file_handler from extended_data.logging.log_types import LogLevel @@ -118,7 +118,7 @@ def __init__( ) # Message storage - self.stored_messages: defaultdict[str, set[str]] = defaultdict(set) + self.stored_messages: ExtendedDict = ExtendedDict() self.error_list: list[str] = [] self.last_error_instance: Any = None self.last_error_text: str | None = None @@ -281,10 +281,20 @@ def _store_logged_message( return if (not allowed_levels or log_level in allowed_levels) and log_level not in denied_levels: - self.stored_messages[storage_marker].add( + self._stored_messages_for(storage_marker).add( f":warning: {msg}" if log_level not in ["debug", "info"] else msg, ) + def _stored_messages_for(self, storage_marker: str) -> ExtendedSet[str]: + """Return the promoted message collection for a storage marker.""" + stored_messages = self.stored_messages.get(storage_marker) + if 
isinstance(stored_messages, ExtendedSet): + return stored_messages + + promoted_messages = ExtendedSet[str](stored_messages or []) + self.stored_messages[storage_marker] = promoted_messages + return promoted_messages + def logged_statement( self, msg: str, diff --git a/tests/logging/integration/test_lifecycle_logging.py b/tests/logging/integration/test_lifecycle_logging.py index 0b1d7a2..d891150 100644 --- a/tests/logging/integration/test_lifecycle_logging.py +++ b/tests/logging/integration/test_lifecycle_logging.py @@ -6,6 +6,7 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedSet from extended_data.logging import Logging @@ -59,6 +60,8 @@ def test_full_logging_lifecycle(temp_logger: Logging, tmp_path: Path) -> None: log_level="info", # type: ignore[arg-type] ) assert storage_result is not None + assert isinstance(temp_logger.stored_messages, ExtendedDict) + assert isinstance(temp_logger.stored_messages[storage_marker], ExtendedSet) assert storage_msg in temp_logger.stored_messages[storage_marker] # Verify file output exists at the location specified in fixture diff --git a/tests/logging/test_logging.py b/tests/logging/test_logging.py index 608daf2..872a630 100644 --- a/tests/logging/test_logging.py +++ b/tests/logging/test_logging.py @@ -7,6 +7,7 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedSet, ExtendedString from extended_data.logging import Logging from extended_data.logging.log_types import LogLevel @@ -70,8 +71,12 @@ def test_storage_marker(logger: Logging) -> None: ) assert result == msg + assert isinstance(logger.stored_messages, ExtendedDict) assert storage_marker in logger.stored_messages + assert isinstance(logger.stored_messages[storage_marker], ExtendedSet) assert msg in logger.stored_messages[storage_marker] + stored_msg = next(iter(logger.stored_messages[storage_marker])) + assert isinstance(stored_msg, ExtendedString) def test_context_marker(logger: Logging) -> None: @@ -184,6 +189,8 @@ def 
test_log_level_filtering(logger: Logging) -> None: # Allowed level should be stored logger.logged_statement(msg, log_level="info") # type: ignore[arg-type] + assert isinstance(logger.stored_messages, ExtendedDict) + assert isinstance(logger.stored_messages[storage_marker], ExtendedSet) assert msg in logger.stored_messages[storage_marker] # Denied level should not be stored @@ -230,7 +237,9 @@ def test_all_log_levels(logger: Logging, log_level: LogLevel) -> None: assert result == msg assert storage_marker in logger.stored_messages + assert isinstance(logger.stored_messages[storage_marker], ExtendedSet) stored_msg = next(iter(m for m in logger.stored_messages[storage_marker] if msg in m)) + assert isinstance(stored_msg, ExtendedString) # Check for warning prefix on appropriate levels if log_level not in ["debug", "info"]: From 41f2382c7025aa522e5869726a18e9a0ffa75b26 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:31:14 -0500 Subject: [PATCH 090/287] feat: promote mapping container results --- docs/package-surface.md | 5 +++++ src/extended_data/containers/mappings.py | 13 ++++++++----- tests/core/test_containers.py | 12 ++++++++++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/docs/package-surface.md b/docs/package-surface.md index cb84257..c3c54e4 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -62,6 +62,11 @@ payload = ExtendedDict({"service": {"name": "api"}}) payload["service"]["name"].upper_first() ``` +Container methods that return derived collections stay in Tier 2 as well: +`ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected +`ExtendedDict` values, and `ExtendedDict.all_values()` returns an +`ExtendedList`. 
+ Tier 3 decode surfaces can promote plain decoded values into Tier 2 containers: ```python diff --git a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py index 614be6a..d26ac89 100644 --- a/src/extended_data/containers/mappings.py +++ b/src/extended_data/containers/mappings.py @@ -10,6 +10,8 @@ if TYPE_CHECKING: from _typeshed import SupportsKeysAndGetItem + from extended_data.containers.sequences import ExtendedList, ExtendedTuple + from extended_data.primitives.mappings import ( all_values_from_map, deduplicate_map, @@ -83,12 +85,13 @@ def filter( *, allowlist: list[str] | None = None, denylist: list[str] | None = None, - ) -> tuple[ExtendedDict, ExtendedDict]: + ) -> ExtendedTuple[ExtendedDict]: """Return accepted and rejected mapping entries.""" from extended_data.containers.factory import extend_data, to_builtin + from extended_data.containers.sequences import ExtendedTuple accepted, rejected = filter_map(to_builtin(self.data), allowlist=allowlist, denylist=denylist) - return extend_data(accepted), extend_data(rejected) + return ExtendedTuple((extend_data(accepted), extend_data(rejected))) def compact(self) -> ExtendedDict: """Return a copy without values considered empty.""" @@ -108,11 +111,11 @@ def unhump(self, *, drop_without_prefix: str | None = None) -> ExtendedDict: return extend_data(unhump_map(to_builtin(self.data), drop_without_prefix=drop_without_prefix)) - def all_values(self) -> list[Any]: + def all_values(self) -> ExtendedList[Any]: """Return all values from the nested mapping.""" - from extended_data.containers.factory import to_builtin + from extended_data.containers.factory import extend_data, to_builtin - return all_values_from_map(to_builtin(self.data)) + return extend_data(all_values_from_map(to_builtin(self.data))) def first_non_empty_value(self, *keys: str) -> Any: """Return the first non-empty value for the provided keys.""" diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py 
index 0fe97cd..cf39249 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -32,8 +32,14 @@ def test_extended_dict_composes_mapping_primitives() -> None: value = ExtendedDict({"outer": {"inner": 1}, "items": [1, 1, 2], "empty": ""}) merged = value.deep_merge({"outer": {"other": 2}}) - accepted, rejected = merged.filter(allowlist=["outer"]) - + filtered = merged.filter(allowlist=["outer"]) + accepted, rejected = filtered + all_values = value.all_values() + + assert isinstance(filtered, ExtendedTuple) + assert isinstance(accepted, ExtendedDict) + assert isinstance(rejected, ExtendedDict) + assert isinstance(all_values, ExtendedList) assert merged["outer"] == {"inner": 1, "other": 2} assert value["outer"] == {"inner": 1} assert value.flatten() == {"outer.inner": 1, "items.0": 1, "items.1": 1, "items.2": 2, "empty": ""} @@ -41,6 +47,8 @@ def test_extended_dict_composes_mapping_primitives() -> None: assert value.compact() == {"outer": {"inner": 1}, "items": [1, 1, 2]} assert accepted == {"outer": {"inner": 1, "other": 2}} assert "items" in rejected + assert all_values == [1, 1, 1, 2, ""] + assert isinstance(all_values[-1], ExtendedString) def test_extended_dict_promotes_nested_values_on_mutation() -> None: From 18798192e34aa81295bf40b05d1566cbdda20e5e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:39:13 -0500 Subject: [PATCH 091/287] feat: promote decode outputs by default --- README.md | 4 +- docs/package-surface.md | 10 ++-- examples/core/composed_workflows.py | 2 +- examples/core/file_operations.py | 4 +- src/extended_data/inputs/__main__.py | 1 + src/extended_data/io/base64.py | 4 +- src/extended_data/io/files.py | 4 +- src/extended_data/io/importers.py | 4 +- .../primitives/string_transforms.py | 7 ++- src/extended_data/primitives/strings.py | 13 ++++- src/extended_data/primitives/types.py | 29 +++++++--- tests/core/test_base64_utils.py | 15 ++++- tests/core/test_file_data_type.py | 26 ++++++--- 
tests/core/test_import_utils.py | 19 +++++-- tests/core/test_string_data_type.py | 15 +++++ tests/core/test_string_transformations.py | 15 +++++ tests/core/test_type_utils.py | 56 +++++++++++++++++++ tests/core/test_workflows.py | 8 +-- 18 files changed, 186 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 94e43a9..72803ca 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ inputs = InputProvider(inputs={"GITHUB_OWNER": "jbcom"}, from_environment=False) connectors = ConnectorFabric(inputs=inputs.inputs, logger=logger) data = decode_json('{"status": "ok"}') payload = ExtendedDict(data).deep_merge({"source": "example"}) -decoded_file = decode_file('{"service": {"name": "api"}}', suffix="json", as_extended=True) +decoded_file = decode_file('{"service": {"name": "api"}}', suffix="json") workflow = DataWorkflow.from_value(payload).then(("normalize", lambda data: data.unhump())).result() print(encode_yaml(payload)) @@ -119,7 +119,7 @@ The package is intentionally tiered: - Tier 3 processors use the first two tiers to handle files, inputs, API data, vendor integrations, and workflows. -Tier 3 decoders can opt into Tier 2 containers with `as_extended=True`, so +Tier 3 decoders return Tier 2 containers by default, so decoded files, Base64 payloads, and directed inputs can immediately use `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, `ExtendedSet`, and `ExtendedString` methods. diff --git a/docs/package-surface.md b/docs/package-surface.md index c3c54e4..15261f3 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -67,17 +67,19 @@ Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict` values, and `ExtendedDict.all_values()` returns an `ExtendedList`. 
-Tier 3 decode surfaces can promote plain decoded values into Tier 2 containers: +Tier 3 decode surfaces promote decoded values into Tier 2 containers by +default: ```python from extended_data import decode_file -payload = decode_file('{"service": {"name": "api"}}', suffix="json", as_extended=True) +payload = decode_file('{"service": {"name": "api"}}', suffix="json") assert payload["service"]["name"].upper_first() == "Api" ``` -Use `extend_data(value)` to promote existing plain data and `to_builtin(value)` -to lower extended containers back to standard Python data. +Pass `as_extended=False` when a decode boundary should return standard Python +containers. Use `extend_data(value)` to promote existing plain data and +`to_builtin(value)` to lower extended containers back to standard Python data. Tuple values are promoted to `ExtendedTuple` and lowered back to Python tuples, so the Tier 2 surface does not silently turn immutable input data into mutable lists. diff --git a/examples/core/composed_workflows.py b/examples/core/composed_workflows.py index 78e25c4..657d948 100644 --- a/examples/core/composed_workflows.py +++ b/examples/core/composed_workflows.py @@ -120,7 +120,7 @@ def demonstrate_yaml_native_workflow() -> None: tld = Path(tmpdir) write_file("template.yaml", template, tld=tld) rendered = read_file("template.yaml", tld=tld) - decoded = decode_file(rendered, file_path="template.yaml", as_extended=True) + decoded = decode_file(rendered, file_path="template.yaml") print(rendered) print(f"\nDecoded tag: {decoded['bucket_name'].tag}") diff --git a/examples/core/file_operations.py b/examples/core/file_operations.py index aefe79e..a94842d 100755 --- a/examples/core/file_operations.py +++ b/examples/core/file_operations.py @@ -90,7 +90,7 @@ def demonstrate_file_operations() -> None: write_file(yaml_file, yaml_content) yaml_text = read_file(yaml_file) - data = decode_file(yaml_text, file_path=yaml_file, as_extended=True) + data = decode_file(yaml_text, 
file_path=yaml_file) print(f"\nDecoded YAML file: {data}") print(f"YAML service keys: {data.flatten().keys()}") @@ -100,7 +100,7 @@ def demonstrate_file_operations() -> None: write_file(json_file, json_content) json_text = read_file(json_file) - data = decode_file(json_text, file_path=json_file, as_extended=True) + data = decode_file(json_text, file_path=json_file) print(f"Decoded JSON file: {data}") diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index 1c7e06c..6bf3e9a 100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -277,6 +277,7 @@ def decode_input( conf, unwrap_raw_data=decode_from_json or decode_from_yaml, encoding="json" if decode_from_json else "yaml", + as_extended=False, ) except (binascii.Error, DataDecodeError) as exc: message = f"Failed to decode input {k} from Base64." diff --git a/src/extended_data/io/base64.py b/src/extended_data/io/base64.py index 2011290..19b1343 100644 --- a/src/extended_data/io/base64.py +++ b/src/extended_data/io/base64.py @@ -38,7 +38,7 @@ def base64_decode( unwrap_raw_data: bool = True, encoding: str = "yaml", *, - as_extended: bool = False, + as_extended: bool = True, ) -> Any: """Decodes data from base64 format. @@ -46,7 +46,7 @@ def base64_decode( encoded_data (str): The base64 encoded string to decode. unwrap_raw_data (bool): Whether to unwrap the raw data after decoding. encoding (str): The encoding format used for wrapping (default is 'yaml'). - as_extended (bool): Wrap decoded container values in Tier 2 Extended Data containers. + as_extended (bool): Wrap decoded values in Tier 2 Extended Data containers. 
Returns: Any: The decoded bytes when ``unwrap_raw_data`` is ``False``, otherwise diff --git a/src/extended_data/io/files.py b/src/extended_data/io/files.py index 188f3e4..7587dd6 100644 --- a/src/extended_data/io/files.py +++ b/src/extended_data/io/files.py @@ -316,7 +316,7 @@ def decode_file( file_path: FilePath | None = None, suffix: str | None = None, *, - as_extended: bool = False, + as_extended: bool = True, ) -> Any: """Decodes file data based on file extension or explicit suffix. @@ -328,7 +328,7 @@ def decode_file( file_path (FilePath | None): Optional file path to infer format from extension. suffix (str | None): Explicit format suffix (e.g., "yaml", "json", "toml", "hcl"). Takes precedence over file_path extension. - as_extended (bool): Wrap decoded container values in Tier 2 Extended Data containers. + as_extended (bool): Wrap decoded values in Tier 2 Extended Data containers. Returns: Any: The decoded data structure, or the original string if format is unknown. diff --git a/src/extended_data/io/importers.py b/src/extended_data/io/importers.py index d81984b..7538ddf 100644 --- a/src/extended_data/io/importers.py +++ b/src/extended_data/io/importers.py @@ -17,14 +17,14 @@ def unwrap_raw_data_from_import( wrapped_data: str | memoryview | bytes | bytearray, encoding: str = "yaml", *, - as_extended: bool = False, + as_extended: bool = True, ) -> Any: """Unwraps the data that was wrapped for import. Args: wrapped_data (str | memoryview | bytes | bytearray): The wrapped data. encoding (str): The encoding format (default is 'yaml'). - as_extended (bool): Wrap decoded container values in Tier 2 Extended Data containers. + as_extended (bool): Wrap decoded values in Tier 2 Extended Data containers. Returns: Any: The unwrapped data. 
diff --git a/src/extended_data/primitives/string_transforms.py b/src/extended_data/primitives/string_transforms.py index 988a8ce..ff9d388 100644 --- a/src/extended_data/primitives/string_transforms.py +++ b/src/extended_data/primitives/string_transforms.py @@ -12,6 +12,7 @@ def _normalize_separators(text: str) -> str: + text = str(text) spaced = re.sub(r"(?<=[a-z])(?=[A-Z])", " ", text) return spaced.replace("-", " ").replace("_", " ") @@ -41,13 +42,13 @@ def to_kebab_case(text: str) -> str: def pluralize(text: str) -> str: """Convert string to plural form.""" - return inflection.pluralize(text) + return inflection.pluralize(str(text)) def singularize(text: str) -> str: """Convert string to singular form.""" - normalized = text - if text.lower() == "criteria": + normalized = str(text) + if normalized.lower() == "criteria": return "criterion" return inflection.singularize(normalized) diff --git a/src/extended_data/primitives/strings.py b/src/extended_data/primitives/strings.py index a38122d..6ac2b8f 100644 --- a/src/extended_data/primitives/strings.py +++ b/src/extended_data/primitives/strings.py @@ -52,6 +52,7 @@ def sanitize_key(key: str, delim: str = "_") -> str: Returns: str: The sanitized key. """ + key = str(key) return "".join(x if (x.isalnum() or x == delim) else delim for x in key) @@ -66,6 +67,8 @@ def truncate(msg: str, max_length: int, ender: str = "...") -> str: Returns: str: The truncated message. """ + msg = str(msg) + ender = str(ender) if max_length <= 0: return "" @@ -85,6 +88,7 @@ def lower_first_char(inp: str) -> str: Returns: str: The string with the first character in lowercase. """ + inp = str(inp) return inp[:1].lower() + inp[1:] if inp else "" @@ -97,6 +101,7 @@ def upper_first_char(inp: str) -> str: Returns: str: The string with the first character in uppercase. 
""" + inp = str(inp) return inp[:1].upper() + inp[1:] if inp else "" @@ -109,7 +114,7 @@ def is_url(url: str) -> bool: Returns: bool: True if the string is a valid URL, False otherwise. """ - parsed = urlparse(url.strip()) + parsed = urlparse(str(url).strip()) return all([parsed.scheme, parsed.netloc]) @@ -122,7 +127,7 @@ def titleize_name(name: str) -> str: Returns: str: The TitleCase name. """ - return inflection.titleize(inflection.underscore(name)) + return inflection.titleize(inflection.underscore(str(name))) def removeprefix(string: str, prefix: str) -> str: @@ -135,7 +140,7 @@ def removeprefix(string: str, prefix: str) -> str: Returns: str: The string with the prefix removed if it was present, otherwise the original string. """ - return string.removeprefix(prefix) + return str(string).removeprefix(str(prefix)) def removesuffix(string: str, suffix: str) -> str: @@ -148,6 +153,8 @@ def removesuffix(string: str, suffix: str) -> str: Returns: str: The string with the suffix removed if it was present, otherwise the original string. 
""" + string = str(string) + suffix = str(suffix) if not suffix: return string return string.removesuffix(suffix) diff --git a/src/extended_data/primitives/types.py b/src/extended_data/primitives/types.py index 617197b..6a2cb4d 100644 --- a/src/extended_data/primitives/types.py +++ b/src/extended_data/primitives/types.py @@ -37,7 +37,9 @@ import pathlib import re +from collections import UserList, UserString from collections.abc import Mapping +from collections.abc import Set as AbstractSet from pathlib import Path from typing import Any @@ -127,6 +129,9 @@ def strtobool(val: str | bool | None, raise_on_error: bool = False) -> bool | No if isinstance(val, bool) or val is None: return val + if isinstance(val, UserString): + val = str(val) + if isinstance(val, str): if TRUTHY_PATTERN.match(val): return True @@ -152,6 +157,7 @@ def strtofloat(val: str, raise_on_error: bool = False) -> float | None: Raises: ConversionError: If the value is invalid and raise_on_error is True. """ + val = str(val) if NUMBER_PATTERN.match(val): try: return float(val) @@ -178,6 +184,7 @@ def strtoint(val: str, raise_on_error: bool = False) -> int | None: Raises: ConversionError: If the value is invalid and raise_on_error is True. """ + val = str(val) try: float_value = strtofloat(val, raise_on_error=raise_on_error) if float_value is not None: @@ -216,7 +223,8 @@ def strtopath(val: str | bytes | os.PathLike[str] | None, raise_on_error: bool = raise ConversionError(Path, val) from exc return None # Ensure val is converted to string before matching - if not PATH_PATTERN.match(str(val)): + val = str(val) + if not PATH_PATTERN.match(val): raise ConversionError(Path, val) return Path(val) except (ValueError, TypeError) as exc: @@ -238,6 +246,7 @@ def strtodate(val: str, raise_on_error: bool = False) -> datetime.date | None: Raises: ConversionError: If the value is invalid and raise_on_error is True. 
""" + val = str(val) if not DATE_PATTERN.match(val): if raise_on_error: raise ConversionError(datetime.date, val) @@ -264,6 +273,7 @@ def strtodatetime(val: str, raise_on_error: bool = False) -> datetime.datetime | Raises: ConversionError: If the value is invalid and raise_on_error is True. """ + val = str(val) if not DATETIME_PATTERN.match(val): if raise_on_error: raise ConversionError(datetime.datetime, val) @@ -293,6 +303,7 @@ def strtotime(val: str, raise_on_error: bool = False) -> datetime.time | None: Raises: ConversionError: If the value is invalid and raise_on_error is True. """ + val = str(val) if not TIME_PATTERN.match(val): if raise_on_error: raise ConversionError(datetime.time, val) @@ -347,15 +358,15 @@ def convert_special_type(obj: Any) -> Any: return [convert_special_types(list(pair)) for pair in obj] if isinstance(obj, Mapping): return {k: convert_special_types(v) for k, v in obj.items()} - if isinstance(obj, (set, list, tuple, frozenset)): + if isinstance(obj, (set, list, tuple, frozenset, UserList, AbstractSet)) and not isinstance(obj, str | UserString): return [convert_special_types(v) for v in obj] if isinstance(obj, (datetime.date, datetime.datetime)): return removesuffix(obj.isoformat(), "+00:00") if isinstance(obj, pathlib.Path): return str(obj) - if isinstance(obj, (int, float, str, bool, type(None))): - return obj + if isinstance(obj, (int, float, str, bool, type(None), UserString)): + return str(obj) if isinstance(obj, UserString) else obj return str(obj) @@ -368,7 +379,7 @@ def convert_special_types(obj: Any) -> Any: return [convert_special_types(list(pair)) for pair in obj] if isinstance(obj, Mapping): return {k: convert_special_types(v) for k, v in obj.items()} - if isinstance(obj, (set, list, tuple, frozenset)): + if isinstance(obj, (set, list, tuple, frozenset, UserList, AbstractSet)) and not isinstance(obj, str | UserString): return [convert_special_types(v) for v in obj] return convert_special_type(obj) @@ -452,11 +463,11 @@ def 
reconstruct_special_types(obj: Any, fail_silently: bool = False) -> Any: Returns: Any: The reconstructed object with special types restored where applicable. """ - if isinstance(obj, str): - return reconstruct_special_type(obj, fail_silently=fail_silently) + if isinstance(obj, str | UserString): + return reconstruct_special_type(str(obj), fail_silently=fail_silently) if isinstance(obj, Mapping): return {k: reconstruct_special_types(v, fail_silently=fail_silently) for k, v in obj.items()} - if isinstance(obj, list): + if isinstance(obj, list | UserList): return [reconstruct_special_types(v, fail_silently=fail_silently) for v in obj] if isinstance(obj, tuple): return tuple(reconstruct_special_types(v, fail_silently=fail_silently) for v in obj) @@ -464,6 +475,8 @@ def reconstruct_special_types(obj: Any, fail_silently: bool = False) -> Any: return {reconstruct_special_types(v, fail_silently=fail_silently) for v in obj} if isinstance(obj, frozenset): return frozenset(reconstruct_special_types(v, fail_silently=fail_silently) for v in obj) + if isinstance(obj, AbstractSet): + return {reconstruct_special_types(v, fail_silently=fail_silently) for v in obj} return obj diff --git a/tests/core/test_base64_utils.py b/tests/core/test_base64_utils.py index 0edd04a..5e7c3b5 100644 --- a/tests/core/test_base64_utils.py +++ b/tests/core/test_base64_utils.py @@ -179,10 +179,10 @@ def test_base64_decode_with_tf_alias_unwrap() -> None: assert result == {"locals": [{"region": "us-east-1"}]} -def test_base64_decode_can_return_extended_containers() -> None: - """Base64 decoding can opt into the Tier 2 container layer.""" +def test_base64_decode_returns_extended_containers_by_default() -> None: + """Base64 unwrapping enters the Tier 2 container layer by default.""" encoded_data = base64.b64encode(b'{"service": {"name": "api"}, "ports": [8080]}').decode("utf-8") - result = base64_decode(encoded_data, unwrap_raw_data=True, encoding="json", as_extended=True) + result = 
base64_decode(encoded_data, unwrap_raw_data=True, encoding="json") assert isinstance(result, ExtendedDict) assert isinstance(result["service"], ExtendedDict) @@ -190,6 +190,15 @@ def test_base64_decode_can_return_extended_containers() -> None: assert isinstance(result["ports"], ExtendedList) +def test_base64_decode_can_return_builtin_containers() -> None: + """Base64 unwrapping can explicitly return plain Python containers.""" + encoded_data = base64.b64encode(b'{"service": {"name": "api"}}').decode("utf-8") + result = base64_decode(encoded_data, unwrap_raw_data=True, encoding="json", as_extended=False) + + assert isinstance(result, dict) + assert not isinstance(result, ExtendedDict) + + def test_base64_decode_rejects_non_utf8_when_unwrapping() -> None: """Raise a clear error when wrapped decoding requires non-text bytes to be parsed.""" encoded_data = base64.b64encode(b"\xff\xfe").decode("utf-8") diff --git a/tests/core/test_file_data_type.py b/tests/core/test_file_data_type.py index 3232a2b..0485792 100644 --- a/tests/core/test_file_data_type.py +++ b/tests/core/test_file_data_type.py @@ -468,10 +468,10 @@ def test_read_file_return_path(tmp_path: Path) -> None: @pytest.mark.parametrize( ("data", "suffix", "expected_type"), [ - ('{"key": "value"}', "json", dict), - ("key: value", "yaml", dict), - ("key: value", "yml", dict), - ("plain text", "txt", str), + ('{"key": "value"}', "json", ExtendedDict), + ("key: value", "yaml", ExtendedDict), + ("key: value", "yml", ExtendedDict), + ("plain text", "txt", ExtendedString), ], ) def test_decode_file(data: str, suffix: str, expected_type: type) -> None: @@ -489,6 +489,14 @@ def test_decode_file(data: str, suffix: str, expected_type: type) -> None: assert isinstance(result, expected_type) +def test_decode_file_can_return_builtin_containers() -> None: + """File decoding can explicitly lower back to plain Python containers.""" + result = decode_file('{"key": "value"}', suffix="json", as_extended=False) + + assert 
isinstance(result, dict) + assert not isinstance(result, ExtendedDict) + + def test_decode_file_infer_suffix() -> None: """Tests decode_file inferring suffix from file path. @@ -496,34 +504,36 @@ def test_decode_file_infer_suffix() -> None: Suffix is correctly inferred from file path. """ result = decode_file('{"key": "value"}', file_path="/path/to/file.json") - assert isinstance(result, dict) + assert isinstance(result, ExtendedDict) assert result == {"key": "value"} def test_decode_file_infer_hcl_suffix() -> None: """Infer HCL decoding from a Terraform file path.""" result = decode_file('variable "region" { default = "us-east-1" }', file_path="/path/to/variables.tf") + assert isinstance(result, ExtendedDict) assert result == {"variable": [{"region": {"default": "us-east-1"}}]} def test_decode_file_infer_toml_alias_suffix() -> None: """Infer TOML decoding from historical .tml suffixes.""" result = decode_file('title = "Example"\n', file_path="/path/to/config.tml") + assert isinstance(result, ExtendedDict) assert result == {"title": "Example"} def test_decode_file_accepts_bytes_payload() -> None: """Decode bytes-like payloads through the same file helper.""" result = decode_file(b'{"key":"value"}', file_path="/path/to/file.json") + assert isinstance(result, ExtendedDict) assert result == {"key": "value"} -def test_decode_file_can_return_extended_containers() -> None: - """File decoding can opt into the Tier 2 container layer.""" +def test_decode_file_returns_extended_containers_by_default() -> None: + """File decoding enters the Tier 2 container layer by default.""" result = decode_file( '{"service": {"name": "api"}, "ports": [8080]}', file_path="/path/to/file.json", - as_extended=True, ) assert isinstance(result, ExtendedDict) diff --git a/tests/core/test_import_utils.py b/tests/core/test_import_utils.py index 37d6bef..8ddab89 100644 --- a/tests/core/test_import_utils.py +++ b/tests/core/test_import_utils.py @@ -38,12 +38,11 @@ def 
test_unwrap_raw_data_from_import_rejects_unsupported_encoding() -> None: unwrap_raw_data_from_import("value", "xml") -def test_unwrap_raw_data_from_import_can_return_extended_containers() -> None: - """Decoded imports can opt into the Tier 2 container layer.""" +def test_unwrap_raw_data_from_import_returns_extended_containers_by_default() -> None: + """Decoded imports enter the Tier 2 container layer by default.""" result = unwrap_raw_data_from_import( '{"service": {"name": "api"}, "ports": [8080]}', encoding="json", - as_extended=True, ) assert isinstance(result, ExtendedDict) @@ -52,9 +51,17 @@ def test_unwrap_raw_data_from_import_can_return_extended_containers() -> None: assert isinstance(result["ports"], ExtendedList) -def test_unwrap_raw_data_from_import_can_return_extended_raw_strings() -> None: - """Raw imports can opt into ExtendedString.""" - result = unwrap_raw_data_from_import("plain text", encoding="raw", as_extended=True) +def test_unwrap_raw_data_from_import_can_return_builtin_containers() -> None: + """Decoded imports can explicitly return plain Python containers.""" + result = unwrap_raw_data_from_import('{"service": {"name": "api"}}', encoding="json", as_extended=False) + + assert isinstance(result, dict) + assert not isinstance(result, ExtendedDict) + + +def test_unwrap_raw_data_from_import_returns_extended_raw_strings_by_default() -> None: + """Raw imports promote to ExtendedString by default.""" + result = unwrap_raw_data_from_import("plain text", encoding="raw") assert isinstance(result, ExtendedString) assert result.upper_first() == "Plain text" diff --git a/tests/core/test_string_data_type.py b/tests/core/test_string_data_type.py index aa3a152..e855157 100644 --- a/tests/core/test_string_data_type.py +++ b/tests/core/test_string_data_type.py @@ -47,6 +47,7 @@ import pytest +from extended_data.containers import ExtendedString from extended_data.primitives.strings import ( bytestostr, is_url, @@ -317,3 +318,17 @@ def 
test_removesuffix(removesuffix_data: tuple[str, str, str]) -> None: """ string, suffix, expected = removesuffix_data assert removesuffix(string, suffix) == expected + + +def test_string_utilities_accept_extended_string_values() -> None: + """Tier 1 string utilities compose with Tier 2 ExtendedString values.""" + value = ExtendedString("helloWorld") + + assert sanitize_key(ExtendedString("key-with*invalid_chars")) == "key_with_invalid_chars" + assert truncate(ExtendedString("abcdef"), 4) == "a..." + assert lower_first_char(ExtendedString("Hello")) == "hello" + assert upper_first_char(ExtendedString("hello")) == "Hello" + assert is_url(ExtendedString("https://example.com")) + assert titleize_name(value) == "Hello World" + assert removeprefix(ExtendedString("pre_value"), ExtendedString("pre_")) == "value" + assert removesuffix(ExtendedString("value_suffix"), ExtendedString("_suffix")) == "value" diff --git a/tests/core/test_string_transformations.py b/tests/core/test_string_transformations.py index 1ce3ffa..ec14153 100644 --- a/tests/core/test_string_transformations.py +++ b/tests/core/test_string_transformations.py @@ -4,6 +4,7 @@ import pytest +from extended_data.containers import ExtendedString from extended_data.primitives.string_transforms import ( humanize, ordinalize, @@ -94,3 +95,17 @@ def test_ordinalize_rejects_non_numeric_values() -> None: """Reject non-numeric ordinal inputs.""" with pytest.raises(ValueError, match="ordinalize expects a numeric value"): ordinalize("forty-two") + + +def test_string_transforms_accept_extended_string_values() -> None: + """Tier 1 string transforms compose with Tier 2 ExtendedString values.""" + value = ExtendedString("helloWorld") + + assert to_snake_case(value) == "hello_world" + assert to_camel_case(ExtendedString("hello_world")) == "helloWorld" + assert to_pascal_case(value) == "HelloWorld" + assert to_kebab_case(value) == "hello-world" + assert pluralize(ExtendedString("book")) == "books" + assert 
singularize(ExtendedString("criteria")) == "criterion" + assert humanize(ExtendedString("api_key")) == "API key" + assert titleize(ExtendedString("HELLO WORLD")) == "Hello World" diff --git a/tests/core/test_type_utils.py b/tests/core/test_type_utils.py index e907175..5b4453a 100644 --- a/tests/core/test_type_utils.py +++ b/tests/core/test_type_utils.py @@ -14,6 +14,7 @@ import pytest +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString from extended_data.primitives.formats.yaml import YamlPairs, YamlTagged from extended_data.primitives.types import ( ConversionError, @@ -197,6 +198,25 @@ def test_strtobool_rejects_non_strings_when_requested() -> None: strtobool(123, raise_on_error=True) +def test_string_type_converters_accept_extended_string_values() -> None: + """Type conversion primitives compose with Tier 2 ExtendedString values.""" + assert strtobool(ExtendedString("true")) is True + assert strtofloat(ExtendedString("3.14")) == EXPECTED_FLOAT_1 + assert strtoint(ExtendedString("42")) == EXPECTED_INT_1 + assert strtodate(ExtendedString("2023-09-05")) == datetime.date(2023, 9, 5) + assert strtodatetime(ExtendedString("2023-09-05T12:30:00")) == datetime.datetime( + 2023, + 9, + 5, + 12, + 30, + 0, + tzinfo=datetime.timezone.utc, + ) + assert strtotime(ExtendedString("12:30")) == datetime.time(12, 30, 0) + assert strtopath(ExtendedString("/valid/path")) == Path("/valid/path") + + def test_strtofloat(strtofloat_data: tuple[str, float | None]) -> None: """Tests converting a string to a float value. 
@@ -498,6 +518,23 @@ def test_convert_special_type_handles_mappings_and_sequences_directly() -> None: assert convert_special_type((Path("/tmp/a"), datetime.date(2025, 1, 15))) == ["/tmp/a", "2025-01-15"] +def test_convert_special_types_handles_extended_containers() -> None: + """Normalize Tier 2 containers without stringifying nested collections.""" + value = ExtendedDict( + { + "enabled": ExtendedString("true"), + "paths": ExtendedList([Path("/tmp/a"), datetime.date(2025, 1, 15)]), + "tags": ExtendedSet({ExtendedString("api")}), + } + ) + + result = convert_special_types(value) + + assert result["enabled"] == "true" + assert result["paths"] == ["/tmp/a", "2025-01-15"] + assert result["tags"] == ["api"] + + # Test for convert_special_types function @pytest.mark.parametrize( ("obj", "expected"), @@ -635,6 +672,25 @@ def test_reconstruct_special_types_handles_tuples_and_frozensets() -> None: assert frozenset_result == frozenset([datetime.date(2023, 9, 5), True]) +def test_reconstruct_special_types_handles_extended_containers() -> None: + """Reconstruct special values inside Tier 2 containers.""" + value = ExtendedDict( + { + "enabled": ExtendedString("true"), + "count": ExtendedString("5"), + "items": ExtendedList([ExtendedString("2023-09-05")]), + "tags": ExtendedSet({ExtendedString("false")}), + } + ) + + result = reconstruct_special_types(value, fail_silently=False) + + assert result["enabled"] is True + assert result["count"] == 5 + assert result["items"] == [datetime.date(2023, 9, 5)] + assert result["tags"] == {False} + + def test_reconstruct_special_types_leaves_non_container_values_alone() -> None: """Pass through values that do not need recursive reconstruction.""" assert reconstruct_special_types(123, fail_silently=False) == 123 diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index 369f758..769c6bb 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -158,8 +158,8 @@ def 
test_layered_config_workflow_round_trip(tmp_path: Path) -> None: write_file("config/base.yaml", base_config, tld=tmp_path) write_file("config/dev.yaml", env_config, tld=tmp_path) - base_data = decode_file(read_file("config/base.yaml", tld=tmp_path), file_path="config/base.yaml", as_extended=True) - env_data = decode_file(read_file("config/dev.yaml", tld=tmp_path), file_path="config/dev.yaml", as_extended=True) + base_data = decode_file(read_file("config/base.yaml", tld=tmp_path), file_path="config/base.yaml") + env_data = decode_file(read_file("config/dev.yaml", tld=tmp_path), file_path="config/dev.yaml") merged = base_data.deep_merge(env_data) output_path = write_file("build/config.yaml", merged, tld=tmp_path) @@ -228,7 +228,7 @@ def test_api_payload_factory_workflow_round_trip(tmp_path: Path) -> None: } raw_path = write_file("build/raw-payload.json", raw_payload, tld=tmp_path) - decoded = decode_file(read_file(raw_path), file_path=raw_path, as_extended=True) + decoded = decode_file(read_file(raw_path), file_path=raw_path) normalized = decoded.deduplicate().unhump() output_path = write_file("build/payload.json", normalized, tld=tmp_path) @@ -251,7 +251,7 @@ def test_yaml_native_workflow_round_trip(tmp_path: Path) -> None: } output_path = write_file("template.yaml", template, tld=tmp_path) - decoded = decode_file(read_file(output_path), file_path=output_path, as_extended=True) + decoded = decode_file(read_file(output_path), file_path=output_path) assert output_path == tmp_path / "template.yaml" assert isinstance(decoded, ExtendedDict) From 27318878aa1574cd7836a8e42566edee8ac240b5 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:45:48 -0500 Subject: [PATCH 092/287] feat: advertise extended tool payloads --- README.md | 3 ++ docs/package-surface.md | 4 ++ examples/connectors/README.md | 2 + .../connectors/anthropic/tools.py | 9 ++-- src/extended_data/connectors/aws/tools.py | 18 ++++---- src/extended_data/connectors/cursor/tools.py | 6 +-- 
src/extended_data/connectors/github/tools.py | 14 +++---- src/extended_data/connectors/google/tools.py | 14 +++---- src/extended_data/connectors/meshy/tools.py | 20 ++++----- src/extended_data/connectors/secrets/tools.py | 14 +++---- src/extended_data/connectors/slack/tools.py | 10 ++--- src/extended_data/connectors/vault/tools.py | 6 +-- src/extended_data/connectors/zoom/tools.py | 10 ++--- .../connectors/test_tool_payload_contracts.py | 41 +++++++++++++++++++ 14 files changed, 111 insertions(+), 60 deletions(-) create mode 100644 tests/connectors/test_tool_payload_contracts.py diff --git a/README.md b/README.md index 72803ca..59b6e99 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,9 @@ generic vendor lookup. Connector data payloads are promoted into Tier 2 containers at the boundary, so decoded files, HTTP response data, GraphQL responses, and SDK-shaped maps can use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods immediately. +Data-returning AI tool wrappers expose the same `ExtendedDict`/`ExtendedList` +payload contract; framework factory functions still return framework tool +objects. Raw SDK/client objects and raw transport responses remain available from the methods that explicitly return them. diff --git a/docs/package-surface.md b/docs/package-surface.md index 15261f3..f4ff0d6 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -181,6 +181,10 @@ first-class `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, and `ExtendedString` values. This is an intentional major-version break from plain `dict`/`list` payloads; use `to_builtin()` at serialization, CLI, MCP, or SDK handoff boundaries. +Data-returning AI tool wrapper functions follow the same contract and annotate +their payload returns as `ExtendedDict` or `ExtendedList[ExtendedDict]`. +LangChain, CrewAI, Strands, and auto-detection factory functions still return +plain framework tool object lists. 
```python payload = github.get_repository_file("service.json") diff --git a/examples/connectors/README.md b/examples/connectors/README.md index af2da39..091b1c3 100644 --- a/examples/connectors/README.md +++ b/examples/connectors/README.md @@ -8,6 +8,8 @@ data payloads are promoted into Tier 2 containers at connector boundaries. Callers can use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods on decoded API, file, and SDK-shaped results, then call `to_builtin()` only when a plain Python payload is needed for serialization or SDK handoff. +The direct AI-tool functions follow that same payload contract; only the +framework factory helpers return plain framework tool objects. ## Quick Start diff --git a/src/extended_data/connectors/anthropic/tools.py b/src/extended_data/connectors/anthropic/tools.py index a725c82..ce473af 100644 --- a/src/extended_data/connectors/anthropic/tools.py +++ b/src/extended_data/connectors/anthropic/tools.py @@ -6,14 +6,15 @@ from __future__ import annotations +from collections.abc import Mapping from typing import Any from pydantic import BaseModel, Field -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedList, extend_data -def _message_text(message: dict[str, Any]) -> str: +def _message_text(message: Mapping[str, Any]) -> str: """Extract concatenated text blocks from a message payload.""" return "".join( str(block.get("text", "")) @@ -40,7 +41,7 @@ def anthropic_create_message( prompt: str, max_tokens: int = 1024, system: str | None = None, -) -> dict[str, Any]: +) -> ExtendedDict: """Create a message using Anthropic Claude. Args: @@ -75,7 +76,7 @@ def anthropic_create_message( ) -def anthropic_list_models() -> list[dict[str, Any]]: +def anthropic_list_models() -> ExtendedList[ExtendedDict]: """List available Anthropic Claude models. 
Returns: diff --git a/src/extended_data/connectors/aws/tools.py b/src/extended_data/connectors/aws/tools.py index 9729e87..aae1f6d 100644 --- a/src/extended_data/connectors/aws/tools.py +++ b/src/extended_data/connectors/aws/tools.py @@ -32,7 +32,7 @@ from pydantic import BaseModel, Field -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedList, extend_data # ============================================================================= @@ -84,7 +84,7 @@ class GetSecretSchema(BaseModel): # ============================================================================= -def get_caller_account_id() -> dict[str, str]: +def get_caller_account_id() -> ExtendedDict: """Get the AWS account ID of the caller. Returns: @@ -97,7 +97,7 @@ def get_caller_account_id() -> dict[str, str]: return extend_data({"account_id": account_id}) -def list_s3_buckets() -> list[dict[str, Any]]: +def list_s3_buckets() -> ExtendedList[ExtendedDict]: """List S3 buckets in the account. Returns: @@ -119,7 +119,7 @@ def list_s3_buckets() -> list[dict[str, Any]]: ) -def list_s3_objects(bucket: str) -> list[dict[str, Any]]: +def list_s3_objects(bucket: str) -> ExtendedList[ExtendedDict]: """List objects in an S3 bucket. Args: @@ -151,7 +151,7 @@ def list_s3_objects(bucket: str) -> list[dict[str, Any]]: return extend_data(result) -def list_accounts() -> list[dict[str, Any]]: +def list_accounts() -> ExtendedList[ExtendedDict]: """List AWS organization accounts. Returns: @@ -174,7 +174,7 @@ def list_accounts() -> list[dict[str, Any]]: ) -def list_sso_users() -> list[dict[str, Any]]: +def list_sso_users() -> ExtendedList[ExtendedDict]: """List IAM Identity Center users. Returns: @@ -197,7 +197,7 @@ def list_sso_users() -> list[dict[str, Any]]: ) -def list_sso_groups() -> list[dict[str, Any]]: +def list_sso_groups() -> ExtendedList[ExtendedDict]: """List IAM Identity Center groups. 
Returns: @@ -222,7 +222,7 @@ def list_sso_groups() -> list[dict[str, Any]]: def list_secrets( prefix: str = "", get_values: bool = False, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List secrets from AWS Secrets Manager. Args: @@ -255,7 +255,7 @@ def list_secrets( return extend_data(result) -def get_secret(secret_id: str) -> dict[str, Any]: +def get_secret(secret_id: str) -> ExtendedDict: """Get a single secret value from AWS Secrets Manager. Args: diff --git a/src/extended_data/connectors/cursor/tools.py b/src/extended_data/connectors/cursor/tools.py index 387ba62..4ff8735 100644 --- a/src/extended_data/connectors/cursor/tools.py +++ b/src/extended_data/connectors/cursor/tools.py @@ -10,7 +10,7 @@ from pydantic import BaseModel, Field -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, extend_data def _state_value(state: Any) -> Any: @@ -38,7 +38,7 @@ def cursor_launch_agent( repository: str, ref: str | None = None, branch_name: str | None = None, -) -> dict[str, Any]: +) -> ExtendedDict: """Launch a new Cursor coding agent. Args: @@ -69,7 +69,7 @@ def cursor_launch_agent( ) -def cursor_get_agent_status(agent_id: str) -> dict[str, Any]: +def cursor_get_agent_status(agent_id: str) -> ExtendedDict: """Get the current status of a Cursor agent. 
Args: diff --git a/src/extended_data/connectors/github/tools.py b/src/extended_data/connectors/github/tools.py index 64b31a8..d3823eb 100644 --- a/src/extended_data/connectors/github/tools.py +++ b/src/extended_data/connectors/github/tools.py @@ -10,7 +10,7 @@ from pydantic import BaseModel, Field -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedList, extend_data # ============================================================================= @@ -82,7 +82,7 @@ def list_repositories( include_branches: bool = False, github_token: str | None = None, **kwargs: Any, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List repositories in a GitHub organization. Args: @@ -112,7 +112,7 @@ def get_repository( repo_name: str, github_token: str | None = None, **kwargs: Any, -) -> dict[str, Any]: +) -> ExtendedDict: """Get details of a specific GitHub repository. Args: @@ -139,7 +139,7 @@ def list_teams( include_repos: bool = False, github_token: str | None = None, **kwargs: Any, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List teams in a GitHub organization. Args: @@ -163,7 +163,7 @@ def get_team( team_slug: str, github_token: str | None = None, **kwargs: Any, -) -> dict[str, Any]: +) -> ExtendedDict: """Get details of a specific GitHub team. Args: @@ -190,7 +190,7 @@ def list_org_members( include_pending: bool = False, github_token: str | None = None, **kwargs: Any, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List members of a GitHub organization. Args: @@ -216,7 +216,7 @@ def get_repository_file( github_branch: str | None = None, github_token: str | None = None, **kwargs: Any, -) -> dict[str, Any]: +) -> ExtendedDict: """Get a file from a GitHub repository. 
Args: diff --git a/src/extended_data/connectors/google/tools.py b/src/extended_data/connectors/google/tools.py index 35f653d..1d71085 100644 --- a/src/extended_data/connectors/google/tools.py +++ b/src/extended_data/connectors/google/tools.py @@ -30,7 +30,7 @@ from pydantic import BaseModel, Field -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedList, extend_data # ============================================================================= @@ -89,7 +89,7 @@ class ListWorkspaceGroupsSchema(BaseModel): def list_projects( parent: str = "", max_results: int = 100, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List Google Cloud projects. Args: @@ -123,7 +123,7 @@ def list_projects( def list_folders( parent: str, max_results: int = 100, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List folders under a parent resource. Args: @@ -156,7 +156,7 @@ def list_folders( def list_enabled_services( project_id: str, max_results: int = 100, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List enabled services in a Google Cloud project. Args: @@ -187,7 +187,7 @@ def list_enabled_services( def list_billing_accounts( max_results: int = 100, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List Google Cloud billing accounts. Args: @@ -219,7 +219,7 @@ def list_billing_accounts( def list_workspace_users( domain: str = "", max_results: int = 100, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List users from Google Workspace. Args: @@ -261,7 +261,7 @@ def list_workspace_users( def list_workspace_groups( domain: str = "", max_results: int = 100, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List groups from Google Workspace. 
Args: diff --git a/src/extended_data/connectors/meshy/tools.py b/src/extended_data/connectors/meshy/tools.py index 2e6a63e..1479d69 100644 --- a/src/extended_data/connectors/meshy/tools.py +++ b/src/extended_data/connectors/meshy/tools.py @@ -12,7 +12,7 @@ from pydantic import BaseModel, Field -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, extend_data # ============================================================================= @@ -110,7 +110,7 @@ def _result_status(result: object) -> str: return str(status.value) if hasattr(status, "value") else str(status) -def _extract_result_fields(result: object) -> dict[str, object]: +def _extract_result_fields(result: object) -> ExtendedDict: """Extract common fields from Meshy API result objects. Safely extracts status, model_url, and thumbnail_url from result objects, @@ -149,7 +149,7 @@ def text3d_generate( negative_prompt: str = "", target_polycount: int = 30000, enable_pbr: bool = True, -) -> dict[str, Any]: +) -> ExtendedDict: """Generate a 3D model from text description. Args: @@ -194,7 +194,7 @@ def image3d_generate( topology: str = "", target_polycount: int = 15000, enable_pbr: bool = True, -) -> dict[str, Any]: +) -> ExtendedDict: """Generate a 3D model from an image. Args: @@ -230,7 +230,7 @@ def image3d_generate( }) -def rig_model(model_id: str, wait: bool = True) -> dict[str, Any]: +def rig_model(model_id: str, wait: bool = True) -> ExtendedDict: """Add skeleton/rig to a static 3D model. Args: @@ -262,7 +262,7 @@ def rig_model(model_id: str, wait: bool = True) -> dict[str, Any]: raise TypeError(msg) -def apply_animation(model_id: str, animation_id: int, wait: bool = True) -> dict[str, Any]: +def apply_animation(model_id: str, animation_id: int, wait: bool = True) -> ExtendedDict: """Apply animation to a rigged model. 
Args: @@ -301,7 +301,7 @@ def retexture_model( texture_prompt: str, enable_pbr: bool = True, wait: bool = True, -) -> dict[str, Any]: +) -> ExtendedDict: """Apply new textures to an existing model. Args: @@ -341,7 +341,7 @@ def retexture_model( raise TypeError(msg) -def list_animations(category: str = "", limit: int = 50) -> dict[str, Any]: +def list_animations(category: str = "", limit: int = 50) -> ExtendedDict: """List available animations from the Meshy catalog. Args: @@ -376,7 +376,7 @@ def list_animations(category: str = "", limit: int = 50) -> dict[str, Any]: }) -def check_task_status(task_id: str, task_type: str = "text-to-3d") -> dict[str, Any]: +def check_task_status(task_id: str, task_type: str = "text-to-3d") -> ExtendedDict: """Check status of a Meshy task. Args: @@ -422,7 +422,7 @@ def check_task_status(task_id: str, task_type: str = "text-to-3d") -> dict[str, }) -def get_animation(animation_id: int) -> dict[str, Any]: +def get_animation(animation_id: int) -> ExtendedDict: """Get details of a specific animation. Args: diff --git a/src/extended_data/connectors/secrets/tools.py b/src/extended_data/connectors/secrets/tools.py index fd4b0d9..3326f91 100644 --- a/src/extended_data/connectors/secrets/tools.py +++ b/src/extended_data/connectors/secrets/tools.py @@ -10,7 +10,7 @@ from pydantic import BaseModel, Field -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, extend_data # ============================================================================= @@ -54,7 +54,7 @@ class GetConfigInfoSchema(BaseModel): # ============================================================================= -def validate_config(config_path: str) -> dict[str, Any]: +def validate_config(config_path: str) -> ExtendedDict: """Validate a secrets sync pipeline configuration file. 
Args: @@ -75,7 +75,7 @@ def run_pipeline( operation: str = "pipeline", targets: str | None = None, continue_on_error: bool = True, -) -> dict[str, Any]: +) -> ExtendedDict: """Run the secrets synchronization pipeline. This executes the two-phase pipeline (merge → sync) to synchronize @@ -136,7 +136,7 @@ def run_pipeline( }) -def dry_run(config_path: str) -> dict[str, Any]: +def dry_run(config_path: str) -> ExtendedDict: """Perform a dry run to see what changes would be made. Args: @@ -162,7 +162,7 @@ def dry_run(config_path: str) -> dict[str, Any]: }) -def get_config_info(config_path: str) -> dict[str, Any]: +def get_config_info(config_path: str) -> ExtendedDict: """Get detailed information about a pipeline configuration. Args: @@ -177,7 +177,7 @@ def get_config_info(config_path: str) -> dict[str, Any]: return extend_data(connector.get_config_info(config_path)) -def get_targets(config_path: str) -> dict[str, Any]: +def get_targets(config_path: str) -> ExtendedDict: """Get the list of sync targets from a configuration. Args: @@ -192,7 +192,7 @@ def get_targets(config_path: str) -> dict[str, Any]: return extend_data(connector.get_targets(config_path)) -def get_sources(config_path: str) -> dict[str, Any]: +def get_sources(config_path: str) -> ExtendedDict: """Get the list of secret sources from a configuration. Args: diff --git a/src/extended_data/connectors/slack/tools.py b/src/extended_data/connectors/slack/tools.py index 6f50b1d..e22f1d4 100644 --- a/src/extended_data/connectors/slack/tools.py +++ b/src/extended_data/connectors/slack/tools.py @@ -29,7 +29,7 @@ from pydantic import BaseModel, Field -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedList, extend_data if TYPE_CHECKING: @@ -104,7 +104,7 @@ def list_channels( exclude_archived: bool = True, channels_only: bool = True, limit: int = 100, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List Slack channels. 
Args: @@ -142,7 +142,7 @@ def list_users( include_bots: bool = False, include_deleted: bool = False, max_results: int = 100, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List Slack users. Args: @@ -181,7 +181,7 @@ def send_message( channel: str, text: str, thread_id: str = "", -) -> dict[str, Any]: +) -> ExtendedDict: """Send a message to a Slack channel. Args: @@ -212,7 +212,7 @@ def send_message( def get_channel_history( channel: str, limit: int = 100, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """Get recent messages from a Slack channel. Args: diff --git a/src/extended_data/connectors/vault/tools.py b/src/extended_data/connectors/vault/tools.py index 01a8e7e..5e8a1ca 100644 --- a/src/extended_data/connectors/vault/tools.py +++ b/src/extended_data/connectors/vault/tools.py @@ -11,7 +11,7 @@ from pydantic import BaseModel, Field -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedList, extend_data # ============================================================================= @@ -43,7 +43,7 @@ def list_secrets( root_path: str = "/", mount_point: str = "secret", max_depth: int | None = 10, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List secrets recursively from Vault KV v2 engine. Args: @@ -75,7 +75,7 @@ def list_secrets( def read_secret( path: str, mount_point: str = "secret", -) -> dict[str, Any]: +) -> ExtendedDict: """Read a single secret from Vault. 
Args: diff --git a/src/extended_data/connectors/zoom/tools.py b/src/extended_data/connectors/zoom/tools.py index 111721e..d6e84d5 100644 --- a/src/extended_data/connectors/zoom/tools.py +++ b/src/extended_data/connectors/zoom/tools.py @@ -10,7 +10,7 @@ from pydantic import BaseModel, Field -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, ExtendedList, extend_data # ============================================================================= @@ -51,7 +51,7 @@ class GetMeetingSchema(BaseModel): # ============================================================================= -def list_users(max_results: int = 100) -> list[dict[str, Any]]: +def list_users(max_results: int = 100) -> ExtendedList[ExtendedDict]: """List Zoom users. Args: @@ -69,7 +69,7 @@ def list_users(max_results: int = 100) -> list[dict[str, Any]]: return extend_data(sorted_users[:max_results]) -def get_user(user_id: str) -> dict[str, Any]: +def get_user(user_id: str) -> ExtendedDict: """Get a specific Zoom user by ID or email. Args: @@ -88,7 +88,7 @@ def list_meetings( user_id: str, meeting_type: str = "scheduled", max_results: int = 100, -) -> list[dict[str, Any]]: +) -> ExtendedList[ExtendedDict]: """List Zoom meetings for a specific user. Args: @@ -106,7 +106,7 @@ def list_meetings( return extend_data(meetings[:max_results]) -def get_meeting(meeting_id: str) -> dict[str, Any]: +def get_meeting(meeting_id: str) -> ExtendedDict: """Get details of a specific Zoom meeting. 
Args: diff --git a/tests/connectors/test_tool_payload_contracts.py b/tests/connectors/test_tool_payload_contracts.py new file mode 100644 index 0000000..fee6e7f --- /dev/null +++ b/tests/connectors/test_tool_payload_contracts.py @@ -0,0 +1,41 @@ +"""Contracts for connector AI-tool payload surfaces.""" + +from __future__ import annotations + +from importlib import import_module +from typing import get_args, get_origin, get_type_hints + +import pytest + +from extended_data.containers import ExtendedDict, ExtendedList + + +TOOL_MODULES = ( + "extended_data.connectors.anthropic.tools", + "extended_data.connectors.aws.tools", + "extended_data.connectors.cursor.tools", + "extended_data.connectors.github.tools", + "extended_data.connectors.google.tools", + "extended_data.connectors.meshy.tools", + "extended_data.connectors.secrets.tools", + "extended_data.connectors.slack.tools", + "extended_data.connectors.vault.tools", + "extended_data.connectors.zoom.tools", +) + + +@pytest.mark.parametrize("module_name", TOOL_MODULES) +def test_tool_definition_functions_advertise_extended_payloads(module_name: str) -> None: + """Data-returning AI tools expose Tier 2 payload contracts.""" + module = import_module(module_name) + + for definition in module.TOOL_DEFINITIONS: + func = definition["func"] + return_type = get_type_hints(func)["return"] + origin = get_origin(return_type) + + if origin is ExtendedList: + assert get_args(return_type) == (ExtendedDict,), f"{module_name}.{func.__name__}" + continue + + assert return_type is ExtendedDict, f"{module_name}.{func.__name__}" From 9dd4319c21a4624c58734b75e4d5ec7a739a8782 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:51:51 -0500 Subject: [PATCH 093/287] feat: advertise extended connector payloads --- .../connectors/anthropic/__init__.py | 11 +-- .../connectors/cursor/__init__.py | 14 ++-- .../connectors/github/__init__.py | 62 +++++++++-------- .../connectors/slack/__init__.py | 8 +-- 
.../connectors/vault/__init__.py | 13 ++-- src/extended_data/connectors/zoom/__init__.py | 11 +-- .../test_connector_payload_contracts.py | 68 +++++++++++++++++++ 7 files changed, 130 insertions(+), 57 deletions(-) create mode 100644 tests/connectors/test_connector_payload_contracts.py diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index f24165d..a1e10fc 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ b/src/extended_data/connectors/anthropic/__init__.py @@ -27,6 +27,7 @@ import os +from collections.abc import Mapping from dataclasses import dataclass from datetime import datetime from enum import Enum @@ -37,7 +38,7 @@ from pydantic import BaseModel, ConfigDict, Field from extended_data.connectors.base import VendorConnectorBase -from extended_data.containers import ExtendedDict, extend_data, to_builtin +from extended_data.containers import ExtendedDict, ExtendedList, extend_data, to_builtin from extended_data.logging import Logging @@ -299,7 +300,7 @@ def _model_payload(model: BaseModel) -> dict[str, Any]: return model.model_dump(mode="json") @staticmethod - def _message_text(message: dict[str, Any]) -> str: + def _message_text(message: Mapping[str, Any]) -> str: """Extract concatenated text blocks from an extended message payload.""" return "".join( str(block.get("text", "")) @@ -324,7 +325,7 @@ def create_message( tools: list[dict[str, Any]] | None = None, tool_choice: dict[str, Any] | None = None, metadata: dict[str, Any] | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a message using Claude. Args: @@ -423,7 +424,7 @@ def count_tokens( # Model Operations # ========================================================================= - def list_models(self) -> list[dict[str, Any]]: + def list_models(self) -> ExtendedList[ExtendedDict]: """List available models from the API. 
Returns: @@ -443,7 +444,7 @@ def list_models(self) -> list[dict[str, Any]]: models_data = data.get("data", []) return self.extend_result([self._model_payload(Model.model_validate(m)) for m in models_data]) - def get_model(self, model_id: str) -> dict[str, Any]: + def get_model(self, model_id: str) -> ExtendedDict: """Get information about a specific model. Args: diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index bdaab42..10cf8f2 100644 --- a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -34,7 +34,7 @@ from pydantic import BaseModel, ConfigDict, Field from extended_data.connectors.base import VendorConnectorBase -from extended_data.containers import to_builtin +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.logging import Logging @@ -418,7 +418,7 @@ def _model_payload(model: BaseModel) -> dict[str, Any]: # Agent Operations # ========================================================================= - def list_agents(self) -> list[dict[str, Any]]: + def list_agents(self) -> ExtendedList[ExtendedDict]: """List all agents. Returns: @@ -435,7 +435,7 @@ def list_agents(self) -> list[dict[str, Any]]: agents_data = data.get("agents", []) return self.extend_result([self._model_payload(Agent.model_validate(a)) for a in agents_data]) - def get_agent_status(self, agent_id: str) -> dict[str, Any]: + def get_agent_status(self, agent_id: str) -> ExtendedDict: """Get status of a specific agent. 
Args: @@ -456,7 +456,7 @@ def get_agent_status(self, agent_id: str) -> dict[str, Any]: raise CursorAPIError(f"Empty response when getting agent status for {agent_id}") return self.extend_result(self._model_payload(Agent.model_validate(data))) - def get_agent_conversation(self, agent_id: str) -> dict[str, Any]: + def get_agent_conversation(self, agent_id: str) -> ExtendedDict: """Get conversation history for an agent. Args: @@ -491,7 +491,7 @@ def launch_agent( skip_reviewer_request: bool = False, webhook_url: str | None = None, webhook_secret: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Launch a new agent. Args: @@ -590,7 +590,7 @@ def add_followup(self, agent_id: str, prompt_text: str) -> None: # Repository Operations # ========================================================================= - def list_repositories(self) -> list[dict[str, Any]]: + def list_repositories(self) -> ExtendedList[ExtendedDict]: """List available repositories. Returns: @@ -611,7 +611,7 @@ def list_repositories(self) -> list[dict[str, Any]]: # Model Operations # ========================================================================= - def list_models(self) -> list[str]: + def list_models(self) -> ExtendedList[ExtendedString]: """List available models. 
Returns: diff --git a/src/extended_data/connectors/github/__init__.py b/src/extended_data/connectors/github/__init__.py index 4c31e19..e2e7994 100644 --- a/src/extended_data/connectors/github/__init__.py +++ b/src/extended_data/connectors/github/__init__.py @@ -5,6 +5,7 @@ import io import os +from collections.abc import Mapping, Sequence from copy import deepcopy from typing import TYPE_CHECKING, Any @@ -18,6 +19,7 @@ ) from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import ExtendedDict from extended_data.logging import Logging @@ -299,7 +301,7 @@ def list_org_members( self, role: str | None = None, include_pending: bool = False, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """List organization members. Args: @@ -347,7 +349,7 @@ def list_org_members( self.logger.info(f"Retrieved {len(members)} organization members") return self.extend_result(members) - def get_org_member(self, username: str) -> dict[str, Any] | None: + def get_org_member(self, username: str) -> ExtendedDict | None: """Get a specific organization member. Args: @@ -383,7 +385,7 @@ def list_repositories( self, type_filter: str = "all", include_branches: bool = False, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """List organization repositories. Args: @@ -433,7 +435,7 @@ def list_repositories( self.logger.info(f"Retrieved {len(repos)} repositories") return self.extend_result(repos) - def get_repository(self, repo_name: str) -> dict[str, Any] | None: + def get_repository(self, repo_name: str) -> ExtendedDict | None: """Get a specific repository. Args: @@ -472,7 +474,7 @@ def list_teams( self, include_members: bool = False, include_repos: bool = False, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """List organization teams. 
Args: @@ -528,7 +530,7 @@ def list_teams( self.logger.info(f"Retrieved {len(teams)} teams") return self.extend_result(teams) - def get_team(self, team_slug: str) -> dict[str, Any] | None: + def get_team(self, team_slug: str) -> ExtendedDict | None: """Get a specific team. Args: @@ -603,7 +605,7 @@ def remove_team_member(self, team_slug: str, username: str) -> bool: # GraphQL Queries # ========================================================================= - def execute_graphql(self, query: str, variables: dict[str, Any] | None = None) -> dict[str, Any]: + def execute_graphql(self, query: str, variables: dict[str, Any] | None = None) -> ExtendedDict: """Execute a GraphQL query against the GitHub API. Args: @@ -628,9 +630,9 @@ def execute_graphql(self, query: str, variables: dict[str, Any] | None = None) - def get_users_with_verified_emails( self, - members: dict[str, dict[str, Any]] | None = None, + members: Mapping[str, Mapping[str, Any]] | None = None, domain_filter: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """Get organization members with their verified emails. Uses GraphQL to get verified email addresses for org members. 
@@ -668,7 +670,7 @@ def get_users_with_verified_emails( verified_emails = user_data.get("organizationVerifiedDomainEmails", []) primary_email = user_data.get("email") - enriched_data = member_data.copy() + enriched_data = dict(member_data) enriched_data["verified_emails"] = verified_emails enriched_data["primary_email"] = primary_email @@ -683,7 +685,7 @@ def get_users_with_verified_emails( except Exception as e: self.logger.warning(f"Failed to get verified emails for {username}: {e}") - enriched[username] = member_data + enriched[username] = dict(member_data) self.logger.info(f"Retrieved verified emails for {len(enriched)} users") return self.extend_result(enriched) @@ -695,13 +697,13 @@ def get_users_with_verified_emails( def build_workflow( self, name: str, - on: dict[str, Any], - jobs: dict[str, dict[str, Any]], - env: dict[str, str] | None = None, - permissions: dict[str, str] | None = None, - concurrency: dict[str, Any] | None = None, - defaults: dict[str, Any] | None = None, - ) -> dict[str, Any]: + on: Mapping[str, Any], + jobs: Mapping[str, Mapping[str, Any]], + env: Mapping[str, str] | None = None, + permissions: Mapping[str, str] | None = None, + concurrency: Mapping[str, Any] | None = None, + defaults: Mapping[str, Any] | None = None, + ) -> ExtendedDict: """Build a GitHub Actions workflow structure. 
Args: @@ -739,15 +741,15 @@ def build_workflow( def build_workflow_job( self, runs_on: str = "ubuntu-latest", - steps: list[dict[str, Any]] | None = None, - needs: list[str] | None = None, + steps: Sequence[Mapping[str, Any]] | None = None, + needs: Sequence[str] | None = None, if_condition: str | None = None, - env: dict[str, str] | None = None, - strategy: dict[str, Any] | None = None, + env: Mapping[str, str] | None = None, + strategy: Mapping[str, Any] | None = None, timeout_minutes: int | None = None, - services: dict[str, Any] | None = None, - outputs: dict[str, str] | None = None, - ) -> dict[str, Any]: + services: Mapping[str, Any] | None = None, + outputs: Mapping[str, str] | None = None, + ) -> ExtendedDict: """Build a GitHub Actions workflow job. Args: @@ -787,7 +789,7 @@ def build_workflow_job( if outputs: job["outputs"] = outputs - job["steps"] = steps or [] + job["steps"] = list(steps or []) return self.extend_result(job) @@ -796,13 +798,13 @@ def build_workflow_step( name: str, uses: str | None = None, run: str | None = None, - with_params: dict[str, Any] | None = None, - env: dict[str, str] | None = None, + with_params: Mapping[str, Any] | None = None, + env: Mapping[str, str] | None = None, if_condition: str | None = None, working_directory: str | None = None, shell: str | None = None, id: str | None = None, # noqa: A002 - ) -> dict[str, Any]: + ) -> ExtendedDict: """Build a GitHub Actions workflow step. Args: @@ -851,7 +853,7 @@ def create_python_ci_workflow( format_command: str | None = "ruff format --check", install_command: str = "uv sync --all-packages", working_directory: str = ".", - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a standard Python CI workflow. 
Args: diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index 66849a9..5a5ee97 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -284,7 +284,7 @@ def send_message( raise SlackAPIError(exc.response) from exc return exc.response - def get_bot_channels(self) -> dict[str, dict[str, Any]]: + def get_bot_channels(self) -> ExtendedDict: """Return channels the bot account is a member of. Returns: @@ -308,7 +308,7 @@ def list_users( include_bots: bool | None = None, include_app_users: bool | None = None, **kwargs: Any, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """List Slack users with optional filtering flags. Args: @@ -368,7 +368,7 @@ def list_usergroups( team_id: str | None = None, usergroup_ids: str | Sequence[str] | None = None, **kwargs: Any, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """List Slack user groups with optional filtering. Args: @@ -420,7 +420,7 @@ def list_conversations( get_members: bool | None = None, channels_only: bool | None = None, **kwargs: Any, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """List Slack conversations with optional filtering. 
Args: diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index 76a6741..f1fe993 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -9,6 +9,7 @@ from extended_data import is_nothing from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.logging import Logging @@ -173,7 +174,7 @@ def list_secrets( root_path: str = "/", mount_point: str = "secret", max_depth: int | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """List secrets recursively from Vault KV v2 engine. Args: @@ -253,7 +254,7 @@ def read_secret( self, path: str, mount_point: str = "secret", - ) -> dict[str, Any] | None: + ) -> ExtendedDict | None: """Read a single secret from Vault. Args: @@ -282,7 +283,7 @@ def get_secret( secret_name: str | None = None, matchers: dict[str, str] | None = None, mount_point: str = "secret", - ) -> dict[str, Any] | None: + ) -> ExtendedDict | None: """Get Vault secret by path, name, or by searching with matchers. This method supports three modes: @@ -405,7 +406,7 @@ def list_aws_iam_roles( self, mount_point: str = "aws", name_prefix: str | None = None, - ) -> list[str]: + ) -> ExtendedList[ExtendedString]: """List AWS IAM roles configured in Vault's AWS secrets engine. Args: @@ -437,7 +438,7 @@ def get_aws_iam_role( self, role_name: str, mount_point: str = "aws", - ) -> dict[str, Any] | None: + ) -> ExtendedDict | None: """Retrieve details about a specific AWS IAM role configured in Vault. Args: @@ -472,7 +473,7 @@ def generate_aws_credentials( mount_point: str = "aws", ttl: str | None = None, credential_type: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Generate AWS credentials via Vault's AWS secrets engine. 
Args: diff --git a/src/extended_data/connectors/zoom/__init__.py b/src/extended_data/connectors/zoom/__init__.py index e6c1ac0..b5a6bfa 100644 --- a/src/extended_data/connectors/zoom/__init__.py +++ b/src/extended_data/connectors/zoom/__init__.py @@ -9,6 +9,7 @@ import requests from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import ExtendedDict, ExtendedList from extended_data.logging import Logging @@ -60,7 +61,7 @@ def get_headers(self) -> dict[str, str]: raise RuntimeError(msg) return {"Authorization": f"Bearer {token}", "Content-Type": "application/json"} - def get_zoom_users(self) -> dict[str, dict[str, Any]]: + def get_zoom_users(self) -> ExtendedDict: """Get all Zoom users.""" url = "https://api.zoom.us/v2/users" headers = self.get_headers() @@ -120,7 +121,7 @@ def create_zoom_user(self, email: str, first_name: str, last_name: str) -> bool: self.logger.exception(error_msg) return False - def list_users(self) -> dict[str, dict[str, Any]]: + def list_users(self) -> ExtendedDict: """List all Zoom users. This is an alias for get_zoom_users() for consistency with AI tools naming. @@ -130,7 +131,7 @@ def list_users(self) -> dict[str, dict[str, Any]]: """ return self.get_zoom_users() - def get_user(self, user_id: str) -> dict[str, Any]: + def get_user(self, user_id: str) -> ExtendedDict: """Get a specific Zoom user by ID or email. Args: @@ -149,7 +150,7 @@ def get_user(self, user_id: str) -> dict[str, Any]: except requests.exceptions.RequestException as exc: raise RuntimeError(f"Failed to get Zoom user {user_id}: {exc}") from exc - def list_meetings(self, user_id: str, meeting_type: str = "scheduled") -> list[dict[str, Any]]: + def list_meetings(self, user_id: str, meeting_type: str = "scheduled") -> ExtendedList[ExtendedDict]: """List meetings for a specific user. 
Args: @@ -171,7 +172,7 @@ def list_meetings(self, user_id: str, meeting_type: str = "scheduled") -> list[d except requests.exceptions.RequestException as exc: raise RuntimeError(f"Failed to list meetings for user {user_id}: {exc}") from exc - def get_meeting(self, meeting_id: str) -> dict[str, Any]: + def get_meeting(self, meeting_id: str) -> ExtendedDict: """Get details of a specific meeting. Args: diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py new file mode 100644 index 0000000..c6a8560 --- /dev/null +++ b/tests/connectors/test_connector_payload_contracts.py @@ -0,0 +1,68 @@ +"""Contracts for direct connector payload surfaces.""" + +from __future__ import annotations + +from typing import get_args, get_origin, get_type_hints + +import pytest + +from extended_data.connectors.anthropic import AnthropicConnector +from extended_data.connectors.cursor import CursorConnector +from extended_data.connectors.github import GitHubConnector +from extended_data.connectors.slack import SlackConnector +from extended_data.connectors.vault import VaultConnector +from extended_data.connectors.zoom import ZoomConnector +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString + + +PAYLOAD_METHODS = ( + (AnthropicConnector.create_message, ExtendedDict), + (AnthropicConnector.list_models, ExtendedList[ExtendedDict]), + (AnthropicConnector.get_model, ExtendedDict), + (CursorConnector.list_agents, ExtendedList[ExtendedDict]), + (CursorConnector.get_agent_status, ExtendedDict), + (CursorConnector.get_agent_conversation, ExtendedDict), + (CursorConnector.launch_agent, ExtendedDict), + (CursorConnector.list_repositories, ExtendedList[ExtendedDict]), + (CursorConnector.list_models, ExtendedList[ExtendedString]), + (GitHubConnector.list_org_members, ExtendedDict), + (GitHubConnector.get_org_member, ExtendedDict | None), + (GitHubConnector.list_repositories, ExtendedDict), + 
(GitHubConnector.get_repository, ExtendedDict | None), + (GitHubConnector.list_teams, ExtendedDict), + (GitHubConnector.get_team, ExtendedDict | None), + (GitHubConnector.execute_graphql, ExtendedDict), + (GitHubConnector.get_users_with_verified_emails, ExtendedDict), + (GitHubConnector.build_workflow, ExtendedDict), + (GitHubConnector.build_workflow_job, ExtendedDict), + (GitHubConnector.build_workflow_step, ExtendedDict), + (GitHubConnector.create_python_ci_workflow, ExtendedDict), + (SlackConnector.get_bot_channels, ExtendedDict), + (SlackConnector.list_users, ExtendedDict), + (SlackConnector.list_usergroups, ExtendedDict), + (SlackConnector.list_conversations, ExtendedDict), + (VaultConnector.list_secrets, ExtendedDict), + (VaultConnector.read_secret, ExtendedDict | None), + (VaultConnector.get_secret, ExtendedDict | None), + (VaultConnector.list_aws_iam_roles, ExtendedList[ExtendedString]), + (VaultConnector.get_aws_iam_role, ExtendedDict | None), + (VaultConnector.generate_aws_credentials, ExtendedDict), + (ZoomConnector.get_zoom_users, ExtendedDict), + (ZoomConnector.list_users, ExtendedDict), + (ZoomConnector.get_user, ExtendedDict), + (ZoomConnector.list_meetings, ExtendedList[ExtendedDict]), + (ZoomConnector.get_meeting, ExtendedDict), +) + + +@pytest.mark.parametrize(("method", "expected_return"), PAYLOAD_METHODS) +def test_direct_connector_methods_advertise_extended_payloads(method: object, expected_return: object) -> None: + """Public connector data methods expose Tier 2 payload contracts.""" + return_type = get_type_hints(method)["return"] + + if get_origin(expected_return) is ExtendedList: + assert get_origin(return_type) is ExtendedList + assert get_args(return_type) == get_args(expected_return) + return + + assert return_type == expected_return From bab99e49fe358163599a2d399bf895674201b98d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 06:58:22 -0500 Subject: [PATCH 094/287] feat: advertise extended google payloads --- 
.../connectors/google/__init__.py | 5 +- .../connectors/google/billing.py | 22 +++--- src/extended_data/connectors/google/cloud.py | 35 ++++----- src/extended_data/connectors/google/jules.py | 15 ++-- .../connectors/google/services.py | 46 ++++++------ .../connectors/google/workspace.py | 30 ++++---- .../test_connector_payload_contracts.py | 71 +++++++++++++++++++ 7 files changed, 150 insertions(+), 74 deletions(-) diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index 219737a..a2008a2 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -9,6 +9,7 @@ from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import ExtendedDict, ExtendedList from extended_data.logging import Logging @@ -414,7 +415,7 @@ def list_users( exclude_bots: bool | None = None, flatten_names: bool | None = None, key_by_email: bool | None = None, - ) -> list[dict[str, Any]] | dict[str, dict[str, Any]]: + ) -> ExtendedList[ExtendedDict] | ExtendedDict: """List users from Google Workspace with optional filtering. Args: @@ -487,7 +488,7 @@ def list_groups( exclude_bots: bool | None = None, flatten_names: bool | None = None, key_by_email: bool | None = None, - ) -> list[dict[str, Any]] | dict[str, dict[str, Any]]: + ) -> ExtendedList[ExtendedDict] | ExtendedDict: """List groups from Google Workspace with optional filtering. 
Args: diff --git a/src/extended_data/connectors/google/billing.py b/src/extended_data/connectors/google/billing.py index 18b1060..a56e3ba 100644 --- a/src/extended_data/connectors/google/billing.py +++ b/src/extended_data/connectors/google/billing.py @@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any from extended_data import unhump_map -from extended_data.containers import to_builtin +from extended_data.containers import ExtendedDict, ExtendedList, to_builtin class GoogleBillingMixin: @@ -32,7 +32,7 @@ def list_billing_accounts( self, filter_query: str | None = None, unhump_accounts: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List Google Cloud billing accounts. Args: @@ -69,7 +69,7 @@ def list_billing_accounts( return self.extend_result(accounts) - def get_billing_account(self, billing_account_id: str) -> dict[str, Any] | None: + def get_billing_account(self, billing_account_id: str) -> ExtendedDict | None: """Get a specific billing account. Args: @@ -93,7 +93,7 @@ def get_billing_account(self, billing_account_id: str) -> dict[str, Any] | None: return None raise - def get_project_billing_info(self, project_id: str) -> dict[str, Any] | None: + def get_project_billing_info(self, project_id: str) -> ExtendedDict | None: """Get billing info for a project. Args: @@ -118,7 +118,7 @@ def update_project_billing_info( self, project_id: str, billing_account_name: str, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Link a project to a billing account. Args: @@ -146,7 +146,7 @@ def update_project_billing_info( self.logger.info(f"Linked project {project_id} to billing account") return self.extend_result(result) - def disable_project_billing(self, project_id: str) -> dict[str, Any]: + def disable_project_billing(self, project_id: str) -> ExtendedDict: """Disable billing for a project. 
Args: @@ -174,7 +174,7 @@ def list_billing_account_projects( self, billing_account_id: str, unhump_projects: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List projects linked to a billing account. Args: @@ -216,7 +216,7 @@ def list_billing_account_projects( def get_billing_account_iam_policy( self, billing_account_id: str, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Get IAM policy for a billing account. Args: @@ -237,7 +237,7 @@ def set_billing_account_iam_policy( self, billing_account_id: str, policy: dict[str, Any], - ) -> dict[str, Any]: + ) -> ExtendedDict: """Set IAM policy for a billing account. Args: @@ -267,7 +267,7 @@ def get_bigquery_billing_dataset( self, project_id: str, dataset_id: str = "billing_export", - ) -> dict[str, Any] | None: + ) -> ExtendedDict | None: """Get BigQuery billing export dataset configuration. Args: @@ -325,7 +325,7 @@ def setup_billing_export( project_id: str, dataset_id: str = "billing_export", location: str = "US", - ) -> dict[str, Any]: + ) -> ExtendedDict: """Set up BigQuery billing export for a billing account. Creates the dataset if it doesn't exist and returns configuration. diff --git a/src/extended_data/connectors/google/cloud.py b/src/extended_data/connectors/google/cloud.py index 7f98119..350cdf6 100644 --- a/src/extended_data/connectors/google/cloud.py +++ b/src/extended_data/connectors/google/cloud.py @@ -6,10 +6,11 @@ from __future__ import annotations +from collections.abc import Mapping from typing import TYPE_CHECKING, Any from extended_data import unhump_map -from extended_data.containers import to_builtin +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin class GoogleCloudMixin: @@ -30,7 +31,7 @@ def get_iam_service(self) -> Any: ... def extend_result(self, value: Any) -> Any: ... - def get_organization_id(self) -> str: + def get_organization_id(self) -> ExtendedString: """Get the Google Cloud organization ID. 
Returns: @@ -54,7 +55,7 @@ def get_organization_id(self) -> str: self.logger.info(f"Organization ID: {org_id}") return self.extend_result(org_id) - def get_organization(self) -> dict[str, Any]: + def get_organization(self) -> ExtendedDict: """Get the Google Cloud organization details. Returns: @@ -80,7 +81,7 @@ def list_projects( parent: str | None = None, filter_query: str | None = None, unhump_projects: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List Google Cloud projects. Args: @@ -120,7 +121,7 @@ def list_projects( return self.extend_result(projects) - def get_project(self, project_id: str) -> dict[str, Any] | None: + def get_project(self, project_id: str) -> ExtendedDict | None: """Get a specific Google Cloud project. Args: @@ -147,7 +148,7 @@ def create_project( display_name: str, parent: str | None = None, labels: dict[str, str] | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a Google Cloud project. Args: @@ -176,7 +177,7 @@ def create_project( self.logger.info(f"Created project: {project_id}") return self.extend_result(result) - def delete_project(self, project_id: str) -> dict[str, Any]: + def delete_project(self, project_id: str) -> ExtendedDict: """Delete a Google Cloud project. Args: @@ -196,7 +197,7 @@ def move_project( self, project_id: str, destination_parent: str, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Move a project to a different folder/organization. Args: @@ -224,7 +225,7 @@ def list_folders( self, parent: str, unhump_folders: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List folders under a parent. Args: @@ -263,7 +264,7 @@ def get_org_policy( self, resource: str, constraint: str, - ) -> dict[str, Any] | None: + ) -> ExtendedDict | None: """Get an organization policy. Args: @@ -295,7 +296,7 @@ def set_org_policy( self, resource: str, policy: dict[str, Any], - ) -> dict[str, Any]: + ) -> ExtendedDict: """Set an organization policy. 
Args: @@ -321,7 +322,7 @@ def get_iam_policy( self, resource: str, resource_type: str = "projects", - ) -> dict[str, Any]: + ) -> ExtendedDict: """Get IAM policy for a resource. Args: @@ -366,9 +367,9 @@ def get_iam_policy( def set_iam_policy( self, resource: str, - policy: dict[str, Any], + policy: Mapping[str, Any], resource_type: str = "projects", - ) -> dict[str, Any]: + ) -> ExtendedDict: """Set IAM policy for a resource. Args: @@ -421,7 +422,7 @@ def add_iam_binding( role: str, member: str, resource_type: str = "projects", - ) -> dict[str, Any]: + ) -> ExtendedDict: """Add an IAM binding to a resource. Args: @@ -458,7 +459,7 @@ def list_service_accounts( self, project_id: str, unhump_accounts: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List service accounts in a project. Args: @@ -499,7 +500,7 @@ def create_service_account( account_id: str, display_name: str, description: str = "", - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a service account in a project. Args: diff --git a/src/extended_data/connectors/google/jules.py b/src/extended_data/connectors/google/jules.py index 354786e..96213af 100644 --- a/src/extended_data/connectors/google/jules.py +++ b/src/extended_data/connectors/google/jules.py @@ -34,6 +34,7 @@ from pydantic import BaseModel, Field from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import ExtendedDict, ExtendedList __all__ = [ @@ -181,7 +182,7 @@ def _model_payload(model: BaseModel) -> dict[str, Any]: """Serialize a Jules model using API field aliases.""" return model.model_dump(by_alias=True) - def list_sources(self, page_size: int = 100, page_token: str = "") -> list[dict[str, Any]]: + def list_sources(self, page_size: int = 100, page_token: str = "") -> ExtendedList[ExtendedDict]: """List available sources (connected GitHub repos). 
Args: @@ -212,7 +213,7 @@ def create_session( starting_branch: str = "main", automation_mode: str = "AUTO_CREATE_PR", require_plan_approval: bool = False, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a new Jules session. Args: @@ -247,7 +248,7 @@ def create_session( return self.extend_result(self._model_payload(Session(**data))) - def get_session(self, session_name: str) -> dict[str, Any]: + def get_session(self, session_name: str) -> ExtendedDict: """Get a session by name. Args: @@ -265,7 +266,7 @@ def get_session(self, session_name: str) -> dict[str, Any]: return self.extend_result(self._model_payload(Session(**data))) - def list_sessions(self, page_size: int = 20, page_token: str = "") -> list[dict[str, Any]]: + def list_sessions(self, page_size: int = 20, page_token: str = "") -> ExtendedList[ExtendedDict]: """List sessions. Args: @@ -284,7 +285,7 @@ def list_sessions(self, page_size: int = 20, page_token: str = "") -> list[dict[ return self.extend_result([self._model_payload(Session(**s)) for s in data.get("sessions", [])]) - def approve_plan(self, session_name: str) -> dict[str, Any]: + def approve_plan(self, session_name: str) -> ExtendedDict: """Approve the plan for a session that requires approval. Args: @@ -302,7 +303,7 @@ def approve_plan(self, session_name: str) -> dict[str, Any]: # API returns empty on success, fetch updated session return self.get_session(session_name) - def add_user_response(self, session_name: str, message: str = "") -> dict[str, Any]: + def add_user_response(self, session_name: str, message: str = "") -> ExtendedDict: """Add a follow-up message to a session or resume it. Note: The Jules API uses :sendMessage endpoint. 
An empty body @@ -325,7 +326,7 @@ def add_user_response(self, session_name: str, message: str = "") -> dict[str, A # API returns empty on success, fetch updated session return self.get_session(session_name) - def resume_session(self, session_name: str) -> dict[str, Any]: + def resume_session(self, session_name: str) -> ExtendedDict: """Resume a paused or awaiting session. Args: diff --git a/src/extended_data/connectors/google/services.py b/src/extended_data/connectors/google/services.py index 02d0d8f..f5a5605 100644 --- a/src/extended_data/connectors/google/services.py +++ b/src/extended_data/connectors/google/services.py @@ -8,9 +8,11 @@ import datetime as dt +from collections.abc import Mapping, MutableMapping from typing import TYPE_CHECKING, Any from extended_data import unhump_map +from extended_data.containers import ExtendedDict, ExtendedList _PROJECT_ACTIVITY_TIME_FIELDS = ( @@ -47,7 +49,7 @@ def _parse_project_activity_time(value: Any) -> dt.datetime | None: return parsed.astimezone(dt.timezone.utc) -def _latest_project_activity_time(project_data: dict[str, Any]) -> dt.datetime | None: +def _latest_project_activity_time(project_data: Mapping[str, Any]) -> dt.datetime | None: """Return the latest activity timestamp available on project metadata.""" timestamps = [ parsed @@ -58,7 +60,7 @@ def _latest_project_activity_time(project_data: dict[str, Any]) -> dt.datetime | def _project_activity_is_stale( - project_data: dict[str, Any], + project_data: Mapping[str, Any], *, days_since_activity: int, now: dt.datetime | None = None, @@ -119,7 +121,7 @@ def list_compute_instances( project_id: str, zone: str | None = None, unhump_instances: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List Compute Engine instances in a project. 
Args: @@ -181,7 +183,7 @@ def list_gke_clusters( project_id: str, location: str = "-", unhump_clusters: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List GKE clusters in a project. Args: @@ -211,7 +213,7 @@ def get_gke_cluster( project_id: str, location: str, cluster_id: str, - ) -> dict[str, Any] | None: + ) -> ExtendedDict | None: """Get a specific GKE cluster. Args: @@ -243,7 +245,7 @@ def list_storage_buckets( self, project_id: str, unhump_buckets: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List Cloud Storage buckets in a project. Args: @@ -286,7 +288,7 @@ def list_sql_instances( self, project_id: str, unhump_instances: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List Cloud SQL instances in a project. Args: @@ -329,7 +331,7 @@ def list_pubsub_topics( self, project_id: str, unhump_topics: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List Pub/Sub topics in a project. Args: @@ -368,7 +370,7 @@ def list_pubsub_subscriptions( self, project_id: str, unhump_subscriptions: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List Pub/Sub subscriptions in a project. Args: @@ -411,7 +413,7 @@ def list_enabled_services( self, project_id: str, unhump_services: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List enabled APIs/services in a project. Args: @@ -453,7 +455,7 @@ def enable_service( self, project_id: str, service_name: str, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Enable an API/service in a project. Args: @@ -477,7 +479,7 @@ def disable_service( project_id: str, service_name: str, force: bool = False, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Disable an API/service in a project. 
Args: @@ -505,7 +507,7 @@ def batch_enable_services( self, project_id: str, service_names: list[str], - ) -> dict[str, Any]: + ) -> ExtendedDict: """Enable multiple APIs/services in a project. Args: @@ -540,7 +542,7 @@ def list_kms_keyrings( project_id: str, location: str, unhump_keyrings: bool = False, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List KMS key rings in a project location. Args: @@ -582,7 +584,7 @@ def create_kms_keyring( project_id: str, location: str, keyring_id: str, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a KMS key ring. Args: @@ -620,7 +622,7 @@ def create_kms_key( key_id: str, purpose: str = "ENCRYPT_DECRYPT", algorithm: str = "GOOGLE_SYMMETRIC_ENCRYPTION", - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a KMS crypto key. Args: @@ -731,7 +733,7 @@ def is_project_empty( def get_project_iam_users( self, project_id: str, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """Get IAM users (members) with access to a project. Args: @@ -761,7 +763,7 @@ def get_pubsub_resources_for_project( project_id: str, include_subscriptions: bool = True, unhump_resources: bool = False, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Get all Pub/Sub topics and subscriptions for a project. Args: @@ -799,10 +801,10 @@ def get_pubsub_resources_for_project( def find_inactive_projects( self, - projects: dict[str, dict[str, Any]] | None = None, + projects: MutableMapping[str, MutableMapping[str, Any]] | None = None, check_resources: bool = True, days_since_activity: int = 90, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """Find projects that appear to be inactive or dead. 
A project is considered inactive if: @@ -829,12 +831,12 @@ def find_inactive_projects( if projects is None: # Get projects from cloud module - requires GoogleCloudMixin if hasattr(self, "list_projects"): - projects = {p["projectId"]: p for p in self.list_projects()} + projects = {str(p["projectId"]): p for p in self.list_projects()} else: self.logger.warning("list_projects not available, cannot find inactive projects") return self.extend_result([]) - inactive: list[dict[str, Any]] = [] + inactive: list[MutableMapping[str, Any]] = [] for project_id, project_data in projects.items(): lifecycle_state = project_data.get("lifecycleState", "ACTIVE") diff --git a/src/extended_data/connectors/google/workspace.py b/src/extended_data/connectors/google/workspace.py index 08714d9..c5722c2 100644 --- a/src/extended_data/connectors/google/workspace.py +++ b/src/extended_data/connectors/google/workspace.py @@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any from extended_data import unhump_map -from extended_data.containers import to_builtin +from extended_data.containers import ExtendedDict, ExtendedList, to_builtin class GoogleWorkspaceMixin: @@ -35,7 +35,7 @@ def list_workspace_users( max_results: int = 500, unhump_users: bool = False, subject: str | None = None, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List users from Google Workspace. Args: @@ -76,7 +76,7 @@ def get_user( self, user_key: str, subject: str | None = None, - ) -> dict[str, Any] | None: + ) -> ExtendedDict | None: """Get a specific user from Google Workspace. Args: @@ -108,7 +108,7 @@ def create_user( org_unit_path: str = "/", subject: str | None = None, **additional_fields: Any, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a user in Google Workspace. Args: @@ -152,7 +152,7 @@ def update_user( user_key: str, subject: str | None = None, **fields: Any, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Update a user in Google Workspace. 
Args: @@ -189,7 +189,7 @@ def list_workspace_groups( max_results: int = 200, unhump_groups: bool = False, subject: str | None = None, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List groups from Google Workspace. Args: @@ -230,7 +230,7 @@ def get_group( self, group_key: str, subject: str | None = None, - ) -> dict[str, Any] | None: + ) -> ExtendedDict | None: """Get a specific group from Google Workspace. Args: @@ -258,7 +258,7 @@ def create_group( name: str, description: str = "", subject: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a group in Google Workspace. Args: @@ -303,7 +303,7 @@ def list_group_members( roles: list[str] | None = None, unhump_members: bool = False, subject: str | None = None, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List members of a Google Workspace group. Args: @@ -346,7 +346,7 @@ def add_group_member( email: str, role: str = "MEMBER", subject: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Add a member to a Google Workspace group. Args: @@ -391,7 +391,7 @@ def list_org_units( org_unit_path: str = "/", org_unit_type: str = "all", subject: str | None = None, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List organizational units in Google Workspace. Args: @@ -429,7 +429,7 @@ def create_or_update_user( org_unit_path: str = "/", subject: str | None = None, **additional_fields: Any, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create or update a user in Google Workspace. This method provides terraform-style idempotent user management. @@ -497,7 +497,7 @@ def create_or_update_group( update_if_exists: bool = False, subject: str | None = None, **additional_fields: Any, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create or update a group in Google Workspace. This method provides terraform-style idempotent group management. 
@@ -549,7 +549,7 @@ def list_available_licenses( customer_id: str = "my_customer", product_id: str | None = None, subject: str | None = None, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List available Google Workspace licenses. Args: @@ -631,7 +631,7 @@ def get_license_summary( self, customer_id: str = "my_customer", subject: str | None = None, - ) -> dict[str, dict[str, int]]: + ) -> ExtendedDict: """Get a summary of license usage by product. Args: diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index c6a8560..10502c3 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -9,6 +9,12 @@ from extended_data.connectors.anthropic import AnthropicConnector from extended_data.connectors.cursor import CursorConnector from extended_data.connectors.github import GitHubConnector +from extended_data.connectors.google import GoogleConnector +from extended_data.connectors.google.billing import GoogleBillingMixin +from extended_data.connectors.google.cloud import GoogleCloudMixin +from extended_data.connectors.google.jules import JulesConnector +from extended_data.connectors.google.services import GoogleServicesMixin +from extended_data.connectors.google.workspace import GoogleWorkspaceMixin from extended_data.connectors.slack import SlackConnector from extended_data.connectors.vault import VaultConnector from extended_data.connectors.zoom import ZoomConnector @@ -37,6 +43,71 @@ (GitHubConnector.build_workflow_job, ExtendedDict), (GitHubConnector.build_workflow_step, ExtendedDict), (GitHubConnector.create_python_ci_workflow, ExtendedDict), + (GoogleConnector.list_users, ExtendedList[ExtendedDict] | ExtendedDict), + (GoogleConnector.list_groups, ExtendedList[ExtendedDict] | ExtendedDict), + (GoogleBillingMixin.list_billing_accounts, ExtendedList[ExtendedDict]), + (GoogleBillingMixin.get_billing_account, 
ExtendedDict | None), + (GoogleBillingMixin.get_project_billing_info, ExtendedDict | None), + (GoogleBillingMixin.update_project_billing_info, ExtendedDict), + (GoogleBillingMixin.disable_project_billing, ExtendedDict), + (GoogleBillingMixin.list_billing_account_projects, ExtendedList[ExtendedDict]), + (GoogleBillingMixin.get_billing_account_iam_policy, ExtendedDict), + (GoogleBillingMixin.set_billing_account_iam_policy, ExtendedDict), + (GoogleBillingMixin.get_bigquery_billing_dataset, ExtendedDict | None), + (GoogleBillingMixin.setup_billing_export, ExtendedDict), + (GoogleCloudMixin.get_organization_id, ExtendedString), + (GoogleCloudMixin.get_organization, ExtendedDict), + (GoogleCloudMixin.list_projects, ExtendedList[ExtendedDict]), + (GoogleCloudMixin.get_project, ExtendedDict | None), + (GoogleCloudMixin.create_project, ExtendedDict), + (GoogleCloudMixin.delete_project, ExtendedDict), + (GoogleCloudMixin.move_project, ExtendedDict), + (GoogleCloudMixin.list_folders, ExtendedList[ExtendedDict]), + (GoogleCloudMixin.get_org_policy, ExtendedDict | None), + (GoogleCloudMixin.set_org_policy, ExtendedDict), + (GoogleCloudMixin.get_iam_policy, ExtendedDict), + (GoogleCloudMixin.set_iam_policy, ExtendedDict), + (GoogleCloudMixin.add_iam_binding, ExtendedDict), + (GoogleCloudMixin.list_service_accounts, ExtendedList[ExtendedDict]), + (GoogleCloudMixin.create_service_account, ExtendedDict), + (GoogleWorkspaceMixin.list_workspace_users, ExtendedList[ExtendedDict]), + (GoogleWorkspaceMixin.get_user, ExtendedDict | None), + (GoogleWorkspaceMixin.create_user, ExtendedDict), + (GoogleWorkspaceMixin.update_user, ExtendedDict), + (GoogleWorkspaceMixin.list_workspace_groups, ExtendedList[ExtendedDict]), + (GoogleWorkspaceMixin.get_group, ExtendedDict | None), + (GoogleWorkspaceMixin.create_group, ExtendedDict), + (GoogleWorkspaceMixin.list_group_members, ExtendedList[ExtendedDict]), + (GoogleWorkspaceMixin.add_group_member, ExtendedDict), + 
(GoogleWorkspaceMixin.list_org_units, ExtendedList[ExtendedDict]), + (GoogleWorkspaceMixin.create_or_update_user, ExtendedDict), + (GoogleWorkspaceMixin.create_or_update_group, ExtendedDict), + (GoogleWorkspaceMixin.list_available_licenses, ExtendedList[ExtendedDict]), + (GoogleWorkspaceMixin.get_license_summary, ExtendedDict), + (GoogleServicesMixin.list_compute_instances, ExtendedList[ExtendedDict]), + (GoogleServicesMixin.list_gke_clusters, ExtendedList[ExtendedDict]), + (GoogleServicesMixin.get_gke_cluster, ExtendedDict | None), + (GoogleServicesMixin.list_storage_buckets, ExtendedList[ExtendedDict]), + (GoogleServicesMixin.list_sql_instances, ExtendedList[ExtendedDict]), + (GoogleServicesMixin.list_pubsub_topics, ExtendedList[ExtendedDict]), + (GoogleServicesMixin.list_pubsub_subscriptions, ExtendedList[ExtendedDict]), + (GoogleServicesMixin.list_enabled_services, ExtendedList[ExtendedDict]), + (GoogleServicesMixin.enable_service, ExtendedDict), + (GoogleServicesMixin.disable_service, ExtendedDict), + (GoogleServicesMixin.batch_enable_services, ExtendedDict), + (GoogleServicesMixin.list_kms_keyrings, ExtendedList[ExtendedDict]), + (GoogleServicesMixin.create_kms_keyring, ExtendedDict), + (GoogleServicesMixin.create_kms_key, ExtendedDict), + (GoogleServicesMixin.get_project_iam_users, ExtendedDict), + (GoogleServicesMixin.get_pubsub_resources_for_project, ExtendedDict), + (GoogleServicesMixin.find_inactive_projects, ExtendedList[ExtendedDict]), + (JulesConnector.list_sources, ExtendedList[ExtendedDict]), + (JulesConnector.create_session, ExtendedDict), + (JulesConnector.get_session, ExtendedDict), + (JulesConnector.list_sessions, ExtendedList[ExtendedDict]), + (JulesConnector.approve_plan, ExtendedDict), + (JulesConnector.add_user_response, ExtendedDict), + (JulesConnector.resume_session, ExtendedDict), (SlackConnector.get_bot_channels, ExtendedDict), (SlackConnector.list_users, ExtendedDict), (SlackConnector.list_usergroups, ExtendedDict), From 
ca07acde8a24e63e32b3ff3b06cb54cfea2b7160 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 07:08:08 -0500 Subject: [PATCH 095/287] feat: advertise extended aws payloads --- examples/connectors/basic_aws.py | 12 +-- src/extended_data/connectors/aws/__init__.py | 54 ++++++------- .../connectors/aws/codedeploy.py | 25 +++--- .../connectors/aws/organizations.py | 76 ++++++++++--------- src/extended_data/connectors/aws/s3.py | 59 +++++++------- src/extended_data/connectors/aws/sso.py | 64 ++++++++-------- .../test_connector_payload_contracts.py | 54 ++++++++++++- 7 files changed, 207 insertions(+), 137 deletions(-) diff --git a/examples/connectors/basic_aws.py b/examples/connectors/basic_aws.py index 4880e85..94c1613 100644 --- a/examples/connectors/basic_aws.py +++ b/examples/connectors/basic_aws.py @@ -45,9 +45,10 @@ def main() -> int: # List S3 buckets print("\n--- S3 Buckets ---") try: - buckets = full_connector.list_buckets() - for bucket in buckets[:5]: # Show first 5 - print(f" Bucket: {bucket}") + buckets = full_connector.list_s3_buckets() + for bucket_name, bucket in list(buckets.items())[:5]: # Show first 5 + created = bucket.get("creation_date") or bucket.get("CreationDate") + print(f" Bucket: {bucket_name} ({created})") if len(buckets) > 5: print(f" ... and {len(buckets) - 5} more buckets") except Exception as e: @@ -57,8 +58,9 @@ def main() -> int: print("\n--- Organization Accounts ---") try: accounts = full_connector.get_accounts() - for account in accounts[:5]: - print(f" Account: {account}") + for account_id, account in list(accounts.items())[:5]: + name = account.get("name") or account.get("Name") or account_id + print(f" Account: {account_id} ({name})") if len(accounts) > 5: print(f" ... 
and {len(accounts) - 5} more accounts") except Exception as e: diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index 4337eb4..e09807d 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -16,17 +16,17 @@ from __future__ import annotations -from collections.abc import Mapping +from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any from extended_data import is_nothing from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase -from extended_data.containers import extend_data, to_builtin +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data, to_builtin from extended_data.logging import Logging -AWSSecretValue = str | dict[str, Any] | None +AWSSecretValue = str | ExtendedString | Mapping[str, Any] | None if TYPE_CHECKING: @@ -76,14 +76,14 @@ def __init__( super().__init__(logger=logger, **kwargs) self._boto3 = _load_aws_sdk() self.execution_role_arn = execution_role_arn - self.aws_sessions: dict[str, dict[str, boto3.Session]] = {} + self.aws_sessions: dict[str, dict[str, Any]] = {} self.default_aws_session = self._boto3.Session() # ========================================================================= # Session Management # ========================================================================= - def assume_role(self, execution_role_arn: str, role_session_name: str) -> boto3.Session: + def assume_role(self, execution_role_arn: str, role_session_name: str) -> Any: """Assume an AWS IAM role and return a boto3 Session. Args: @@ -116,7 +116,7 @@ def get_aws_session( self, execution_role_arn: str | None = None, role_session_name: str | None = None, - ) -> boto3.Session: + ) -> Any: """Get a boto3 Session, optionally assuming a role. 
Args: @@ -147,7 +147,7 @@ def get_aws_session( # ========================================================================= @staticmethod - def create_standard_retry_config(max_attempts: int = 5) -> Config: + def create_standard_retry_config(max_attempts: int = 5) -> Any: """Create a standard retry configuration. Args: @@ -164,9 +164,9 @@ def get_aws_client( client_name: str, execution_role_arn: str | None = None, role_session_name: str | None = None, - config: Config | None = None, + config: Any | None = None, **client_args: Any, - ) -> boto3.client: + ) -> Any: """Get a boto3 client for the specified service. Args: @@ -189,9 +189,9 @@ def get_aws_resource( service_name: str, execution_role_arn: str | None = None, role_session_name: str | None = None, - config: Config | None = None, + config: Any | None = None, **resource_args: Any, - ) -> ServiceResource: + ) -> Any: """Get a boto3 resource for the specified service. Args: @@ -221,7 +221,7 @@ def get_aws_resource( # Identity Operations # ========================================================================= - def get_caller_account_id(self) -> str: + def get_caller_account_id(self) -> ExtendedString: """Get the AWS account ID of the caller. Returns: @@ -229,7 +229,7 @@ def get_caller_account_id(self) -> str: """ sts = self.get_aws_client("sts") identity = sts.get_caller_identity() - return identity["Account"] + return self.extend_result(identity["Account"]) # ========================================================================= # Secrets Manager Operations @@ -240,8 +240,8 @@ def get_secret( secret_id: str, execution_role_arn: str | None = None, role_session_name: str | None = None, - secretsmanager: boto3.client | None = None, - ) -> str | None: + secretsmanager: Any | None = None, + ) -> ExtendedString | None: """Get a single secret value from AWS Secrets Manager. 
Args: @@ -279,14 +279,14 @@ def get_secret( def list_secrets( self, - filters: list[dict[str, Any]] | None = None, + filters: Sequence[Mapping[str, Any]] | None = None, prefix: str | None = None, get_secret_values: bool = False, skip_empty_secrets: bool = False, execution_role_arn: str | None = None, role_session_name: str | None = None, **kwargs: Any, - ) -> dict[str, AWSSecretValue]: + ) -> ExtendedDict: """List secrets from AWS Secrets Manager. Args: @@ -327,7 +327,7 @@ def list_secrets( effective_filters: list[dict[str, Any]] = [] if filters: - effective_filters.extend(filters) + effective_filters.extend(dict(to_builtin(filter_item)) for filter_item in filters) if prefix: effective_filters.append({"Key": "name", "Values": [prefix]}) @@ -363,9 +363,9 @@ def create_secret( name: str, secret_value: str, description: str = "", - tags: dict[str, str] | None = None, + tags: Mapping[str, str] | None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a new secret in AWS Secrets Manager.""" if not name: msg = "name is required to create a secret" @@ -385,7 +385,7 @@ def create_secret( if description: create_kwargs["Description"] = description if tags: - create_kwargs["Tags"] = [{"Key": key, "Value": value} for key, value in tags.items()] + create_kwargs["Tags"] = [{"Key": str(key), "Value": str(value)} for key, value in tags.items()] try: response = secretsmanager.create_secret(**create_kwargs) @@ -400,7 +400,7 @@ def update_secret( secret_id: str, secret_value: str, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Update an existing secret value.""" if not secret_id: msg = "secret_id is required to update a secret" @@ -431,7 +431,7 @@ def delete_secret( force_delete: bool = False, recovery_window_days: int = 30, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Delete a secret from AWS Secrets Manager.""" if not secret_id: msg = "secret_id is 
required to delete a secret" @@ -470,7 +470,7 @@ def delete_secrets_matching( dry_run: bool = True, execution_role_arn: str | None = None, **kwargs: Any, - ) -> list[str]: + ) -> ExtendedList[ExtendedString]: """Delete all secrets that match the provided name prefix.""" prefix = prefix or kwargs.get("name_prefix") if not prefix: @@ -518,12 +518,12 @@ def delete_secrets_matching( def copy_secrets_to_s3( self, - secrets: dict[str, AWSSecretValue], + secrets: Mapping[str, AWSSecretValue], bucket: str, key: str, execution_role_arn: str | None = None, role_session_name: str | None = None, - ) -> str: + ) -> ExtendedString: """Copy secrets dictionary to S3 as JSON. Args: @@ -559,7 +559,7 @@ def copy_secrets_to_s3( return self.extend_result(s3_uri) @staticmethod - def load_vendors_from_asm(prefix: str = "/vendors/") -> dict[str, str]: + def load_vendors_from_asm(prefix: str = "/vendors/") -> ExtendedDict: """Load vendor secrets from AWS Secrets Manager. This is used in Lambda environments where vendor credentials are stored diff --git a/src/extended_data/connectors/aws/codedeploy.py b/src/extended_data/connectors/aws/codedeploy.py index f978090..3e11583 100644 --- a/src/extended_data/connectors/aws/codedeploy.py +++ b/src/extended_data/connectors/aws/codedeploy.py @@ -7,12 +7,12 @@ from __future__ import annotations -from collections.abc import Iterable, Sequence +from collections.abc import Iterable, Mapping, Sequence from datetime import datetime, timezone from typing import TYPE_CHECKING, Any from extended_data.connectors.aws import AWSConnector -from extended_data.containers import extend_data +from extended_data.containers import ExtendedDict, extend_data, to_builtin from extended_data.logging import Logging @@ -21,6 +21,9 @@ from botocore.config import Config from botocore.exceptions import ClientError, WaiterError else: + BaseClient = Any + Config = Any + try: from botocore.exceptions import ClientError, WaiterError except ImportError: @@ -149,7 +152,7 @@ def 
get_aws_codedeploy_deployments( statuses: Sequence[str] | None = None, created_after: datetime | str | float | None = None, created_before: datetime | str | float | None = None, - tag_filters: Sequence[dict[str, Any]] | None = None, + tag_filters: Sequence[Mapping[str, Any]] | None = None, include_details: bool = True, limit: int | None = None, next_token: str | None = None, @@ -161,7 +164,7 @@ def get_aws_codedeploy_deployments( region_name: str | None = None, config: Config | None = None, logging_adapter: Logging | None = None, -) -> dict[str, Any]: +) -> ExtendedDict: """List CodeDeploy deployments with optional detail hydration. Returns a dictionary with the deployment identifiers, optional deployment @@ -198,7 +201,7 @@ def get_aws_codedeploy_deployments( if end: params["createTimeRange"]["end"] = end if tag_filters: - params["tagFilters"] = list(tag_filters) + params["tagFilters"] = [dict(to_builtin(tag_filter)) for tag_filter in tag_filters] deployment_ids: list[str] = [] pages = 0 @@ -266,11 +269,11 @@ def get_aws_codedeploy_deployments( def create_codedeploy_deployment( application_name: str, deployment_group_name: str, - revision: dict[str, Any], + revision: Mapping[str, Any], description: str | None = None, ignore_application_stop_failures: bool | None = None, file_exists_behavior: str | None = None, - auto_rollback_configuration: dict[str, Any] | None = None, + auto_rollback_configuration: Mapping[str, Any] | None = None, update_outdated_instances_only: bool | None = None, wait: bool = False, waiter_delay: int = 15, @@ -284,7 +287,7 @@ def create_codedeploy_deployment( config: Config | None = None, logging_adapter: Logging | None = None, **additional_params: Any, -) -> dict[str, Any]: +) -> ExtendedDict: """Create a CodeDeploy deployment and optionally wait for completion.""" if not revision: msg = "The CodeDeploy revision payload is required." 
@@ -313,7 +316,7 @@ def create_codedeploy_deployment( request: dict[str, Any] = { "applicationName": application_name, "deploymentGroupName": deployment_group_name, - "revision": revision, + "revision": dict(to_builtin(revision)), } if description: request["description"] = description @@ -322,10 +325,10 @@ def create_codedeploy_deployment( if file_exists_behavior: request["fileExistsBehavior"] = file_exists_behavior if auto_rollback_configuration: - request["autoRollbackConfiguration"] = auto_rollback_configuration + request["autoRollbackConfiguration"] = dict(to_builtin(auto_rollback_configuration)) if update_outdated_instances_only is not None: request["updateOutdatedInstancesOnly"] = update_outdated_instances_only - request.update(additional_params) + request.update(to_builtin(additional_params)) try: response = client.create_deployment(**request) diff --git a/src/extended_data/connectors/aws/organizations.py b/src/extended_data/connectors/aws/organizations.py index f45d417..e8d3aad 100644 --- a/src/extended_data/connectors/aws/organizations.py +++ b/src/extended_data/connectors/aws/organizations.py @@ -9,14 +9,14 @@ import re from collections import defaultdict -from collections.abc import Iterator +from collections.abc import Iterator, Mapping, Sequence from copy import deepcopy from typing import TYPE_CHECKING, Any from deepmerge import always_merger from extended_data import is_nothing, unhump_map -from extended_data.containers import to_builtin +from extended_data.containers import ExtendedDict, to_builtin if TYPE_CHECKING: @@ -45,7 +45,7 @@ def get_aws_client( **client_args: Any, ) -> Any: ... - def get_caller_account_id(self) -> str: ... + def get_caller_account_id(self) -> Any: ... def extend_result(self, value: Any) -> Any: ... 
@@ -54,7 +54,7 @@ def get_organization_accounts( unhump_accounts: bool = True, sort_by_name: bool = False, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """Get all AWS accounts from AWS Organizations. Recursively traverses the organization hierarchy to get all accounts @@ -154,7 +154,7 @@ def get_controltower_accounts( unhump_accounts: bool = True, sort_by_name: bool = False, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """Get all AWS accounts managed by AWS Control Tower. Retrieves accounts from the Control Tower Account Factory. @@ -222,7 +222,7 @@ def get_accounts( sort_by_name: bool = False, include_controltower: bool = True, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """Get all AWS accounts from Organizations and Control Tower. Combines accounts from AWS Organizations and Control Tower, marking @@ -274,7 +274,7 @@ def get_organization_units( self, unhump_units: bool = True, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """Get all organizational units from AWS Organizations. Args: @@ -461,7 +461,7 @@ def _process_classifications(value: str) -> list[str]: def label_account( self, account_id: str, - labels: dict[str, str], + labels: Mapping[str, str], execution_role_arn: str | None = None, ) -> None: """Apply labels (tags) to an AWS account. 
@@ -479,16 +479,16 @@ def label_account( execution_role_arn=role_arn, ) - tags = [{"Key": k, "Value": v} for k, v in labels.items()] + tags = [{"Key": str(k), "Value": str(v)} for k, v in labels.items()] orgs.tag_resource(ResourceId=account_id, Tags=tags) self.logger.info(f"Applied {len(labels)} tags to account {account_id}") def classify_accounts( self, - accounts: dict[str, dict[str, Any]] | None = None, - classification_rules: dict[str, list[str]] | None = None, + accounts: Mapping[str, Mapping[str, Any]] | None = None, + classification_rules: Mapping[str, Sequence[str]] | None = None, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """Classify AWS accounts based on OU paths or tags. Default classification rules: @@ -513,6 +513,9 @@ def classify_accounts( unhump_accounts=True, execution_role_arn=execution_role_arn, ) + account_map: dict[str, dict[str, Any]] = { + account_id: dict(to_builtin(account_data)) for account_id, account_data in accounts.items() + } default_rules = { "production": ["prod", "production"], @@ -525,7 +528,7 @@ def classify_accounts( } rules = classification_rules or default_rules - for account_id, account_data in accounts.items(): + for account_id, account_data in account_map.items(): ou_name = account_data.get("ou_name", "").lower() ou_path = account_data.get("path", "").lower() if "path" in account_data else "" tags = account_data.get("tags", {}) @@ -544,10 +547,10 @@ def classify_accounts( if classification != "unclassified": break - accounts[account_id]["classification"] = classification + account_map[account_id]["classification"] = classification - self.logger.info(f"Classified {len(accounts)} accounts") - return self.extend_result(accounts) + self.logger.info(f"Classified {len(account_map)} accounts") + return self.extend_result(account_map) # --------------------------------------------------------------------- # # Terraform-migrated helpers # @@ -555,11 +558,11 @@ def classify_accounts( 
def label_aws_accounts( self, - domains: dict[str, str], - aws_organization_units: dict[str, dict[str, Any]] | None = None, + domains: Mapping[str, str], + aws_organization_units: Mapping[str, Mapping[str, Any]] | None = None, caller_account_id: str | None = None, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """Return normalized metadata for every AWS account. This mirrors the historical ``label_aws_account`` helper from terraform-modules. @@ -579,8 +582,13 @@ def label_aws_accounts( raise ValueError(msg) role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) - units_lookup = aws_organization_units or self._build_org_units_with_tags(role_arn=role_arn) - caller_account_id = caller_account_id or self.get_caller_account_id() + units_lookup = ( + {unit_id: dict(to_builtin(unit)) for unit_id, unit in aws_organization_units.items()} + if aws_organization_units is not None + else self._build_org_units_with_tags(role_arn=role_arn) + ) + domain_lookup = {str(key): str(value) for key, value in domains.items()} + caller_account_id = caller_account_id or str(self.get_caller_account_id()) organization_accounts = self.get_organization_accounts( unhump_accounts=False, @@ -602,7 +610,7 @@ def label_aws_accounts( account_data=account_data, controltower_data=controltower_data, units_lookup=units_lookup, - domains=domains, + domains=domain_lookup, caller_account_id=caller_account_id, ) @@ -615,7 +623,7 @@ def label_aws_accounts( account_data=controltower_data, controltower_data=controltower_data, units_lookup=units_lookup, - domains=domains, + domains=domain_lookup, caller_account_id=caller_account_id, ) @@ -624,11 +632,11 @@ def label_aws_accounts( def label_aws_account( self, account_id: str, - domains: dict[str, str], - aws_organization_units: dict[str, dict[str, Any]] | None = None, + domains: Mapping[str, str], + aws_organization_units: Mapping[str, Mapping[str, Any]] | None = None, caller_account_id: str | 
None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Return metadata for a single AWS account.""" labeled_accounts = self.label_aws_accounts( domains=domains, @@ -643,13 +651,13 @@ def label_aws_account( def classify_aws_accounts( self, - labeled_accounts: dict[str, dict[str, Any]] | None = None, + labeled_accounts: Mapping[str, Mapping[str, Any]] | None = None, suffix: str | None = None, - domains: dict[str, str] | None = None, - aws_organization_units: dict[str, dict[str, Any]] | None = None, + domains: Mapping[str, str] | None = None, + aws_organization_units: Mapping[str, Mapping[str, Any]] | None = None, caller_account_id: str | None = None, execution_role_arn: str | None = None, - ) -> dict[str, list[str]]: + ) -> ExtendedDict: """Group accounts by classification, matching terraform-modules output.""" if labeled_accounts is None: if not domains: @@ -675,12 +683,12 @@ def classify_aws_accounts( def preprocess_aws_organization( self, - domains: dict[str, str], + domains: Mapping[str, str], suffix: str | None = None, - aws_organization_units: dict[str, dict[str, Any]] | None = None, + aws_organization_units: Mapping[str, Mapping[str, Any]] | None = None, caller_account_id: str | None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Build full organization context (accounts, units, lookups).""" role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) units_lookup = aws_organization_units or self._build_org_units_with_tags(role_arn=role_arn) @@ -735,7 +743,7 @@ def preprocess_organization( include_tags: bool = True, include_classification: bool = True, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Preprocess AWS Organization data for terraform consumption. Returns a structured dict suitable for terraform data sources. 
diff --git a/src/extended_data/connectors/aws/s3.py b/src/extended_data/connectors/aws/s3.py index 824e60a..0b1cc1c 100644 --- a/src/extended_data/connectors/aws/s3.py +++ b/src/extended_data/connectors/aws/s3.py @@ -7,10 +7,11 @@ import json +from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any from extended_data import unhump_map -from extended_data.containers import to_builtin +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin if TYPE_CHECKING: @@ -63,7 +64,7 @@ def list_s3_buckets( self, unhump_buckets: bool = True, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """List all S3 buckets. Args: @@ -98,7 +99,7 @@ def get_bucket_location( self, bucket_name: str, execution_role_arn: str | None = None, - ) -> str: + ) -> ExtendedString: """Get the region of an S3 bucket. Args: @@ -125,7 +126,7 @@ def get_object( key: str, decode: bool = True, execution_role_arn: str | None = None, - ) -> str | bytes | None: + ) -> ExtendedString | bytes | None: """Get an object from S3. Args: @@ -163,7 +164,7 @@ def get_json_object( bucket: str, key: str, execution_role_arn: str | None = None, - ) -> dict[str, Any] | list[Any] | None: + ) -> ExtendedDict | ExtendedList[Any] | None: """Get a JSON object from S3. Args: @@ -184,7 +185,7 @@ def get_json_object( if content is None: return None - return self.extend_result(json.loads(content)) + return self.extend_result(json.loads(content if isinstance(content, bytes) else str(content))) def put_object( self, @@ -192,9 +193,9 @@ def put_object( key: str, body: str | bytes, content_type: str | None = None, - metadata: dict[str, str] | None = None, + metadata: Mapping[str, str] | None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Put an object to S3. 
Args: @@ -233,7 +234,7 @@ def put_object( put_args["ContentType"] = "text/yaml" if metadata: - put_args["Metadata"] = metadata + put_args["Metadata"] = {str(key): str(value) for key, value in metadata.items()} response = s3.put_object(**put_args) self.logger.debug(f"Put object to s3://{bucket}/{key}") @@ -243,11 +244,11 @@ def put_json_object( self, bucket: str, key: str, - data: dict[str, Any] | list[Any], + data: Mapping[str, Any] | Sequence[Any], indent: int = 2, - metadata: dict[str, str] | None = None, + metadata: Mapping[str, str] | None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Put a JSON object to S3. Args: @@ -276,7 +277,7 @@ def delete_object( bucket: str, key: str, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Delete an object from S3. Args: @@ -307,7 +308,7 @@ def list_objects( max_keys: int | None = None, unhump_objects: bool = True, execution_role_arn: str | None = None, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List objects in an S3 bucket. Args: @@ -361,7 +362,7 @@ def copy_object( dest_bucket: str, dest_key: str, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Copy an object within S3. Args: @@ -398,7 +399,7 @@ def get_bucket_features( self, bucket_name: str, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Get bucket configuration features (logging, versioning, lifecycle, policy). Args: @@ -464,7 +465,7 @@ def find_buckets_by_name( name_contains: str, include_features: bool = False, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """Find S3 buckets with names containing a string. 
Args: @@ -490,9 +491,11 @@ def find_buckets_by_name( self.logger.debug(f"Found matching bucket: {bucket.name}") if include_features: - buckets[bucket.name] = self.get_bucket_features( - bucket_name=bucket.name, - execution_role_arn=role_arn, + buckets[bucket.name] = to_builtin( + self.get_bucket_features( + bucket_name=bucket.name, + execution_role_arn=role_arn, + ) ) else: buckets[bucket.name] = { @@ -509,9 +512,9 @@ def create_bucket( region: str | None = None, acl: str = "private", enable_versioning: bool = False, - tags: dict[str, str] | None = None, + tags: Mapping[str, str] | None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create an S3 bucket. Args: @@ -557,7 +560,7 @@ def create_bucket( # Apply tags if provided if tags: - tag_set = [{"Key": k, "Value": v} for k, v in tags.items()] + tag_set = [{"Key": str(k), "Value": str(v)} for k, v in tags.items()] s3.put_bucket_tagging( Bucket=bucket_name, Tagging={"TagSet": tag_set}, @@ -612,7 +615,7 @@ def get_bucket_tags( self, bucket_name: str, execution_role_arn: str | None = None, - ) -> dict[str, str]: + ) -> ExtendedDict: """Get tags for an S3 bucket. Args: @@ -640,7 +643,7 @@ def get_bucket_tags( def set_bucket_tags( self, bucket_name: str, - tags: dict[str, str], + tags: Mapping[str, str], execution_role_arn: str | None = None, ) -> None: """Set tags for an S3 bucket. @@ -658,7 +661,7 @@ def set_bucket_tags( execution_role_arn=role_arn, ) - tag_set = [{"Key": k, "Value": v} for k, v in tags.items()] + tag_set = [{"Key": str(k), "Value": str(v)} for k, v in tags.items()] s3.put_bucket_tagging( Bucket=bucket_name, Tagging={"TagSet": tag_set}, @@ -667,9 +670,9 @@ def set_bucket_tags( def get_bucket_sizes( self, - bucket_names: list[str] | None = None, + bucket_names: Sequence[str] | None = None, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """Get sizes of S3 buckets using CloudWatch metrics. 
Args: diff --git a/src/extended_data/connectors/aws/sso.py b/src/extended_data/connectors/aws/sso.py index e74f058..252e835 100644 --- a/src/extended_data/connectors/aws/sso.py +++ b/src/extended_data/connectors/aws/sso.py @@ -6,12 +6,14 @@ from __future__ import annotations +from collections.abc import Mapping, Sequence from copy import deepcopy from typing import TYPE_CHECKING, Any from deepmerge import always_merger from extended_data import is_nothing, unhump_map +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin if TYPE_CHECKING: @@ -45,7 +47,7 @@ def extend_result(self, value: Any) -> Any: ... def get_identity_store_id( self, execution_role_arn: str | None = None, - ) -> str: + ) -> ExtendedString: """Get the IAM Identity Center identity store ID. Args: @@ -79,7 +81,7 @@ def get_identity_store_id( def get_sso_instance_arn( self, execution_role_arn: str | None = None, - ) -> str: + ) -> ExtendedString: """Get the IAM Identity Center instance ARN. Args: @@ -121,7 +123,7 @@ def list_sso_users( flatten_name: bool = True, sort_by_name: bool = False, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """List all users from IAM Identity Center. Args: @@ -138,7 +140,7 @@ def list_sso_users( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: - identity_store_id = self.get_identity_store_id(execution_role_arn=role_arn) + identity_store_id = str(self.get_identity_store_id(execution_role_arn=role_arn)) identitystore = self.get_aws_client( client_name="identitystore", @@ -186,7 +188,7 @@ def get_sso_user( user_id: str, identity_store_id: str | None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any] | None: + ) -> ExtendedDict | None: """Get a specific SSO user by ID. 
Args: @@ -202,7 +204,7 @@ def get_sso_user( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: - identity_store_id = self.get_identity_store_id(execution_role_arn=role_arn) + identity_store_id = str(self.get_identity_store_id(execution_role_arn=role_arn)) identitystore = self.get_aws_client( client_name="identitystore", @@ -227,10 +229,10 @@ def create_sso_user( display_name: str, given_name: str | None = None, family_name: str | None = None, - emails: list[dict[str, Any]] | None = None, + emails: Sequence[Mapping[str, Any]] | None = None, identity_store_id: str | None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a user in IAM Identity Center. Args: @@ -249,7 +251,7 @@ def create_sso_user( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: - identity_store_id = self.get_identity_store_id(execution_role_arn=role_arn) + identity_store_id = str(self.get_identity_store_id(execution_role_arn=role_arn)) identitystore = self.get_aws_client( client_name="identitystore", @@ -270,7 +272,7 @@ def create_sso_user( user_body["Name"]["FamilyName"] = family_name if emails: - user_body["Emails"] = emails + user_body["Emails"] = to_builtin(list(emails)) result = identitystore.create_user(**user_body) self.logger.info(f"Created SSO user: {user_name} ({result.get('UserId')})") @@ -293,7 +295,7 @@ def delete_sso_user( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: - identity_store_id = self.get_identity_store_id(execution_role_arn=role_arn) + identity_store_id = str(self.get_identity_store_id(execution_role_arn=role_arn)) identitystore = self.get_aws_client( client_name="identitystore", @@ -315,10 +317,10 @@ def list_sso_groups( identity_store_id: str | None = None, unhump_groups: bool = True, expand_members: bool = False, - users: dict[str, dict[str, Any]] | None = None, + 
users: Mapping[str, Mapping[str, Any]] | None = None, sort_by_name: bool = False, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """List all groups from IAM Identity Center. Args: @@ -336,7 +338,7 @@ def list_sso_groups( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: - identity_store_id = self.get_identity_store_id(execution_role_arn=role_arn) + identity_store_id = str(self.get_identity_store_id(execution_role_arn=role_arn)) # Pre-fetch users if expanding members if expand_members and not users: @@ -397,8 +399,8 @@ def _get_group_members( identity_store_id: str, identitystore: Any, expand_members: bool = False, - users: dict[str, dict[str, Any]] | None = None, - ) -> list[str] | dict[str, dict[str, Any]]: + users: Mapping[str, Mapping[str, Any]] | None = None, + ) -> list[str] | dict[str, Mapping[str, Any]]: """Get members of an SSO group. Args: @@ -411,7 +413,7 @@ def _get_group_members( Returns: List of user IDs or dict mapping user IDs to user data. """ - members: list[str] | dict[str, dict[str, Any]] = {} if expand_members else [] + members: list[str] | dict[str, Mapping[str, Any]] = {} if expand_members else [] page_token: str | None = None while True: @@ -447,7 +449,7 @@ def create_sso_group( description: str = "", identity_store_id: str | None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create a group in IAM Identity Center. 
Args: @@ -463,7 +465,7 @@ def create_sso_group( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: - identity_store_id = self.get_identity_store_id(execution_role_arn=role_arn) + identity_store_id = str(self.get_identity_store_id(execution_role_arn=role_arn)) identitystore = self.get_aws_client( client_name="identitystore", @@ -495,7 +497,7 @@ def delete_sso_group( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: - identity_store_id = self.get_identity_store_id(execution_role_arn=role_arn) + identity_store_id = str(self.get_identity_store_id(execution_role_arn=role_arn)) identitystore = self.get_aws_client( client_name="identitystore", @@ -514,7 +516,7 @@ def add_user_to_group( group_id: str, identity_store_id: str | None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Add a user to an SSO group. Args: @@ -530,7 +532,7 @@ def add_user_to_group( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: - identity_store_id = self.get_identity_store_id(execution_role_arn=role_arn) + identity_store_id = str(self.get_identity_store_id(execution_role_arn=role_arn)) identitystore = self.get_aws_client( client_name="identitystore", @@ -562,7 +564,7 @@ def remove_user_from_group( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: - identity_store_id = self.get_identity_store_id(execution_role_arn=role_arn) + identity_store_id = str(self.get_identity_store_id(execution_role_arn=role_arn)) identitystore = self.get_aws_client( client_name="identitystore", @@ -587,7 +589,7 @@ def list_permission_sets( unhump_sets: bool = True, sort_by_name: bool = False, execution_role_arn: str | None = None, - ) -> dict[str, dict[str, Any]]: + ) -> ExtendedDict: """List all permission sets from IAM Identity Center. 
Args: @@ -605,7 +607,7 @@ def list_permission_sets( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not instance_arn: - instance_arn = self.get_sso_instance_arn(execution_role_arn=role_arn) + instance_arn = str(self.get_sso_instance_arn(execution_role_arn=role_arn)) sso_admin = self.get_aws_client( client_name="sso-admin", @@ -704,7 +706,7 @@ def list_account_assignments( instance_arn: str | None = None, unhump_assignments: bool = True, execution_role_arn: str | None = None, - ) -> list[dict[str, Any]]: + ) -> ExtendedList[ExtendedDict]: """List account assignments for a permission set. Args: @@ -721,7 +723,7 @@ def list_account_assignments( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not instance_arn: - instance_arn = self.get_sso_instance_arn(execution_role_arn=role_arn) + instance_arn = str(self.get_sso_instance_arn(execution_role_arn=role_arn)) sso_admin = self.get_aws_client( client_name="sso-admin", @@ -761,7 +763,7 @@ def create_account_assignment( principal_type: str, instance_arn: str | None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Create an account assignment. Args: @@ -779,7 +781,7 @@ def create_account_assignment( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not instance_arn: - instance_arn = self.get_sso_instance_arn(execution_role_arn=role_arn) + instance_arn = str(self.get_sso_instance_arn(execution_role_arn=role_arn)) sso_admin = self.get_aws_client( client_name="sso-admin", @@ -805,7 +807,7 @@ def delete_account_assignment( principal_type: str, instance_arn: str | None = None, execution_role_arn: str | None = None, - ) -> dict[str, Any]: + ) -> ExtendedDict: """Delete an account assignment. 
Args: @@ -823,7 +825,7 @@ def delete_account_assignment( role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not instance_arn: - instance_arn = self.get_sso_instance_arn(execution_role_arn=role_arn) + instance_arn = str(self.get_sso_instance_arn(execution_role_arn=role_arn)) sso_admin = self.get_aws_client( client_name="sso-admin", diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index 10502c3..74e4ada 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -2,11 +2,16 @@ from __future__ import annotations -from typing import get_args, get_origin, get_type_hints +from typing import Any, get_args, get_origin, get_type_hints import pytest from extended_data.connectors.anthropic import AnthropicConnector +from extended_data.connectors.aws import AWSConnector +from extended_data.connectors.aws.codedeploy import create_codedeploy_deployment, get_aws_codedeploy_deployments +from extended_data.connectors.aws.organizations import AWSOrganizationsMixin +from extended_data.connectors.aws.s3 import AWSS3Mixin +from extended_data.connectors.aws.sso import AWSSSOmixin from extended_data.connectors.cursor import CursorConnector from extended_data.connectors.github import GitHubConnector from extended_data.connectors.google import GoogleConnector @@ -25,6 +30,53 @@ (AnthropicConnector.create_message, ExtendedDict), (AnthropicConnector.list_models, ExtendedList[ExtendedDict]), (AnthropicConnector.get_model, ExtendedDict), + (AWSConnector.get_caller_account_id, ExtendedString), + (AWSConnector.get_secret, ExtendedString | None), + (AWSConnector.list_secrets, ExtendedDict), + (AWSConnector.create_secret, ExtendedDict), + (AWSConnector.update_secret, ExtendedDict), + (AWSConnector.delete_secret, ExtendedDict), + (AWSConnector.delete_secrets_matching, ExtendedList[ExtendedString]), + (AWSConnector.copy_secrets_to_s3, 
ExtendedString), + (AWSConnector.load_vendors_from_asm, ExtendedDict), + (AWSOrganizationsMixin.get_organization_accounts, ExtendedDict), + (AWSOrganizationsMixin.get_controltower_accounts, ExtendedDict), + (AWSOrganizationsMixin.get_accounts, ExtendedDict), + (AWSOrganizationsMixin.get_organization_units, ExtendedDict), + (AWSOrganizationsMixin.classify_accounts, ExtendedDict), + (AWSOrganizationsMixin.label_aws_accounts, ExtendedDict), + (AWSOrganizationsMixin.label_aws_account, ExtendedDict), + (AWSOrganizationsMixin.classify_aws_accounts, ExtendedDict), + (AWSOrganizationsMixin.preprocess_aws_organization, ExtendedDict), + (AWSOrganizationsMixin.preprocess_organization, ExtendedDict), + (AWSS3Mixin.list_s3_buckets, ExtendedDict), + (AWSS3Mixin.get_bucket_location, ExtendedString), + (AWSS3Mixin.get_object, ExtendedString | bytes | None), + (AWSS3Mixin.get_json_object, ExtendedDict | ExtendedList[Any] | None), + (AWSS3Mixin.put_object, ExtendedDict), + (AWSS3Mixin.put_json_object, ExtendedDict), + (AWSS3Mixin.delete_object, ExtendedDict), + (AWSS3Mixin.list_objects, ExtendedList[ExtendedDict]), + (AWSS3Mixin.copy_object, ExtendedDict), + (AWSS3Mixin.get_bucket_features, ExtendedDict), + (AWSS3Mixin.find_buckets_by_name, ExtendedDict), + (AWSS3Mixin.create_bucket, ExtendedDict), + (AWSS3Mixin.get_bucket_tags, ExtendedDict), + (AWSS3Mixin.get_bucket_sizes, ExtendedDict), + (AWSSSOmixin.get_identity_store_id, ExtendedString), + (AWSSSOmixin.get_sso_instance_arn, ExtendedString), + (AWSSSOmixin.list_sso_users, ExtendedDict), + (AWSSSOmixin.get_sso_user, ExtendedDict | None), + (AWSSSOmixin.create_sso_user, ExtendedDict), + (AWSSSOmixin.list_sso_groups, ExtendedDict), + (AWSSSOmixin.create_sso_group, ExtendedDict), + (AWSSSOmixin.add_user_to_group, ExtendedDict), + (AWSSSOmixin.list_permission_sets, ExtendedDict), + (AWSSSOmixin.list_account_assignments, ExtendedList[ExtendedDict]), + (AWSSSOmixin.create_account_assignment, ExtendedDict), + 
(AWSSSOmixin.delete_account_assignment, ExtendedDict), + (get_aws_codedeploy_deployments, ExtendedDict), + (create_codedeploy_deployment, ExtendedDict), (CursorConnector.list_agents, ExtendedList[ExtendedDict]), (CursorConnector.get_agent_status, ExtendedDict), (CursorConnector.get_agent_conversation, ExtendedDict), From e67d31d7469ccecf1f40d728bbf031601366e2ca Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 07:10:28 -0500 Subject: [PATCH 096/287] test: guard connector payload boundaries --- .../test_connector_payload_contracts.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index 74e4ada..4b90c07 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -2,6 +2,9 @@ from __future__ import annotations +import ast + +from pathlib import Path from typing import Any, get_args, get_origin, get_type_hints import pytest @@ -26,6 +29,8 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString +REPO_ROOT = Path(__file__).resolve().parents[2] + PAYLOAD_METHODS = ( (AnthropicConnector.create_message, ExtendedDict), (AnthropicConnector.list_models, ExtendedList[ExtendedDict]), @@ -177,6 +182,53 @@ (ZoomConnector.get_meeting, ExtendedDict), ) +RAW_CONNECTOR_BOUNDARIES = { + ("src/extended_data/connectors/base.py", "VendorConnectorBase.get_tools"), + ("src/extended_data/connectors/connectors.py", "ConnectorFabric.list_connectors"), + ("src/extended_data/connectors/registry.py", "list_connectors"), + ("src/extended_data/connectors/zoom/__init__.py", "ZoomConnector.get_headers"), +} + + +class _RawContainerReturnVisitor(ast.NodeVisitor): + def __init__(self, relative_path: str) -> None: + self.relative_path = relative_path + self.class_stack: list[str] = [] + self.function_depth = 0 + self.offenders: list[str] = [] + + def visit_If(self, 
node: ast.If) -> None: + if ast.unparse(node.test) == "TYPE_CHECKING": + return + self.generic_visit(node) + + def visit_ClassDef(self, node: ast.ClassDef) -> None: + self.class_stack.append(node.name) + self.generic_visit(node) + self.class_stack.pop() + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + self._visit_function(node) + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + self._visit_function(node) + + def _visit_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None: + is_nested_function = self.function_depth > 0 + qualname = ".".join([*self.class_stack, node.name]) + + if not is_nested_function and not node.name.startswith("_") and node.returns is not None: + annotation = ast.unparse(node.returns) + has_raw_container = any(token in annotation for token in ("dict", "list")) + if has_raw_container and "Extended" not in annotation: + boundary = (self.relative_path, qualname) + if boundary not in RAW_CONNECTOR_BOUNDARIES: + self.offenders.append(f"{self.relative_path}:{node.lineno}: {qualname} -> {annotation}") + + self.function_depth += 1 + self.generic_visit(node) + self.function_depth -= 1 + @pytest.mark.parametrize(("method", "expected_return"), PAYLOAD_METHODS) def test_direct_connector_methods_advertise_extended_payloads(method: object, expected_return: object) -> None: @@ -189,3 +241,20 @@ def test_direct_connector_methods_advertise_extended_payloads(method: object, ex return assert return_type == expected_return + + +def test_raw_connector_container_returns_are_explicit_boundaries() -> None: + """Public connector payloads should not drift back to plain dict/list returns.""" + offenders: list[str] = [] + + for path in sorted((REPO_ROOT / "src/extended_data/connectors").rglob("*.py")): + if path.name == "tools.py": + continue + + relative_path = path.relative_to(REPO_ROOT).as_posix() + tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path)) + visitor = 
_RawContainerReturnVisitor(relative_path) + visitor.visit(tree) + offenders.extend(visitor.offenders) + + assert offenders == [] From 07f0b4996db6b0e6376ee45c7e29fdcab298de54 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 07:21:36 -0500 Subject: [PATCH 097/287] fix: harden optional connector extras --- src/extended_data/connectors/ai_tools.py | 25 +++++++- .../connectors/anthropic/tools.py | 18 +----- src/extended_data/connectors/aws/tools.py | 18 +----- src/extended_data/connectors/cursor/tools.py | 18 +----- .../connectors/github/__init__.py | 60 +++++++++++-------- src/extended_data/connectors/github/tools.py | 18 +----- .../connectors/google/__init__.py | 4 +- .../connectors/google/billing.py | 4 +- .../connectors/google/services.py | 7 ++- src/extended_data/connectors/google/tools.py | 18 +----- .../connectors/google/workspace.py | 2 +- src/extended_data/connectors/mcp.py | 2 +- src/extended_data/connectors/meshy/mcp.py | 2 +- src/extended_data/connectors/meshy/tools.py | 18 +----- src/extended_data/connectors/secrets/tools.py | 18 +----- .../connectors/slack/__init__.py | 22 +++---- src/extended_data/connectors/slack/tools.py | 18 +----- src/extended_data/connectors/vault/tools.py | 18 +----- src/extended_data/connectors/zoom/tools.py | 18 +----- .../test_connector_payload_contracts.py | 1 + 20 files changed, 114 insertions(+), 195 deletions(-) diff --git a/src/extended_data/connectors/ai_tools.py b/src/extended_data/connectors/ai_tools.py index 660719d..85afde0 100644 --- a/src/extended_data/connectors/ai_tools.py +++ b/src/extended_data/connectors/ai_tools.py @@ -8,7 +8,8 @@ import builtins -from typing import cast +from collections.abc import Callable, Iterable, Mapping +from typing import Any, cast from pydantic import BaseModel @@ -36,3 +37,25 @@ def get_pydantic_schema(model: builtins.type[BaseModel]) -> ExtendedDict: schema.pop("description", None) return cast(ExtendedDict, extend_data(schema)) + + +def 
build_langchain_tools(tool_definitions: Iterable[Mapping[str, Any]]) -> list[Any]: + """Build LangChain StructuredTools from connector tool definition mappings.""" + try: + from langchain_core.tools import StructuredTool + except ImportError as e: + msg = "langchain-core is required for LangChain tools.\nInstall with: pip install extended-data[langchain]" + raise ImportError(msg) from e + + tools: list[Any] = [] + for definition in tool_definitions: + args_schema = definition.get("schema") or definition.get("args_schema") + tools.append( + StructuredTool.from_function( + func=cast(Callable[..., Any], definition["func"]), + name=cast(str, definition["name"]), + description=cast(str, definition["description"]), + args_schema=cast(Any, args_schema), + ) + ) + return tools diff --git a/src/extended_data/connectors/anthropic/tools.py b/src/extended_data/connectors/anthropic/tools.py index ce473af..3a6be1c 100644 --- a/src/extended_data/connectors/anthropic/tools.py +++ b/src/extended_data/connectors/anthropic/tools.py @@ -108,21 +108,9 @@ def anthropic_list_models() -> ExtendedList[ExtendedDict]: def get_langchain_tools() -> list[Any]: """Get all Anthropic tools as LangChain StructuredTools.""" - try: - from langchain_core.tools import StructuredTool - except ImportError as e: - msg = "langchain-core is required for LangChain tools." 
- raise ImportError(msg) from e - - return [ - StructuredTool.from_function( - func=defn["func"], - name=defn["name"], - description=defn["description"], - args_schema=defn.get("schema") or defn.get("args_schema"), - ) - for defn in TOOL_DEFINITIONS - ] + from extended_data.connectors.ai_tools import build_langchain_tools + + return build_langchain_tools(TOOL_DEFINITIONS) def get_crewai_tools() -> list[Any]: diff --git a/src/extended_data/connectors/aws/tools.py b/src/extended_data/connectors/aws/tools.py index aae1f6d..b3d1025 100644 --- a/src/extended_data/connectors/aws/tools.py +++ b/src/extended_data/connectors/aws/tools.py @@ -340,21 +340,9 @@ def get_secret(secret_id: str) -> ExtendedDict: def get_langchain_tools() -> list[Any]: """Get all AWS tools as LangChain StructuredTools.""" - try: - from langchain_core.tools import StructuredTool - except ImportError as e: - msg = "langchain-core is required for LangChain tools.\nInstall with: pip install extended-data[langchain]" - raise ImportError(msg) from e - - return [ - StructuredTool.from_function( - func=defn["func"], - name=defn["name"], - description=defn["description"], - args_schema=defn.get("schema") or defn.get("args_schema"), - ) - for defn in TOOL_DEFINITIONS - ] + from extended_data.connectors.ai_tools import build_langchain_tools + + return build_langchain_tools(TOOL_DEFINITIONS) def get_crewai_tools() -> list[Any]: diff --git a/src/extended_data/connectors/cursor/tools.py b/src/extended_data/connectors/cursor/tools.py index 4ff8735..f55f203 100644 --- a/src/extended_data/connectors/cursor/tools.py +++ b/src/extended_data/connectors/cursor/tools.py @@ -111,21 +111,9 @@ def cursor_get_agent_status(agent_id: str) -> ExtendedDict: def get_langchain_tools() -> list[Any]: """Get all Cursor tools as LangChain StructuredTools.""" - try: - from langchain_core.tools import StructuredTool - except ImportError as e: - msg = "langchain-core is required for LangChain tools." 
- raise ImportError(msg) from e - - return [ - StructuredTool.from_function( - func=defn["func"], - name=defn["name"], - description=defn["description"], - args_schema=defn.get("schema") or defn.get("args_schema"), - ) - for defn in TOOL_DEFINITIONS - ] + from extended_data.connectors.ai_tools import build_langchain_tools + + return build_langchain_tools(TOOL_DEFINITIONS) def get_crewai_tools() -> list[Any]: diff --git a/src/extended_data/connectors/github/__init__.py b/src/extended_data/connectors/github/__init__.py index e2e7994..cb55e45 100644 --- a/src/extended_data/connectors/github/__init__.py +++ b/src/extended_data/connectors/github/__init__.py @@ -7,7 +7,7 @@ from collections.abc import Mapping, Sequence from copy import deepcopy -from typing import TYPE_CHECKING, Any +from typing import Any from ruamel.yaml import YAML @@ -23,46 +23,58 @@ from extended_data.logging import Logging -if TYPE_CHECKING: - from github import Auth, Github - from github.GithubException import GithubException, UnknownObjectException - from python_graphql_client import GraphqlClient -else: - Auth = None - Github = None - GraphqlClient = None +Auth: Any = None +Github: Any = None +GraphqlClient: Any = None - class GitHubFallbackError(Exception): - """Fallback exception used until PyGithub is imported.""" - GithubException = GitHubFallbackError - UnknownObjectException = GitHubFallbackError +class GitHubFallbackError(Exception): + """Fallback exception used until PyGithub is imported.""" + + +GithubException: Any = GitHubFallbackError +UnknownObjectException: Any = GitHubFallbackError FilePath = str | os.PathLike[str] +def _require_loaded(module: Any | None, module_name: str) -> Any: + if module is None: # pragma: no cover - defensive guard for loader invariants + raise RuntimeError(f"Failed to load optional GitHub dependency module: {module_name}") + return module + + def _load_github_sdk() -> None: """Load GitHub SDK dependencies lazily so tool metadata remains importable.""" 
global Auth, Github, GithubException, GraphqlClient, UnknownObjectException - if Github is None: + needs_github_module = Auth is None or Github is None + needs_exceptions = GithubException is GitHubFallbackError or UnknownObjectException is GitHubFallbackError + needs_graphql = GraphqlClient is None + + if needs_github_module or needs_exceptions or needs_graphql: try: - github_module = require_extra("github", "github") - github_exceptions = require_extra("github.GithubException", "github") - graphql_module = require_extra("python_graphql_client", "github") + github_module = require_extra("github", "github") if needs_github_module else None + github_exceptions = require_extra("github.GithubException", "github") if needs_exceptions else None + graphql_module = require_extra("python_graphql_client", "github") if needs_graphql else None except ImportError as exc: msg = "PyGithub is required for GitHubConnector. Install with: pip install extended-data[github]" raise ImportError(msg) from exc - Auth = github_module.Auth - Github = github_module.Github - GithubException = github_exceptions.GithubException - UnknownObjectException = github_exceptions.UnknownObjectException - GraphqlClient = graphql_module.GraphqlClient + if Auth is None: + Auth = _require_loaded(github_module, "github").Auth + if Github is None: + Github = _require_loaded(github_module, "github").Github + if GithubException is GitHubFallbackError: + GithubException = _require_loaded(github_exceptions, "github.GithubException").GithubException + if UnknownObjectException is GitHubFallbackError: + UnknownObjectException = _require_loaded(github_exceptions, "github.GithubException").UnknownObjectException + if GraphqlClient is None: + GraphqlClient = _require_loaded(graphql_module, "python_graphql_client").GraphqlClient -def get_github_api_error(exc: GithubException) -> str | None: +def get_github_api_error(exc: BaseException) -> str | None: """Extract error message from a GitHub exception.""" data = 
getattr(exc, "data", {}) return data.get("message", None) @@ -104,7 +116,7 @@ def __init__( self.logger.warning(f"Repository {self.GITHUB_OWNER}/{self.GITHUB_REPO} does not exist") if github_branch is None and self.repo: - self.GITHUB_BRANCH = self.repo.default_branch + self.GITHUB_BRANCH: str | None = self.repo.default_branch else: self.GITHUB_BRANCH = github_branch diff --git a/src/extended_data/connectors/github/tools.py b/src/extended_data/connectors/github/tools.py index d3823eb..20a15e4 100644 --- a/src/extended_data/connectors/github/tools.py +++ b/src/extended_data/connectors/github/tools.py @@ -308,21 +308,9 @@ def get_repository_file( def get_langchain_tools() -> list[Any]: """Get all GitHub tools as LangChain StructuredTools.""" - try: - from langchain_core.tools import StructuredTool - except ImportError as e: - msg = "langchain-core is required for LangChain tools." - raise ImportError(msg) from e - - return [ - StructuredTool.from_function( - func=defn["func"], - name=defn["name"], - description=defn["description"], - args_schema=defn.get("schema") or defn.get("args_schema"), - ) - for defn in TOOL_DEFINITIONS - ] + from extended_data.connectors.ai_tools import build_langchain_tools + + return build_langchain_tools(TOOL_DEFINITIONS) def get_crewai_tools() -> list[Any]: diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index a2008a2..0f22a4d 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -116,7 +116,7 @@ def credentials(self) -> service_account.Credentials: Authenticated service account credentials. 
""" if self._credentials is None: - self._credentials = service_account.Credentials.from_service_account_info( + self._credentials = service_account.Credentials.from_service_account_info( # type: ignore[no-untyped-call] self.service_account_info, scopes=self.scopes, ) @@ -134,7 +134,7 @@ def get_credentials_for_subject(self, subject: str) -> service_account.Credentia Returns: Credentials with the specified subject. """ - return service_account.Credentials.from_service_account_info( + return service_account.Credentials.from_service_account_info( # type: ignore[no-untyped-call] self.service_account_info, scopes=self.scopes, ).with_subject(subject) diff --git a/src/extended_data/connectors/google/billing.py b/src/extended_data/connectors/google/billing.py index a56e3ba..35bde8a 100644 --- a/src/extended_data/connectors/google/billing.py +++ b/src/extended_data/connectors/google/billing.py @@ -285,7 +285,7 @@ def get_bigquery_billing_dataset( from google.oauth2 import service_account from googleapiclient.discovery import build - credentials = service_account.Credentials.from_service_account_info( + credentials = service_account.Credentials.from_service_account_info( # type: ignore[no-untyped-call] self.service_account_info, scopes=["https://www.googleapis.com/auth/bigquery.readonly"], ) @@ -347,7 +347,7 @@ def setup_billing_export( from google.oauth2 import service_account from googleapiclient.discovery import build - credentials = service_account.Credentials.from_service_account_info( + credentials = service_account.Credentials.from_service_account_info( # type: ignore[no-untyped-call] self.service_account_info, scopes=["https://www.googleapis.com/auth/bigquery"], ) diff --git a/src/extended_data/connectors/google/services.py b/src/extended_data/connectors/google/services.py index f5a5605..73cd067 100644 --- a/src/extended_data/connectors/google/services.py +++ b/src/extended_data/connectors/google/services.py @@ -32,10 +32,13 @@ def _has_http_status(exc: 
BaseException, status: int) -> bool: def _parse_project_activity_time(value: Any) -> dt.datetime | None: """Parse a Google-style timestamp into an aware UTC datetime.""" - if not isinstance(value, str) or not value.strip(): + if value is None: + return None + + normalized = str(value).strip() + if not normalized: return None - normalized = value.strip() if normalized.endswith("Z"): normalized = f"{normalized[:-1]}+00:00" diff --git a/src/extended_data/connectors/google/tools.py b/src/extended_data/connectors/google/tools.py index 1d71085..464f141 100644 --- a/src/extended_data/connectors/google/tools.py +++ b/src/extended_data/connectors/google/tools.py @@ -355,21 +355,9 @@ def get_langchain_tools() -> list[Any]: Raises: ImportError: If langchain-core is not installed. """ - try: - from langchain_core.tools import StructuredTool - except ImportError as e: - msg = "langchain-core is required for LangChain tools.\nInstall with: pip install extended-data[langchain]" - raise ImportError(msg) from e - - return [ - StructuredTool.from_function( - func=defn["func"], - name=defn["name"], - description=defn["description"], - args_schema=defn.get("schema") or defn.get("args_schema"), - ) - for defn in TOOL_DEFINITIONS - ] + from extended_data.connectors.ai_tools import build_langchain_tools + + return build_langchain_tools(TOOL_DEFINITIONS) def get_crewai_tools() -> list[Any]: diff --git a/src/extended_data/connectors/google/workspace.py b/src/extended_data/connectors/google/workspace.py index c5722c2..a3c84fc 100644 --- a/src/extended_data/connectors/google/workspace.py +++ b/src/extended_data/connectors/google/workspace.py @@ -568,7 +568,7 @@ def list_available_licenses( from google.oauth2 import service_account from googleapiclient.discovery import build - credentials = service_account.Credentials.from_service_account_info( + credentials = service_account.Credentials.from_service_account_info( # type: ignore[no-untyped-call] self.service_account_info, 
scopes=["https://www.googleapis.com/auth/apps.licensing"], ) diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index c9e9b21..3d8a02c 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -148,7 +148,7 @@ def create_server() -> Any: "parameters": schema, } - @server.list_tools() # type: ignore[untyped-decorator] + @server.list_tools() # type: ignore[no-untyped-call,untyped-decorator] async def list_tools() -> list[Tool]: """Return all available tools.""" return [ diff --git a/src/extended_data/connectors/meshy/mcp.py b/src/extended_data/connectors/meshy/mcp.py index e31ecc2..28c6eed 100644 --- a/src/extended_data/connectors/meshy/mcp.py +++ b/src/extended_data/connectors/meshy/mcp.py @@ -276,7 +276,7 @@ def create_server() -> Any: tool_list = [tool for tool, _ in mcp_tools] # Register tools - @server.list_tools() # type: ignore[untyped-decorator] + @server.list_tools() # type: ignore[no-untyped-call,untyped-decorator] async def list_tools() -> list[Any]: return tool_list diff --git a/src/extended_data/connectors/meshy/tools.py b/src/extended_data/connectors/meshy/tools.py index 1479d69..70a3509 100644 --- a/src/extended_data/connectors/meshy/tools.py +++ b/src/extended_data/connectors/meshy/tools.py @@ -546,21 +546,9 @@ def get_langchain_tools() -> list[Any]: Raises: ImportError: If langchain-core is not installed. 
""" - try: - from langchain_core.tools import StructuredTool - except ImportError as e: - msg = "langchain-core is required for LangChain tools.\nInstall with: pip install extended-data[langchain]" - raise ImportError(msg) from e - - return [ - StructuredTool.from_function( - func=defn["func"], - name=defn["name"], - description=defn["description"], - args_schema=defn.get("schema") or defn.get("args_schema"), - ) - for defn in TOOL_DEFINITIONS - ] + from extended_data.connectors.ai_tools import build_langchain_tools + + return build_langchain_tools(TOOL_DEFINITIONS) def get_crewai_tools() -> list[Any]: diff --git a/src/extended_data/connectors/secrets/tools.py b/src/extended_data/connectors/secrets/tools.py index 3326f91..b0b241c 100644 --- a/src/extended_data/connectors/secrets/tools.py +++ b/src/extended_data/connectors/secrets/tools.py @@ -258,21 +258,9 @@ def get_sources(config_path: str) -> ExtendedDict: def get_langchain_tools() -> list[Any]: """Get all secrets sync tools as LangChain StructuredTools.""" - try: - from langchain_core.tools import StructuredTool - except ImportError as e: - msg = "langchain-core is required for LangChain tools." 
- raise ImportError(msg) from e - - return [ - StructuredTool.from_function( - func=defn["func"], - name=defn["name"], - description=defn["description"], - args_schema=defn.get("schema"), - ) - for defn in TOOL_DEFINITIONS - ] + from extended_data.connectors.ai_tools import build_langchain_tools + + return build_langchain_tools(TOOL_DEFINITIONS) def get_crewai_tools() -> list[Any]: diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index 5a5ee97..e594e05 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -6,7 +6,7 @@ from collections.abc import Iterable, Iterator, Mapping, Sequence from time import sleep -from typing import TYPE_CHECKING, Any +from typing import Any # batched was added in Python 3.12 @@ -29,24 +29,24 @@ def batched(iterable: Iterable[Any], n: int) -> Iterator[tuple[Any, ...]]: from extended_data.logging import Logging -if TYPE_CHECKING: - from slack_sdk.errors import SlackApiError - from slack_sdk.web import WebClient -else: - WebClient = None +class SlackFallbackError(Exception): + """Fallback exception used until slack-sdk is imported.""" + - class SlackApiError(Exception): - """Fallback exception used until slack-sdk is imported.""" +SlackApiError: Any = SlackFallbackError +WebClient: Any = None def _load_slack_sdk() -> None: """Load slack-sdk lazily so tool metadata can import without the slack extra.""" global SlackApiError, WebClient - if WebClient is None: + if WebClient is None or SlackApiError is SlackFallbackError: try: - SlackApiError = require_extra("slack_sdk.errors", "slack").SlackApiError - WebClient = require_extra("slack_sdk.web", "slack").WebClient + if SlackApiError is SlackFallbackError: + SlackApiError = require_extra("slack_sdk.errors", "slack").SlackApiError + if WebClient is None: + WebClient = require_extra("slack_sdk.web", "slack").WebClient except ImportError as exc: msg = "slack-sdk is required for 
SlackConnector. Install with: pip install extended-data[slack]" raise ImportError(msg) from exc diff --git a/src/extended_data/connectors/slack/tools.py b/src/extended_data/connectors/slack/tools.py index e22f1d4..824480c 100644 --- a/src/extended_data/connectors/slack/tools.py +++ b/src/extended_data/connectors/slack/tools.py @@ -305,21 +305,9 @@ def get_langchain_tools() -> list[Any]: Raises: ImportError: If langchain-core is not installed. """ - try: - from langchain_core.tools import StructuredTool - except ImportError as e: - msg = "langchain-core is required for LangChain tools.\nInstall with: pip install extended-data[langchain]" - raise ImportError(msg) from e - - return [ - StructuredTool.from_function( - func=defn["func"], - name=defn["name"], - description=defn["description"], - args_schema=defn.get("schema") or defn.get("args_schema"), - ) - for defn in TOOL_DEFINITIONS - ] + from extended_data.connectors.ai_tools import build_langchain_tools + + return build_langchain_tools(TOOL_DEFINITIONS) def get_crewai_tools() -> list[Any]: diff --git a/src/extended_data/connectors/vault/tools.py b/src/extended_data/connectors/vault/tools.py index 5e8a1ca..df06cc4 100644 --- a/src/extended_data/connectors/vault/tools.py +++ b/src/extended_data/connectors/vault/tools.py @@ -127,21 +127,9 @@ def read_secret( def get_langchain_tools() -> list[Any]: """Get all Vault tools as LangChain StructuredTools.""" - try: - from langchain_core.tools import StructuredTool - except ImportError as e: - msg = "langchain-core is required for LangChain tools." 
- raise ImportError(msg) from e - - return [ - StructuredTool.from_function( - func=defn["func"], - name=defn["name"], - description=defn["description"], - args_schema=defn.get("schema") or defn.get("args_schema"), - ) - for defn in TOOL_DEFINITIONS - ] + from extended_data.connectors.ai_tools import build_langchain_tools + + return build_langchain_tools(TOOL_DEFINITIONS) def get_crewai_tools() -> list[Any]: diff --git a/src/extended_data/connectors/zoom/tools.py b/src/extended_data/connectors/zoom/tools.py index d6e84d5..42300dd 100644 --- a/src/extended_data/connectors/zoom/tools.py +++ b/src/extended_data/connectors/zoom/tools.py @@ -160,21 +160,9 @@ def get_meeting(meeting_id: str) -> ExtendedDict: def get_langchain_tools() -> list[Any]: """Get all Zoom tools as LangChain StructuredTools.""" - try: - from langchain_core.tools import StructuredTool - except ImportError as e: - msg = "langchain-core is required for LangChain tools." - raise ImportError(msg) from e - - return [ - StructuredTool.from_function( - func=defn["func"], - name=defn["name"], - description=defn["description"], - args_schema=defn.get("schema") or defn.get("args_schema"), - ) - for defn in TOOL_DEFINITIONS - ] + from extended_data.connectors.ai_tools import build_langchain_tools + + return build_langchain_tools(TOOL_DEFINITIONS) def get_crewai_tools() -> list[Any]: diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index 4b90c07..b863d22 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -183,6 +183,7 @@ ) RAW_CONNECTOR_BOUNDARIES = { + ("src/extended_data/connectors/ai_tools.py", "build_langchain_tools"), ("src/extended_data/connectors/base.py", "VendorConnectorBase.get_tools"), ("src/extended_data/connectors/connectors.py", "ConnectorFabric.list_connectors"), ("src/extended_data/connectors/registry.py", "list_connectors"), From 
9982e8726d5ccf94dbfc2a15e40a675ee0dd1cbe Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 07:29:13 -0500 Subject: [PATCH 098/287] fix: keep optional connector typing environment-neutral --- src/extended_data/connectors/google/__init__.py | 8 +++++--- src/extended_data/connectors/google/billing.py | 8 +++++--- src/extended_data/connectors/google/workspace.py | 5 +++-- src/extended_data/connectors/mcp.py | 9 ++++++--- src/extended_data/connectors/meshy/mcp.py | 9 ++++++--- src/extended_data/connectors/slack/__init__.py | 9 ++++++--- 6 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index 0f22a4d..c341dab 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -5,7 +5,7 @@ import json from collections.abc import Sequence -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase @@ -116,7 +116,8 @@ def credentials(self) -> service_account.Credentials: Authenticated service account credentials. """ if self._credentials is None: - self._credentials = service_account.Credentials.from_service_account_info( # type: ignore[no-untyped-call] + credentials_class = cast(Any, service_account.Credentials) + self._credentials = credentials_class.from_service_account_info( self.service_account_info, scopes=self.scopes, ) @@ -134,7 +135,8 @@ def get_credentials_for_subject(self, subject: str) -> service_account.Credentia Returns: Credentials with the specified subject. 
""" - return service_account.Credentials.from_service_account_info( # type: ignore[no-untyped-call] + credentials_class = cast(Any, service_account.Credentials) + return credentials_class.from_service_account_info( self.service_account_info, scopes=self.scopes, ).with_subject(subject) diff --git a/src/extended_data/connectors/google/billing.py b/src/extended_data/connectors/google/billing.py index 35bde8a..ddab375 100644 --- a/src/extended_data/connectors/google/billing.py +++ b/src/extended_data/connectors/google/billing.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from extended_data import unhump_map from extended_data.containers import ExtendedDict, ExtendedList, to_builtin @@ -285,7 +285,8 @@ def get_bigquery_billing_dataset( from google.oauth2 import service_account from googleapiclient.discovery import build - credentials = service_account.Credentials.from_service_account_info( # type: ignore[no-untyped-call] + credentials_class = cast(Any, service_account.Credentials) + credentials = credentials_class.from_service_account_info( self.service_account_info, scopes=["https://www.googleapis.com/auth/bigquery.readonly"], ) @@ -347,7 +348,8 @@ def setup_billing_export( from google.oauth2 import service_account from googleapiclient.discovery import build - credentials = service_account.Credentials.from_service_account_info( # type: ignore[no-untyped-call] + credentials_class = cast(Any, service_account.Credentials) + credentials = credentials_class.from_service_account_info( self.service_account_info, scopes=["https://www.googleapis.com/auth/bigquery"], ) diff --git a/src/extended_data/connectors/google/workspace.py b/src/extended_data/connectors/google/workspace.py index a3c84fc..136cb1c 100644 --- a/src/extended_data/connectors/google/workspace.py +++ b/src/extended_data/connectors/google/workspace.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import 
TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from extended_data import unhump_map from extended_data.containers import ExtendedDict, ExtendedList, to_builtin @@ -568,7 +568,8 @@ def list_available_licenses( from google.oauth2 import service_account from googleapiclient.discovery import build - credentials = service_account.Credentials.from_service_account_info( # type: ignore[no-untyped-call] + credentials_class = cast(Any, service_account.Credentials) + credentials = credentials_class.from_service_account_info( self.service_account_info, scopes=["https://www.googleapis.com/auth/apps.licensing"], ) diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index 3d8a02c..734bba6 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -26,7 +26,7 @@ import sys from collections.abc import Callable -from typing import Any +from typing import Any, cast from extended_data.connectors.registry import get_connector, list_connectors @@ -148,7 +148,10 @@ def create_server() -> Any: "parameters": schema, } - @server.list_tools() # type: ignore[no-untyped-call,untyped-decorator] + tool_decorator = cast(Callable[[], Callable[[Callable[..., Any]], Callable[..., Any]]], server.list_tools) + call_decorator = cast(Callable[[], Callable[[Callable[..., Any]], Callable[..., Any]]], server.call_tool) + + @tool_decorator() async def list_tools() -> list[Tool]: """Return all available tools.""" return [ @@ -156,7 +159,7 @@ async def list_tools() -> list[Tool]: for name, tool in tools.items() ] - @server.call_tool() # type: ignore[untyped-decorator] + @call_decorator() async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]: """Execute a tool and return results.""" if name not in tools: diff --git a/src/extended_data/connectors/meshy/mcp.py b/src/extended_data/connectors/meshy/mcp.py index 28c6eed..464b7c9 100644 --- a/src/extended_data/connectors/meshy/mcp.py +++ 
b/src/extended_data/connectors/meshy/mcp.py @@ -32,7 +32,7 @@ import json from collections.abc import Callable -from typing import Any +from typing import Any, cast MCP_INSTALL_MESSAGE = "MCP SDK not installed. Install with: pip install extended-data[meshy,mcp]" @@ -275,13 +275,16 @@ def create_server() -> Any: tool_handlers = {tool.name: func for tool, func in mcp_tools} tool_list = [tool for tool, _ in mcp_tools] + tool_decorator = cast(Callable[[], Callable[[Callable[..., Any]], Callable[..., Any]]], server.list_tools) + call_decorator = cast(Callable[[], Callable[[Callable[..., Any]], Callable[..., Any]]], server.call_tool) + # Register tools - @server.list_tools() # type: ignore[no-untyped-call,untyped-decorator] + @tool_decorator() async def list_tools() -> list[Any]: return tool_list # Handle tool calls - @server.call_tool() # type: ignore[untyped-decorator] + @call_decorator() async def call_tool(name: str, arguments: dict[str, Any]) -> list[Any]: from mcp.types import TextContent diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index e594e05..5c885fc 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -5,6 +5,7 @@ import sys from collections.abc import Iterable, Iterator, Mapping, Sequence +from contextlib import suppress from time import sleep from typing import Any @@ -41,15 +42,17 @@ def _load_slack_sdk() -> None: """Load slack-sdk lazily so tool metadata can import without the slack extra.""" global SlackApiError, WebClient - if WebClient is None or SlackApiError is SlackFallbackError: + if WebClient is None: try: if SlackApiError is SlackFallbackError: SlackApiError = require_extra("slack_sdk.errors", "slack").SlackApiError - if WebClient is None: - WebClient = require_extra("slack_sdk.web", "slack").WebClient + WebClient = require_extra("slack_sdk.web", "slack").WebClient except ImportError as exc: msg = "slack-sdk is required 
for SlackConnector. Install with: pip install extended-data[slack]" raise ImportError(msg) from exc + elif SlackApiError is SlackFallbackError: + with suppress(ImportError): + SlackApiError = require_extra("slack_sdk.errors", "slack").SlackApiError # Settings From 38e12d34a6e9334ae20c454136162cefbb431721 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 07:32:37 -0500 Subject: [PATCH 099/287] refactor: remove duplicate secrets tools path --- docs/package-surface.md | 2 ++ src/extended_data/secrets/tools.py | 4 ---- tests/core/test_package_surface.py | 11 ++++++----- 3 files changed, 8 insertions(+), 9 deletions(-) delete mode 100644 src/extended_data/secrets/tools.py diff --git a/docs/package-surface.md b/docs/package-surface.md index f4ff0d6..7ef0cc1 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -195,6 +195,8 @@ The `secrets` adapter is the Python-facing bridge to the standalone `secretsync` project. It uses native bindings when present and otherwise falls back to the CLI, which must emit the stable `secretsync pipeline --output json` result envelope for both dry-run and apply runs. +Secrets tool factories are exported from `extended_data.secrets`; the duplicate +`extended_data.secrets.tools` module path is intentionally not preserved. 
```python from extended_data import SecretsConnector, SyncOptions diff --git a/src/extended_data/secrets/tools.py b/src/extended_data/secrets/tools.py deleted file mode 100644 index e576d67..0000000 --- a/src/extended_data/secrets/tools.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Tool exports for secret synchronization workflows.""" - -from extended_data.connectors.secrets.tools import * # noqa: F403 -from extended_data.connectors.secrets.tools import __all__ # noqa: F401 diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index fd4f17a..2e153b6 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -2,12 +2,12 @@ from __future__ import annotations +from importlib import util from importlib.metadata import version from types import ModuleType import extended_data import extended_data.logging as lifecycle_logging -import extended_data.secrets.tools as secrets_tools from extended_data import connectors, containers, inputs, io, primitives, secrets, workflows from extended_data.connectors.connectors import ConnectorFabric @@ -120,7 +120,8 @@ def test_aws_full_connector_keeps_operation_mixins_without_aws_extra() -> None: assert callable(connectors.AWSConnectorFull.list_sso_users) -def test_secrets_tools_alias_preserves_public_exports() -> None: - """The shorter secrets tool path mirrors the canonical connector module.""" - assert "run_pipeline" in secrets_tools.__all__ - assert callable(secrets_tools.run_pipeline) +def test_clean_major_version_does_not_preserve_duplicate_tool_modules() -> None: + """Secrets tool factories live on the package root and connector implementation module.""" + assert util.find_spec("extended_data.secrets.tools") is None + assert callable(secrets.get_tools) + assert callable(connectors.SecretsConnector) From 8f3103b5d083a0fa6f25987a380946b166aafbb5 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 07:35:17 -0500 Subject: [PATCH 100/287] test: enforce clean major 
import surface --- docs/package-surface.md | 3 +++ tests/core/test_package_surface.py | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/docs/package-surface.md b/docs/package-surface.md index 7ef0cc1..78d8950 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -2,6 +2,9 @@ `extended-data` is one Python distribution with a single `extended_data` namespace. The root package exposes the primitives users need most often: +The old `extended_data_types`, `lifecyclelogging`, +`directed_inputs_class`, and `vendor_connectors` import namespaces are not +preserved in this major version. ```python from extended_data import ( diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 2e153b6..564866c 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -81,6 +81,19 @@ def test_clean_major_version_public_names() -> None: assert not hasattr(connectors, "VendorConnectors") +def test_old_monorepo_import_namespaces_are_not_preserved() -> None: + """Old package import namespaces should remain absent in the clean major version.""" + old_namespaces = ( + "directed_inputs_class", + "extended_data_types", + "lifecyclelogging", + "vendor_connectors", + ) + + for namespace in old_namespaces: + assert util.find_spec(namespace) is None + + def test_root_exports_first_class_integrated_primitives() -> None: """Inputs, logging, and connector fabric are available from the root package.""" assert extended_data.DataDecodeError.__name__ == "DataDecodeError" From 3ae31349b80b3916d51f0610e60de8bdff220c71 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 07:42:54 -0500 Subject: [PATCH 101/287] fix: preserve tier two container operators --- docs/package-surface.md | 4 +++ src/extended_data/containers/mappings.py | 13 +++++++ src/extended_data/containers/sequences.py | 36 +++++++++++++++++++ tests/core/test_containers.py | 43 +++++++++++++++++++++++ 4 files changed, 96 insertions(+) 
diff --git a/docs/package-surface.md b/docs/package-surface.md index 78d8950..0792556 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -65,6 +65,10 @@ payload = ExtendedDict({"service": {"name": "api"}}) payload["service"]["name"].upper_first() ``` +Mutation and common operator paths are part of that contract: `setdefault()`, +in-place dict merge, tuple slicing, tuple concatenation, and tuple repetition +preserve Tier 2 containers instead of leaking plain nested values. + Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected `ExtendedDict` values, and `ExtendedDict.all_values()` returns an diff --git a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py index d26ac89..52f3997 100644 --- a/src/extended_data/containers/mappings.py +++ b/src/extended_data/containers/mappings.py @@ -6,6 +6,8 @@ from collections.abc import Iterable, Mapping from typing import TYPE_CHECKING, Any, overload +from typing_extensions import Self + if TYPE_CHECKING: from _typeshed import SupportsKeysAndGetItem @@ -68,6 +70,17 @@ def update(self, *args: Any, **kwargs: Any) -> None: # type: ignore[misc] for key, value in kwargs.items(): self[key] = value + def setdefault(self, key: str, default: Any = None) -> Any: + """Insert a default while returning the promoted stored value.""" + if key not in self.data: + self[key] = default + return self.data[key] + + def __ior__(self, other: Any) -> Self: # type: ignore[override,misc] + """Update from a mapping or item iterable while preserving extended containers.""" + self.update(other) + return self + def deep_merge(self, *mappings: Mapping[str, Any]) -> ExtendedDict: """Return a deeply merged copy.""" from extended_data.containers.factory import extend_data, to_builtin diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index 212c281..e6ab2aa 100644 --- 
a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -4,6 +4,7 @@ from collections import UserList from collections.abc import Callable, Iterable, Iterator, MutableSet +from operator import index as operator_index from typing import Any, SupportsIndex, TypeVar, cast, overload from extended_data.primitives.sequences import flatten_list @@ -103,6 +104,41 @@ def _wrap_item(item: T) -> T: return cast(T, extend_data(item)) + @overload + def __getitem__(self, index: SupportsIndex) -> T: ... + + @overload + def __getitem__(self, index: slice) -> ExtendedTuple[T]: ... + + def __getitem__(self, index: SupportsIndex | slice) -> T | ExtendedTuple[T]: + """Return sliced values as ExtendedTuple instances.""" + value = super().__getitem__(index) + if isinstance(index, slice): + return ExtendedTuple(cast(tuple[T, ...], value)) + return cast(T, value) + + @overload + def __add__(self, other: tuple[T, ...]) -> ExtendedTuple[T]: ... + + @overload + def __add__(self, other: tuple[U, ...]) -> ExtendedTuple[T | U]: ... 
+ + def __add__(self, other: tuple[Any, ...]) -> ExtendedTuple[Any]: + """Concatenate tuples while preserving the ExtendedTuple surface.""" + return ExtendedTuple((*tuple(self), *other)) + + def __radd__(self, other: tuple[Any, ...]) -> ExtendedTuple[Any]: + """Concatenate tuples while preserving the ExtendedTuple surface.""" + return ExtendedTuple((*other, *tuple(self))) + + def __mul__(self, count: SupportsIndex) -> ExtendedTuple[T]: + """Repeat tuple values while preserving the ExtendedTuple surface.""" + return ExtendedTuple(tuple(self) * operator_index(count)) + + def __rmul__(self, count: SupportsIndex) -> ExtendedTuple[T]: + """Repeat tuple values while preserving the ExtendedTuple surface.""" + return self * count + def flatten(self) -> ExtendedTuple[Any]: """Return a recursively flattened tuple copy.""" from extended_data.containers.factory import to_builtin diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index cf39249..ca29329 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -59,6 +59,11 @@ def test_extended_dict_promotes_nested_values_on_mutation() -> None: value.update({"ports": [8080, "9090"]}) value.update([("metadata", {"tier": "prod"})], runtime={"python": "3.13"}) value.update(other={"literal": "key"}) + defaulted = value.setdefault("labels", {"team": "data"}) + existing = value.setdefault("labels", {"team": "ignored"}) + merged = value | {"deployment": {"region": "us-east-1"}} + right_merged = {"cluster": {"name": "primary"}} | value + value |= {"settings": {"debug": "false"}} assert isinstance(value["service"], ExtendedDict) assert isinstance(value["service"]["name"], ExtendedString) @@ -71,6 +76,18 @@ def test_extended_dict_promotes_nested_values_on_mutation() -> None: assert isinstance(value["runtime"]["python"], ExtendedString) assert isinstance(value["other"], ExtendedDict) assert isinstance(value["other"]["literal"], ExtendedString) + assert isinstance(defaulted, ExtendedDict) + 
assert isinstance(defaulted["team"], ExtendedString) + assert existing is defaulted + assert value["labels"]["team"] == "data" + assert isinstance(value["settings"], ExtendedDict) + assert isinstance(value["settings"]["debug"], ExtendedString) + assert isinstance(merged, ExtendedDict) + assert isinstance(merged["deployment"], ExtendedDict) + assert isinstance(merged["deployment"]["region"], ExtendedString) + assert isinstance(right_merged, ExtendedDict) + assert isinstance(right_merged["cluster"], ExtendedDict) + assert isinstance(right_merged["cluster"]["name"], ExtendedString) assert value["service"]["name"].upper_first() == "Api" @@ -148,6 +165,32 @@ def test_extended_tuple_promotes_nested_values() -> None: assert to_builtin(value) == ({"name": "api"}, ["jobs"]) +def test_extended_tuple_preserves_surface_for_builtin_tuple_operations() -> None: + """Inherited tuple operations should not leak plain tuple results.""" + value = ExtendedTuple(({"name": "api"}, ["jobs"])) + prefix = ({"name": "gateway"},) + suffix = ({"name": "worker"},) + + sliced = value[:1] + added = value + suffix + right_added = prefix + value + repeated = value * 2 + right_repeated = 2 * value + + assert isinstance(sliced, ExtendedTuple) + assert isinstance(added, ExtendedTuple) + assert isinstance(right_added, ExtendedTuple) + assert isinstance(repeated, ExtendedTuple) + assert isinstance(right_repeated, ExtendedTuple) + assert isinstance(sliced[0], ExtendedDict) + assert isinstance(added[2], ExtendedDict) + assert isinstance(added[2]["name"], ExtendedString) + assert isinstance(right_added[0], ExtendedDict) + assert isinstance(right_added[0]["name"], ExtendedString) + assert isinstance(repeated[2], ExtendedDict) + assert isinstance(right_repeated[2], ExtendedDict) + + def test_extend_data_recursively_wraps_builtin_containers() -> None: """The container factory promotes plain values into the Tier 2 surface.""" wrapped = extend_data( From 4c023deb08aa4e02ddbfd471aead76da3a0f2185 Mon Sep 17 
00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 07:50:05 -0500 Subject: [PATCH 102/287] feat: expose builtin connectors at package root --- README.md | 7 +++++ docs/package-surface.md | 14 ++++++---- src/extended_data/__init__.py | 39 ++++++++++++++++++++++++++++ src/extended_data/connectors/base.py | 6 ++--- tests/core/test_package_surface.py | 9 +++++++ 5 files changed, 67 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 59b6e99..69e2579 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,13 @@ github = connectors.get_connector( ) ``` +Built-in connector classes are also package-root exports when direct +construction reads better: + +```python +from extended_data import GitHubConnector, SlackConnector +``` + Connector names are normalized before lookup. If a known built-in connector is requested without its optional extra installed, the registry raises an `ImportError` with the matching `extended-data[...]` install target. diff --git a/docs/package-surface.md b/docs/package-surface.md index 0792556..4e2f802 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -1,7 +1,8 @@ # Package Surface `extended-data` is one Python distribution with a single `extended_data` -namespace. The root package exposes the primitives users need most often: +namespace. The root package exposes the primitives and adapters users need most +often. The old `extended_data_types`, `lifecyclelogging`, `directed_inputs_class`, and `vendor_connectors` import namespaces are not preserved in this major version. @@ -16,9 +17,12 @@ from extended_data import ( ExtendedSet, ExtendedString, ExtendedTuple, + GitHubConnector, + GoogleConnector, InputProvider, Logging, SecretsConnector, + SlackConnector, SyncOptions, decode_json, extend_data, @@ -172,10 +176,10 @@ instances by connector type and constructor inputs. Generic connector names are stripped and lowercased before lookup. 
Every built-in connector class registered by name is also exported from -`extended_data.connectors`. Those exports are real classes, not `None` -sentinels. Vendor SDKs load when connector instances need them, so package -import remains lightweight while missing optional extras still fail at the -operation boundary with install guidance. +`extended_data` and `extended_data.connectors`. Those exports are real classes, +not `None` sentinels. Vendor SDKs load when connector instances need them, so +package import remains lightweight while missing optional extras still fail at +the operation boundary with install guidance. Connectors that inherit `VendorConnectorBase` can keep raw transport access with `request()` or use `request_data()`, `get_data()`, `post_data()`, and the other diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 7e495d9..a16e728 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -133,9 +133,22 @@ if TYPE_CHECKING: from extended_data.connectors import ( + AnthropicConnector, + AWSConnector, ConnectorFabric, ConnectorInfo, + CursorConnector, + GitHubConnector, + GoogleBillingConnector, + GoogleCloudConnector, + GoogleConnector, + GoogleWorkspaceConnector, + JulesConnector, + MeshyConnector, + SlackConnector, + VaultConnector, VendorConnectorBase, + ZoomConnector, get_connector, get_connector_class, get_connector_info, @@ -148,18 +161,31 @@ _LAZY_EXPORTS = { + "AWSConnector": ("extended_data.connectors", "AWSConnector"), + "AnthropicConnector": ("extended_data.connectors", "AnthropicConnector"), "ConnectorFabric": ("extended_data.connectors", "ConnectorFabric"), "ConnectorInfo": ("extended_data.connectors", "ConnectorInfo"), + "CursorConnector": ("extended_data.connectors", "CursorConnector"), "ExitRunError": ("extended_data.logging", "ExitRunError"), + "GitHubConnector": ("extended_data.connectors", "GitHubConnector"), + "GoogleBillingConnector": ("extended_data.connectors", 
"GoogleBillingConnector"), + "GoogleCloudConnector": ("extended_data.connectors", "GoogleCloudConnector"), + "GoogleConnector": ("extended_data.connectors", "GoogleConnector"), + "GoogleWorkspaceConnector": ("extended_data.connectors", "GoogleWorkspaceConnector"), "InputProvider": ("extended_data.inputs", "InputProvider"), + "JulesConnector": ("extended_data.connectors", "JulesConnector"), "KeyTransform": ("extended_data.logging", "KeyTransform"), "Logging": ("extended_data.logging", "Logging"), + "MeshyConnector": ("extended_data.connectors", "MeshyConnector"), "OutputFormat": ("extended_data.secrets", "OutputFormat"), "SecretsConnector": ("extended_data.secrets", "SecretsConnector"), + "SlackConnector": ("extended_data.connectors", "SlackConnector"), "SyncOperation": ("extended_data.secrets", "SyncOperation"), "SyncOptions": ("extended_data.secrets", "SyncOptions"), "SyncResult": ("extended_data.secrets", "SyncResult"), + "VaultConnector": ("extended_data.connectors", "VaultConnector"), "VendorConnectorBase": ("extended_data.connectors", "VendorConnectorBase"), + "ZoomConnector": ("extended_data.connectors", "ZoomConnector"), "directed_inputs": ("extended_data.inputs", "directed_inputs"), "get_connector": ("extended_data.connectors", "get_connector"), "get_connector_class": ("extended_data.connectors", "get_connector_class"), @@ -182,8 +208,11 @@ def __getattr__(name: str) -> Any: __all__ = [ + "AWSConnector", + "AnthropicConnector", "ConnectorFabric", "ConnectorInfo", + "CursorConnector", "DataDecodeError", "DataWorkflow", "ExitRunError", @@ -193,20 +222,30 @@ def __getattr__(name: str) -> Any: "ExtendedString", "ExtendedTuple", "FilePath", + "GitHubConnector", + "GoogleBillingConnector", + "GoogleCloudConnector", + "GoogleConnector", + "GoogleWorkspaceConnector", "InputProvider", + "JulesConnector", "KeyTransform", "Logging", + "MeshyConnector", "OutputFormat", "SecretsConnector", + "SlackConnector", "SortedDefaultDict", "StepLike", "SyncOperation", 
"SyncOptions", "SyncResult", + "VaultConnector", "VendorConnectorBase", "WorkflowAction", "WorkflowResult", "WorkflowStep", + "ZoomConnector", "__version__", "all_non_empty", "all_non_empty_in_dict", diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index 5d6a34c..9ed7923 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -11,7 +11,7 @@ ALL connectors should extend this class instead of InputProvider directly. Usage: - from extended_data.connectors.base import VendorConnectorBase + from extended_data import ExtendedDict, VendorConnectorBase class MyConnector(VendorConnectorBase): API_KEY_ENV = "MY_API_KEY" # Required env var name @@ -21,8 +21,8 @@ def __init__(self, api_key: str | None = None, **kwargs): super().__init__(**kwargs) self._api_key = api_key or self.get_input(self.API_KEY_ENV, required=True) - def my_operation(self) -> dict: - return self.request("GET", "/endpoint") + def my_operation(self) -> ExtendedDict: + return self.request_data("GET", "/endpoint", suffix="json") """ from __future__ import annotations diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 564866c..0b48844 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -126,6 +126,15 @@ def test_connectors_root_exports_builtin_connector_classes() -> None: assert value.__name__ == spec.class_name +def test_package_root_exports_builtin_connector_classes() -> None: + """Built-in connector classes are first-class root package exports.""" + for spec in BUILTIN_CONNECTORS.values(): + root_value = getattr(extended_data, spec.class_name) + connector_value = getattr(connectors, spec.class_name) + + assert root_value is connector_value + + def test_aws_full_connector_keeps_operation_mixins_without_aws_extra() -> None: """AWSConnectorFull should expose real operation mixins even before boto3 is installed.""" assert 
callable(connectors.AWSConnectorFull.list_s3_buckets) From 46c1b6371421f3865d638117b9359afb0cad65f7 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 07:56:40 -0500 Subject: [PATCH 103/287] feat: add explicit input state snapshots --- README.md | 8 +++-- docs/package-surface.md | 5 ++- src/extended_data/inputs/__main__.py | 27 ++++++++++++++ tests/inputs/test_main.py | 53 ++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 69e2579..b9a495e 100644 --- a/README.md +++ b/README.md @@ -138,9 +138,11 @@ apply named transformations, write an output artifact, and keep the step trail in a `WorkflowResult`. Missing workflow inputs and empty writes fail loudly. `InputProvider` stores its active, frozen, and merged input snapshots as `ExtendedDict` values, so direct input-data access can use Tier 2 container -methods. `get_input()` remains the scalar coercion boundary for booleans, -numbers, paths, datetimes, and credential strings; pass `as_extended=True` when -an injected raw input value should stay in Tier 2 form. +methods. `snapshot_inputs()` returns detached active or frozen snapshots, and +`replace_inputs()` installs a new active snapshot while clearing stale frozen +state by default. `get_input()` remains the scalar coercion boundary for +booleans, numbers, paths, datetimes, and credential strings; pass +`as_extended=True` when an injected raw input value should stay in Tier 2 form. `Logging` stores marked log message collections as `ExtendedDict` and `ExtendedSet` values while keeping Python logger and handler objects plain. diff --git a/docs/package-surface.md b/docs/package-surface.md index 4e2f802..150af25 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -129,12 +129,15 @@ payloads. Active, frozen, shifted, and merged input snapshots are `ExtendedDict` values, and input decorator metadata/options are promoted the same way. 
The old case-insensitive input mapping is intentionally not preserved; exact keys keep configuration wiring explicit while still letting direct snapshots use Tier 2 -methods. +methods. Use `snapshot_inputs()` for a detached promoted copy of active or +frozen state, and `replace_inputs()` when a workflow should install a new +active snapshot instead of mutating `.inputs` directly. ```python inputs = InputProvider(inputs={"service": {"name": "api"}}, from_environment=False) assert inputs.inputs["service"]["name"].upper_first() == "Api" assert isinstance(inputs.merge_inputs({"service": {"region": "us-east-1"}}), ExtendedDict) +assert inputs.snapshot_inputs()["service"]["region"].upper_first() == "Us-east-1" ``` `get_input()` is the scalar coercion boundary for booleans, numbers, paths, diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index 6bf3e9a..d1f854e 100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -337,6 +337,33 @@ def thaw_inputs(self) -> ExtendedDict: self.frozen_inputs = ExtendedDict() return self.inputs + def snapshot_inputs(self, *, frozen: bool = False) -> ExtendedDict: + """Return a detached Tier 2 snapshot of active or frozen inputs. + + Args: + frozen (bool): Return frozen inputs instead of active inputs. + + Returns: + ExtendedDict: A promoted copy of the requested input state. + """ + source = self.frozen_inputs if frozen else self.inputs + return ExtendedDict(deepcopy(to_builtin(source))) + + def replace_inputs(self, new_inputs: Mapping[str, Any] | None, *, clear_frozen: bool = True) -> ExtendedDict: + """Replace active inputs with a normalized Tier 2 snapshot. + + Args: + new_inputs (Mapping[str, Any] | None): New active input values. + clear_frozen (bool): Whether to clear frozen inputs after replacement. + + Returns: + ExtendedDict: The updated active input mapping. 
+ """ + self.inputs = ExtendedDict(deepcopy(self._normalize_inputs(new_inputs))) + if clear_frozen: + self.frozen_inputs = ExtendedDict() + return self.inputs + def merge_inputs(self, new_inputs: Mapping[str, Any] | None) -> ExtendedDict: """Merge new inputs into the current inputs using deep merge semantics. diff --git a/tests/inputs/test_main.py b/tests/inputs/test_main.py index 32ee674..ad09f5b 100644 --- a/tests/inputs/test_main.py +++ b/tests/inputs/test_main.py @@ -323,6 +323,59 @@ def test_thaw_inputs(): assert dic.frozen_inputs == {} +def test_snapshot_inputs_returns_detached_extended_copy(): + """Input snapshots are promoted copies, not mutable internal state.""" + dic = InputProvider(inputs={"service": {"name": "api"}}) + + snapshot = dic.snapshot_inputs() + snapshot["service"]["name"] = "worker" + + assert isinstance(snapshot, ExtendedDict) + assert isinstance(snapshot["service"], ExtendedDict) + assert isinstance(snapshot["service"]["name"], ExtendedString) + assert dic.inputs["service"]["name"] == "api" + assert dic.snapshot_inputs()["service"]["name"].upper_first() == "Api" + + +def test_snapshot_inputs_can_select_frozen_state(): + """Frozen input snapshots can be inspected without thawing state.""" + dic = InputProvider(inputs={"service": {"name": "api"}}, from_environment=False) + dic.freeze_inputs() + + frozen = dic.snapshot_inputs(frozen=True) + + assert isinstance(frozen, ExtendedDict) + assert isinstance(frozen["service"], ExtendedDict) + assert frozen["service"]["name"].upper_first() == "Api" + assert dic.inputs == {} + assert dic.frozen_inputs["service"]["name"] == "api" + + +def test_replace_inputs_promotes_values_and_clears_frozen_state_by_default(): + """Replacing inputs should be explicit and should not keep stale frozen state.""" + dic = InputProvider(inputs={"service": {"name": "api"}}, from_environment=False) + dic.freeze_inputs() + + replaced = dic.replace_inputs({"service": {"name": "worker"}}) + + assert isinstance(replaced, 
ExtendedDict) + assert isinstance(replaced["service"], ExtendedDict) + assert replaced["service"]["name"].upper_first() == "Worker" + assert dic.inputs["service"]["name"] == "worker" + assert dic.frozen_inputs == {} + + +def test_replace_inputs_can_preserve_frozen_state_when_requested(): + """Replacement can keep frozen inputs for explicit staged-state workflows.""" + dic = InputProvider(inputs={"service": {"name": "api"}}, from_environment=False) + dic.freeze_inputs() + + dic.replace_inputs({"region": "us-east-1"}, clear_frozen=False) + + assert dic.inputs["region"].upper_first() == "Us-east-1" + assert dic.snapshot_inputs(frozen=True)["service"]["name"].upper_first() == "Api" + + def test_shift_inputs(): """Test shifting between frozen and thawed inputs. From 6cafc8c41a001cc8ba95144e78d4e41c86298058 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:00:32 -0500 Subject: [PATCH 104/287] docs: align examples with package fabric --- examples/core/composed_workflows.py | 4 ++-- examples/core/file_operations.py | 4 ++-- examples/core/serialization.py | 4 ++-- examples/core/string_transformations.py | 4 ++-- examples/inputs/README.md | 3 ++- examples/inputs/basic_usage.py | 11 +++++++++-- src/extended_data/connectors/base.py | 2 +- src/extended_data/inputs/__main__.py | 14 ++++++-------- 8 files changed, 26 insertions(+), 20 deletions(-) diff --git a/examples/core/composed_workflows.py b/examples/core/composed_workflows.py index 657d948..9e620ab 100644 --- a/examples/core/composed_workflows.py +++ b/examples/core/composed_workflows.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 """End-to-end workflow examples for Extended Data core. -This script demonstrates how the library's smaller helpers compose into more -complete configuration and payload pipelines. +This script demonstrates how package primitives, containers, and processors +compose into complete configuration and payload pipelines. 
""" from __future__ import annotations diff --git a/examples/core/file_operations.py b/examples/core/file_operations.py index a94842d..6b45ce2 100755 --- a/examples/core/file_operations.py +++ b/examples/core/file_operations.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -"""File operation examples for Extended Data core library. +"""File operation examples for the Extended Data core package. This module demonstrates file path utilities, encoding detection, -and file read/write operations provided by the library. +and file read/write operations provided by the package. """ from __future__ import annotations diff --git a/examples/core/serialization.py b/examples/core/serialization.py index e8f6d85..a8a03e2 100755 --- a/examples/core/serialization.py +++ b/examples/core/serialization.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -"""Serialization examples for Extended Data core library. +"""Serialization examples for the Extended Data core package. This module demonstrates YAML, JSON, TOML, HCL, and Base64 encoding/decoding -utilities provided by the library. +utilities provided by the package. """ from __future__ import annotations diff --git a/examples/core/string_transformations.py b/examples/core/string_transformations.py index 9e5e629..294e8d9 100755 --- a/examples/core/string_transformations.py +++ b/examples/core/string_transformations.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -"""String transformation examples for Extended Data core library. +"""String transformation examples for the Extended Data core package. This module demonstrates case conversion, humanization, pluralization, -and other string manipulation utilities provided by the library. +and other string manipulation utilities provided by the package. 
""" from __future__ import annotations diff --git a/examples/inputs/README.md b/examples/inputs/README.md index c8e6439..5636b70 100644 --- a/examples/inputs/README.md +++ b/examples/inputs/README.md @@ -24,7 +24,8 @@ uv run python examples/inputs/encoding_decoding.py Demonstrates the `InputProvider` API: - Loading inputs from environment variables - Environment variable prefix filtering -- Direct `ExtendedDict`/`ExtendedString` input snapshot access +- Direct `ExtendedDict`/`ExtendedString` input snapshot access with `snapshot_inputs()` +- Active input replacement with `replace_inputs()` - Type conversion (boolean, integer, float) - Default values - Input freezing and thawing diff --git a/examples/inputs/basic_usage.py b/examples/inputs/basic_usage.py index c9dd423..d292f48 100644 --- a/examples/inputs/basic_usage.py +++ b/examples/inputs/basic_usage.py @@ -5,10 +5,12 @@ - Loading inputs from environment variables - Providing default values - Type conversion (boolean, integer, float) +- Detached Tier 2 input snapshots +- Explicit input replacement - Input freezing and thawing Run with: - python -m examples.basic_usage + python examples/inputs/basic_usage.py """ from __future__ import annotations @@ -39,13 +41,18 @@ def main() -> None: inputs.get_input("TIMEOUT", is_float=True) inputs.get_input("NAME") inputs.inputs["NAME"].to_snake_case() + inputs.snapshot_inputs()["NAME"].to_snake_case() # Demonstrate default values inputs.get_input("LOG_LEVEL", default="INFO") + # Replace active inputs with a new promoted snapshot + inputs.replace_inputs({"SERVICE": {"name": "api"}}, clear_frozen=True) + inputs.snapshot_inputs()["SERVICE"]["name"].upper_first() + # Demonstrate freeze/thaw functionality inputs.freeze_inputs() - + inputs.snapshot_inputs(frozen=True)["SERVICE"]["name"].upper_first() inputs.thaw_inputs() diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index 9ed7923..5918685 100644 --- a/src/extended_data/connectors/base.py 
+++ b/src/extended_data/connectors/base.py @@ -1,7 +1,7 @@ """Base class for all extended data connectors. This module provides VendorConnectorBase - the foundation for ALL connectors -in this library. It extends InputProvider and provides: +in the package connector fabric. It extends InputProvider and provides: 1. Credential loading from env vars, stdin, or direct inputs 2. HTTP client with retries and rate limiting diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index d1f854e..022ea0f 100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -1,9 +1,9 @@ -"""Module to handle directed inputs for the InputProvider library. +"""Tier 3 directed input processing for the extended-data package. -This module provides functionality for managing inputs from various sources -(environment, stdin) and allows for dynamic merging, freezing, and thawing -of inputs. It includes methods to decode inputs from JSON, YAML, and Base64 -formats, as well as handling boolean and integer conversions. +This module manages inputs from environment variables, stdin, and explicit +mappings. It can merge, replace, snapshot, freeze, and thaw input state while +keeping public snapshots in Tier 2 containers. It also decodes inputs from JSON, +YAML, and Base64 and coerces scalar values through Tier 1 type primitives. """ from __future__ import annotations @@ -33,9 +33,7 @@ class InputProvider: - """A class to manage and process directed inputs from environment variables. - - stdin, or provided dictionaries. + """Manage directed inputs from environment variables, stdin, or mappings. Attributes: inputs (ExtendedDict): Dictionary to store inputs. 
From a0f22b6ca07be9dcda582f958428638949e22dfc Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:03:39 -0500 Subject: [PATCH 105/287] fix: preserve extended list in-place operators --- docs/package-surface.md | 5 +++-- src/extended_data/containers/sequences.py | 11 +++++++++++ tests/core/test_containers.py | 9 +++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/docs/package-surface.md b/docs/package-surface.md index 150af25..77f764d 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -70,8 +70,9 @@ payload["service"]["name"].upper_first() ``` Mutation and common operator paths are part of that contract: `setdefault()`, -in-place dict merge, tuple slicing, tuple concatenation, and tuple repetition -preserve Tier 2 containers instead of leaking plain nested values. +in-place dict merge, list in-place concatenation, list in-place repetition, +tuple slicing, tuple concatenation, and tuple repetition preserve Tier 2 +containers instead of leaking plain nested values. 
Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index e6ab2aa..d4c94c8 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -52,6 +52,17 @@ def extend(self, other: Iterable[T]) -> None: """Extend values while preserving extended nested containers.""" self.data.extend(self._wrap_item(item) for item in other) + def __iadd__(self, other: Iterable[T]) -> ExtendedList[T]: + """Extend in place while preserving extended nested containers.""" + self.extend(other) + return self + + def __imul__(self, count: SupportsIndex) -> ExtendedList[T]: + """Repeat in place while preserving extended nested containers.""" + self.data *= operator_index(count) + self.data[:] = [self._wrap_item(item) for item in self.data] + return self + def insert(self, i: int, item: T) -> None: """Insert a value while preserving extended nested containers.""" self.data.insert(i, self._wrap_item(item)) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index ca29329..1ec19c0 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -105,12 +105,15 @@ def test_extended_list_composes_sequence_primitives() -> None: def test_extended_list_promotes_nested_values_on_mutation() -> None: """ExtendedList keeps nested values in the Tier 2 surface.""" value: ExtendedList[Any] = ExtendedList([{"name": "api"}]) + in_place: ExtendedList[Any] = ExtendedList([{"name": "api"}]) value.append("worker") value.extend([{"name": "scheduler"}]) value.insert(0, ["frontdoor"]) value[1] = {"name": "gateway"} value[2:3] = ["jobs"] + in_place += [{"name": "worker"}, ["jobs"]] + in_place *= 2 assert isinstance(value[0], ExtendedList) assert isinstance(value[0][0], ExtendedString) @@ -119,6 +122,12 @@ def 
test_extended_list_promotes_nested_values_on_mutation() -> None: assert isinstance(value[2], ExtendedString) assert isinstance(value[3], ExtendedDict) assert value[1]["name"].upper_first() == "Gateway" + assert isinstance(in_place[1], ExtendedDict) + assert isinstance(in_place[1]["name"], ExtendedString) + assert isinstance(in_place[2], ExtendedList) + assert isinstance(in_place[2][0], ExtendedString) + assert isinstance(in_place[4], ExtendedDict) + assert isinstance(in_place[5], ExtendedList) def test_extended_set_composes_set_operations() -> None: From b66e237d295d0a15aa2a6bb65a36b054ca9679d2 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:08:27 -0500 Subject: [PATCH 106/287] fix: decode present inputs equal to defaults --- src/extended_data/inputs/__main__.py | 6 +++++- tests/inputs/test_main.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index 022ea0f..575d688 100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -259,11 +259,15 @@ def decode_input( Returns: Any: The decoded input, potentially converted or defaulted. 
""" + source_present = k in self.inputs conf = self.get_input(k, default=default, required=required) - if conf is None or conf == default: + if not source_present or (is_nothing(self.inputs.get(k)) and conf == default): return conf + if conf is None: + return default if not allow_none else None + conf = self._coerce_text(conf) if not isinstance(conf, str): diff --git a/tests/inputs/test_main.py b/tests/inputs/test_main.py index ad09f5b..5c7d740 100644 --- a/tests/inputs/test_main.py +++ b/tests/inputs/test_main.py @@ -258,6 +258,20 @@ def test_decode_input_json_can_return_extended_containers(): assert decoded["name"].upper_first() == "Test" +def test_decode_input_decodes_present_value_that_equals_default(): + """Defaults should not mask present input values that happen to be equal.""" + raw_config = '{"name": "test"}' + dic = InputProvider(inputs={"json_key": raw_config}, from_environment=False) + missing = InputProvider(from_environment=False) + + decoded = dic.decode_input("json_key", default=raw_config, decode_from_json=True, as_extended=True) + + assert isinstance(decoded, ExtendedDict) + assert isinstance(decoded["name"], ExtendedString) + assert decoded["name"].upper_first() == "Test" + assert missing.decode_input("json_key", default=raw_config, decode_from_json=True) == raw_config + + def test_decode_input_errors_do_not_echo_values(): """Decode diagnostics identify the input key without exposing raw values.""" dic = InputProvider( From a4162e5409f57c5834c65eb90b9dcb43d197f552 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:14:02 -0500 Subject: [PATCH 107/287] fix: reconstruct signed integer values --- src/extended_data/primitives/types.py | 6 ++++-- tests/core/test_type_utils.py | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/extended_data/primitives/types.py b/src/extended_data/primitives/types.py index 6a2cb4d..dce1e91 100644 --- a/src/extended_data/primitives/types.py +++ 
b/src/extended_data/primitives/types.py @@ -24,6 +24,7 @@ - DATETIME_PATTERN: Regex for matching ISO 8601 datetime strings. - TIME_PATTERN: Regex for matching time strings. - PATH_PATTERN: Regex for matching Unix and Windows-style paths. + - INTEGER_PATTERN: Regex for matching integer strings. - NUMBER_PATTERN: Regex for matching numeric strings. - TRUTHY_PATTERN: Regex for matching truthy strings. - FALSY_PATTERN: Regex for matching falsy strings. @@ -56,6 +57,7 @@ ) # Matches extended datetime formats like YYYY-MM-DDTHH:MM[:SS][.fff][Z|±hh:mm] TIME_PATTERN: re.Pattern[str] = re.compile(r"^\d{2}:\d{2}(:\d{2}(\.\d{1,6})?)?$") # Matches HH:MM[:SS] and microseconds PATH_PATTERN: re.Pattern[str] = re.compile(r'^(?:[a-zA-Z]:)?[\\/](?:[^<>:"|?*\n]+[\\/])*[^<>:"|?*\n]*$') +INTEGER_PATTERN: re.Pattern[str] = re.compile(r"^-?\d+$") NUMBER_PATTERN: re.Pattern[str] = re.compile(r"^-?\d+(\.\d+)?$") TRUTHY_PATTERN: re.Pattern[str] = re.compile(r"^(y|yes|t|true|on|1)$", re.IGNORECASE) FALSY_PATTERN: re.Pattern[str] = re.compile(r"^(n|no|f|false|off|0)$", re.IGNORECASE) @@ -425,7 +427,7 @@ def reconstruct_special_type(converted_obj: str, fail_silently: bool = False) -> ConversionError: If reconstruction fails and fail_silently is False. 
""" try: - if converted_obj == "None": + if converted_obj in {"None", "null"}: return None if DATETIME_PATTERN.match(converted_obj): return strtodatetime(converted_obj) @@ -438,7 +440,7 @@ def reconstruct_special_type(converted_obj: str, fail_silently: bool = False) -> if TRUTHY_PATTERN.match(converted_obj) or FALSY_PATTERN.match(converted_obj): return strtobool(converted_obj) if NUMBER_PATTERN.match(converted_obj): - if converted_obj.isdigit(): + if INTEGER_PATTERN.match(converted_obj): return strtoint(converted_obj) return strtofloat(converted_obj) diff --git a/tests/core/test_type_utils.py b/tests/core/test_type_utils.py index 5b4453a..485e11a 100644 --- a/tests/core/test_type_utils.py +++ b/tests/core/test_type_utils.py @@ -586,10 +586,13 @@ def test_convert_special_types_handles_tuple_frozenset_and_yaml_pairs() -> None: ("/some/path", Path("/some/path")), # Path string to Path ("simple string", "simple string"), # Simple string remains unchanged ("123", 123), # Numeric string to integer + ("-123", -123), # Negative numeric string to integer ("3.14", 3.14), # Numeric string to float + ("-3.14", -3.14), # Negative numeric string to float ("true", True), # Boolean string to bool ("false", False), ("None", None), # "None" string to NoneType + ("null", None), # JSON null string to NoneType ("", ""), # Empty string remains unchanged ], ) From 7842bafd0d5445e91a981a1800a2894ad63ba10c Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:17:57 -0500 Subject: [PATCH 108/287] fix: preserve explicit null input values --- src/extended_data/inputs/__main__.py | 18 ++++++++++++++---- tests/inputs/test_main.py | 18 ++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index 575d688..6c3e9d0 100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -32,6 +32,9 @@ from collections.abc import Mapping +_MISSING = object() + + 
class InputProvider: """Manage directed inputs from environment variables, stdin, or mappings. @@ -259,14 +262,21 @@ def decode_input( Returns: Any: The decoded input, potentially converted or defaulted. """ - source_present = k in self.inputs - conf = self.get_input(k, default=default, required=required) + raw_input = self.inputs.get(k, _MISSING) + source_present = raw_input is not _MISSING - if not source_present or (is_nothing(self.inputs.get(k)) and conf == default): - return conf + if not source_present: + if required: + self.get_input(k, default=default, required=True) + return default + conf = to_builtin(raw_input) if conf is None: return default if not allow_none else None + if is_nothing(conf): + if required: + self.get_input(k, default=default, required=True) + return default conf = self._coerce_text(conf) diff --git a/tests/inputs/test_main.py b/tests/inputs/test_main.py index 5c7d740..4f2de93 100644 --- a/tests/inputs/test_main.py +++ b/tests/inputs/test_main.py @@ -272,6 +272,24 @@ def test_decode_input_decodes_present_value_that_equals_default(): assert missing.decode_input("json_key", default=raw_config, decode_from_json=True) == raw_config +def test_decode_input_honors_explicit_none_values(): + """Present None inputs should obey allow_none instead of looking missing.""" + dic = InputProvider(inputs={"json_key": None}, from_environment=False) + missing = InputProvider(from_environment=False) + + assert dic.decode_input("json_key", default="fallback", decode_from_json=True, allow_none=True) is None + assert dic.decode_input("json_key", default="fallback", decode_from_json=True, allow_none=False) == "fallback" + assert missing.decode_input("json_key", default="fallback", decode_from_json=True, allow_none=True) == "fallback" + + +def test_decode_input_required_empty_value_raises(): + """Required decode inputs still reject empty provided values.""" + dic = InputProvider(inputs={"json_key": ""}, from_environment=False) + + with pytest.raises(RuntimeError, 
match="Required input json_key not passed"): + dic.decode_input("json_key", decode_from_json=True, required=True) + + def test_decode_input_errors_do_not_echo_values(): """Decode diagnostics identify the input key without exposing raw values.""" dic = InputProvider( From d520700ac5a296500d4f04d7f652934bde883dc4 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:21:47 -0500 Subject: [PATCH 109/287] fix: honor nullable decorator inputs --- src/extended_data/inputs/decorators.py | 2 ++ tests/inputs/test_decorators.py | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/extended_data/inputs/decorators.py b/src/extended_data/inputs/decorators.py index ab92297..80dff24 100644 --- a/src/extended_data/inputs/decorators.py +++ b/src/extended_data/inputs/decorators.py @@ -84,6 +84,8 @@ def resolve(self, provider: InputProvider) -> Any | object: allow_none=self.allow_none, as_extended=self.as_extended, ) + elif source_present and provider.inputs.get(key) is None and self.allow_none and not self.required: + value = None else: value = provider.get_input( key, diff --git a/tests/inputs/test_decorators.py b/tests/inputs/test_decorators.py index ba99175..d973edc 100644 --- a/tests/inputs/test_decorators.py +++ b/tests/inputs/test_decorators.py @@ -31,6 +31,14 @@ def parse_extended_config(self, extended_config: ExtendedDict) -> ExtendedDict: def parse_raw_extended_config(self, raw_config: ExtendedDict) -> ExtendedDict: return raw_config + @input_config("optional_value", allow_none=True) + def optional_plain_value(self, optional_value: str | None = "method-default") -> str | None: + return optional_value + + @input_config("required_value", required=True, allow_none=True) + def required_plain_value(self, required_value: str | None = "method-default") -> str | None: + return required_value + def greet(self, prefix: str = "hello") -> str: return prefix @@ -77,6 +85,19 @@ def test_plain_input_config_can_return_extended_containers() -> None: 
assert isinstance(parsed["name"], ExtendedString) +def test_plain_input_config_honors_explicit_none() -> None: + service = ExampleService(_input_provider_config={"inputs": {"optional_value": None}}) + + assert service.optional_plain_value() is None + + +def test_plain_input_config_required_none_still_raises() -> None: + service = ExampleService(_input_provider_config={"inputs": {"required_value": None}}) + + with pytest.raises(RuntimeError, match="Required input required_value not passed"): + service.required_plain_value() + + def test_method_default_used_when_input_missing() -> None: service = ExampleService(_input_provider_config={"inputs": {"domain": "acme.io"}}) assert service.greet() == "hello" From 6c86f9658052abf614b745c478efe0524c3df9b6 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:27:59 -0500 Subject: [PATCH 110/287] feat: add cli methods json output --- README.md | 1 + docs/package-surface.md | 7 +++++++ src/extended_data/connectors/cli.py | 11 +++++++++++ tests/connectors/test_cli.py | 12 ++++++++++++ 4 files changed, 31 insertions(+) diff --git a/README.md b/README.md index b9a495e..f0cd4b5 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ The same catalog is available from the CLI: ```bash extended-data list extended-data info github --json +extended-data methods github --json ``` ## Package Shape diff --git a/docs/package-surface.md b/docs/package-surface.md index 77f764d..e8fb8b2 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -232,6 +232,13 @@ github_info = fabric.get_connector_info("github") Each catalog entry includes availability, source, extra name, install command, required packages, missing packages, module, class, and description fields. 
+The installed CLI exposes the same discovery layer for shell automation: + +```bash +extended-data list --json +extended-data info github --json +extended-data methods github --json +``` ## Optional Integrations diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index 56580a3..9eb213d 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -174,6 +174,7 @@ def cmd_methods(args: argparse.Namespace) -> int: _write_stderr(str(e)) return 1 + methods: list[dict[str, str]] = [] for name in sorted(dir(cls)): if name.startswith("_"): continue @@ -182,6 +183,15 @@ def cmd_methods(args: argparse.Namespace) -> int: continue doc = attr.__doc__.split("\n")[0].strip()[:50] if attr.__doc__ else "No description" + methods.append({"name": name, "description": doc}) + + if getattr(args, "json", False): + _write_stdout(_json_output(methods)) + return 0 + + for method in methods: + name = method["name"] + doc = method["description"] _write_stdout(f" {name:<30} {doc}") return 0 @@ -256,6 +266,7 @@ def main() -> int: # Methods command methods_parser = subparsers.add_parser("methods", help="List methods for a connector") methods_parser.add_argument("connector", help="Connector name") + methods_parser.add_argument("--json", action="store_true", help="JSON output") methods_parser.set_defaults(func=cmd_methods) # Info command diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index 6ea90b8..3b50116 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -63,6 +63,18 @@ def test_cli_methods_lists_public_methods(): assert "Decode an HTTP response body" in output +def test_cli_methods_json_lists_public_methods() -> None: + """Methods command can emit machine-readable method metadata.""" + args = argparse.Namespace(connector="meshy", json=True) + with patch("sys.stdout.write") as mock_write: + exit_code = cmd_methods(args) + + assert exit_code == 0 + methods = 
json.loads(mock_write.call_args.args[0]) + decode_response = next(method for method in methods if method["name"] == "decode_response") + assert decode_response["description"].startswith("Decode an HTTP response body") + + def test_cli_call_parses_dynamic_keyword_arguments() -> None: """Call command accepts documented --arg value pairs after the method.""" connector = MagicMock() From 42872a27a701f37e75b291cf4f76e661d0822878 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:32:43 -0500 Subject: [PATCH 111/287] fix: keep string tokenization in tier two --- README.md | 3 ++ docs/package-surface.md | 4 +++ src/extended_data/containers/strings.py | 47 +++++++++++++++++++++++++ tests/core/test_containers.py | 21 +++++++++++ 4 files changed, 75 insertions(+) diff --git a/README.md b/README.md index f0cd4b5..d7d8b86 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,9 @@ Tier 3 decoders return Tier 2 containers by default, so decoded files, Base64 payloads, and directed inputs can immediately use `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, `ExtendedSet`, and `ExtendedString` methods. +String tokenization stays inside the same surface: `ExtendedString.split()` +returns an `ExtendedList` of `ExtendedString` values, and partition operations +return `ExtendedTuple` values. `DataWorkflow` makes those compositions first-class: read or decode data, apply named transformations, write an output artifact, and keep the step trail in a `WorkflowResult`. Missing workflow inputs and empty writes fail loudly. diff --git a/docs/package-surface.md b/docs/package-surface.md index e8fb8b2..6916958 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -73,6 +73,10 @@ Mutation and common operator paths are part of that contract: `setdefault()`, in-place dict merge, list in-place concatenation, list in-place repetition, tuple slicing, tuple concatenation, and tuple repetition preserve Tier 2 containers instead of leaking plain nested values. 
+String tokenization and partitioning paths are covered too: +`ExtendedString.split()`, `rsplit()`, and `splitlines()` return `ExtendedList` +values containing `ExtendedString` parts, while `partition()` and +`rpartition()` return `ExtendedTuple` values. Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected diff --git a/src/extended_data/containers/strings.py b/src/extended_data/containers/strings.py index 8465429..b49591e 100644 --- a/src/extended_data/containers/strings.py +++ b/src/extended_data/containers/strings.py @@ -3,6 +3,8 @@ from __future__ import annotations from collections import UserString +from collections.abc import Iterable +from typing import TYPE_CHECKING from extended_data.primitives.string_transforms import ( humanize, @@ -28,6 +30,15 @@ from extended_data.primitives.types import strtobool +if TYPE_CHECKING: + from extended_data.containers.sequences import ExtendedList, ExtendedTuple + + +def _coerce_string_argument(value: str | UserString) -> str: + """Coerce stdlib user strings while preserving normal str errors elsewhere.""" + return str(value) if isinstance(value, UserString) else value + + class ExtendedString(UserString): """String wrapper with chainable primitive operations.""" @@ -95,6 +106,42 @@ def ordinalize(self) -> ExtendedString: """Return an ordinalized copy.""" return ExtendedString(ordinalize(self.data)) + def split(self, sep: str | UserString | None = None, maxsplit: int = -1) -> ExtendedList[ExtendedString]: # type: ignore[override] + """Split into extended string parts.""" + from extended_data.containers.sequences import ExtendedList + + separator = None if sep is None else _coerce_string_argument(sep) + return ExtendedList(ExtendedString(part) for part in self.data.split(separator, maxsplit)) + + def rsplit(self, sep: str | UserString | None = None, maxsplit: int = -1) -> ExtendedList[ExtendedString]: # type: ignore[override] + 
"""Split from the right into extended string parts.""" + from extended_data.containers.sequences import ExtendedList + + separator = None if sep is None else _coerce_string_argument(sep) + return ExtendedList(ExtendedString(part) for part in self.data.rsplit(separator, maxsplit)) + + def splitlines(self, keepends: bool = False) -> ExtendedList[ExtendedString]: # type: ignore[override] + """Split lines into extended string parts.""" + from extended_data.containers.sequences import ExtendedList + + return ExtendedList(ExtendedString(part) for part in self.data.splitlines(keepends)) + + def partition(self, sep: str | UserString) -> ExtendedTuple[ExtendedString]: # type: ignore[override] + """Partition into extended string parts.""" + from extended_data.containers.sequences import ExtendedTuple + + return ExtendedTuple(ExtendedString(part) for part in self.data.partition(_coerce_string_argument(sep))) + + def rpartition(self, sep: str | UserString) -> ExtendedTuple[ExtendedString]: # type: ignore[override] + """Partition from the right into extended string parts.""" + from extended_data.containers.sequences import ExtendedTuple + + return ExtendedTuple(ExtendedString(part) for part in self.data.rpartition(_coerce_string_argument(sep))) + + def join(self, seq: Iterable[str | UserString]) -> ExtendedString: # type: ignore[override] + """Join string-like values into an extended string.""" + return ExtendedString(self.data.join(_coerce_string_argument(item) for item in seq)) + def is_url(self) -> bool: """Return whether the string is a URL.""" return is_url(self.data) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 1ec19c0..b2a6592 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -20,11 +20,32 @@ def test_extended_string_chains_primitive_transforms() -> None: """ExtendedString composes Tier 1 string primitives.""" value = ExtendedString("API Response Value") + partitioned = 
ExtendedString("api.gateway.worker").partition(".") + right_partitioned = ExtendedString("api.gateway.worker").rpartition(".") + split = ExtendedString("api,gateway,worker").split(",") + right_split = ExtendedString("api,gateway,worker").rsplit(",", 1) + lines = ExtendedString("api\ngateway").splitlines() + joined = ExtendedString(",").join([ExtendedString("api"), "gateway"]) assert value.to_snake_case().remove_suffix("_value") == "api_response" assert value.to_kebab_case() == "api-response-value" assert ExtendedString("1").ordinalize() == "1st" assert ExtendedString("yes").to_bool() is True + assert isinstance(partitioned, ExtendedTuple) + assert isinstance(partitioned[0], ExtendedString) + assert partitioned == ("api", ".", "gateway.worker") + assert isinstance(right_partitioned, ExtendedTuple) + assert right_partitioned == ("api.gateway", ".", "worker") + assert isinstance(split, ExtendedList) + assert all(isinstance(item, ExtendedString) for item in split) + assert split == ["api", "gateway", "worker"] + assert isinstance(right_split, ExtendedList) + assert right_split == ["api,gateway", "worker"] + assert isinstance(lines, ExtendedList) + assert all(isinstance(item, ExtendedString) for item in lines) + assert lines == ["api", "gateway"] + assert isinstance(joined, ExtendedString) + assert joined == "api,gateway" def test_extended_dict_composes_mapping_primitives() -> None: From a0b7cf3e8e56eedb2b7c0c628ff425fe3fc565fa Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:42:23 -0500 Subject: [PATCH 112/287] fix: lower extended mapping keys --- src/extended_data/containers/factory.py | 4 ++-- tests/core/test_containers.py | 4 ++++ tests/core/test_json_utils.py | 11 ++++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/extended_data/containers/factory.py b/src/extended_data/containers/factory.py index ec13ea6..6a881a5 100644 --- a/src/extended_data/containers/factory.py +++ b/src/extended_data/containers/factory.py @@ -37,7 
+37,7 @@ def to_builtin(value: Any) -> Any: if isinstance(value, ExtendedString): return str(value) if isinstance(value, ExtendedDict): - return {key: to_builtin(item) for key, item in value.items()} + return {to_builtin(key): to_builtin(item) for key, item in value.items()} if isinstance(value, ExtendedList): return [to_builtin(item) for item in value] if isinstance(value, ExtendedTuple): @@ -45,7 +45,7 @@ def to_builtin(value: Any) -> Any: if isinstance(value, ExtendedSet): return {to_builtin(item) for item in value} if isinstance(value, Mapping): - return {key: to_builtin(item) for key, item in value.items()} + return {to_builtin(key): to_builtin(item) for key, item in value.items()} if isinstance(value, list): return [to_builtin(item) for item in value] if isinstance(value, tuple): diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index b2a6592..95136d5 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -246,6 +246,7 @@ def test_to_builtin_recursively_unwraps_extended_containers() -> None: wrapped = ExtendedDict( { "service": ExtendedDict({"name": ExtendedString("api")}), + ExtendedString("metadata"): ExtendedDict({"owner": ExtendedString("platform")}), "ports": ExtendedList([8080, 8081]), "tags": ExtendedSet({"prod", "api"}), "aliases": ExtendedTuple(("api", "gateway")), @@ -256,6 +257,9 @@ def test_to_builtin_recursively_unwraps_extended_containers() -> None: assert isinstance(plain, dict) assert plain["service"] == {"name": "api"} + metadata_key = next(key for key in plain if key == "metadata") + assert type(metadata_key) is str + assert plain["metadata"] == {"owner": "platform"} assert plain["ports"] == [8080, 8081] assert plain["tags"] == {"prod", "api"} assert plain["aliases"] == ("api", "gateway") diff --git a/tests/core/test_json_utils.py b/tests/core/test_json_utils.py index f1be24b..4b5546d 100644 --- a/tests/core/test_json_utils.py +++ b/tests/core/test_json_utils.py @@ -15,7 +15,7 @@ import 
pytest -from extended_data.containers import ExtendedDict +from extended_data.containers import ExtendedDict, ExtendedString from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.json import decode_json, encode_json @@ -123,3 +123,12 @@ def test_encode_json_lowers_extended_containers(use_data_attribute: bool) -> Non result = encode_json(raw_data, sort_keys=True) assert decode_json(result) == {"items": ["one"], "status": "ok"} + + +def test_encode_json_lowers_extended_mapping_keys() -> None: + """Extended mapping keys are lowered before JSON handoff.""" + payload = ExtendedDict({ExtendedString("service"): {"name": "api"}}) + + result = encode_json(payload, sort_keys=True) + + assert decode_json(result) == {"service": {"name": "api"}} From c41e7b973fd9b57f25ff51b1e6353e0137949aad Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:45:58 -0500 Subject: [PATCH 113/287] docs: document extended key serialization --- README.md | 2 ++ docs/package-surface.md | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d7d8b86..a319101 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,8 @@ decoded files, Base64 payloads, and directed inputs can immediately use String tokenization stays inside the same surface: `ExtendedString.split()` returns an `ExtendedList` of `ExtendedString` values, and partition operations return `ExtendedTuple` values. +Format encoders lower extended containers, including extended mapping keys, at +the serialization boundary. `DataWorkflow` makes those compositions first-class: read or decode data, apply named transformations, write an output artifact, and keep the step trail in a `WorkflowResult`. Missing workflow inputs and empty writes fail loudly. 
diff --git a/docs/package-surface.md b/docs/package-surface.md index 6916958..d5a6b0f 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -100,7 +100,8 @@ Tuple values are promoted to `ExtendedTuple` and lowered back to Python tuples, so the Tier 2 surface does not silently turn immutable input data into mutable lists. Format encoders lower Tier 2 containers the same way before serializing JSON, -YAML, TOML, and HCL output. +YAML, TOML, and HCL output, including extended mapping keys that must become +plain strings before JSON handoff. `DataWorkflow` is the Tier 3 composition surface for higher-order data processing. It reads or decodes structured data through the file and format From 0f34a7550d182bb17c7074ba9ed8c5d1d8493b25 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:50:35 -0500 Subject: [PATCH 114/287] fix: preserve extended string formatting --- docs/package-surface.md | 3 ++- src/extended_data/containers/sequences.py | 4 ++++ src/extended_data/containers/strings.py | 10 +++++++++- tests/core/test_containers.py | 11 +++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/docs/package-surface.md b/docs/package-surface.md index d5a6b0f..25a544d 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -76,7 +76,8 @@ containers instead of leaking plain nested values. String tokenization and partitioning paths are covered too: `ExtendedString.split()`, `rsplit()`, and `splitlines()` return `ExtendedList` values containing `ExtendedString` parts, while `partition()` and -`rpartition()` return `ExtendedTuple` values. +`rpartition()` return `ExtendedTuple` values. String formatting paths +`format()` and `format_map()` return `ExtendedString`. 
Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index d4c94c8..a20d308 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -223,6 +223,10 @@ def __len__(self) -> int: """Return the number of set values.""" return len(self._data) + def __repr__(self) -> str: + """Return a value-oriented representation.""" + return f"{self.__class__.__name__}({self._data!r})" + def add(self, value: T) -> None: """Add a value to the set.""" self._data.add(self._wrap_item(value)) diff --git a/src/extended_data/containers/strings.py b/src/extended_data/containers/strings.py index b49591e..03e6f55 100644 --- a/src/extended_data/containers/strings.py +++ b/src/extended_data/containers/strings.py @@ -3,7 +3,7 @@ from __future__ import annotations from collections import UserString -from collections.abc import Iterable +from collections.abc import Iterable, Mapping from typing import TYPE_CHECKING from extended_data.primitives.string_transforms import ( @@ -106,6 +106,14 @@ def ordinalize(self) -> ExtendedString: """Return an ordinalized copy.""" return ExtendedString(ordinalize(self.data)) + def format(self, *args: object, **kwargs: object) -> ExtendedString: # type: ignore[override] + """Format values into an extended string.""" + return ExtendedString(self.data.format(*args, **kwargs)) + + def format_map(self, mapping: Mapping[str, object]) -> ExtendedString: # type: ignore[override] + """Format mapping values into an extended string.""" + return ExtendedString(self.data.format_map(mapping)) + def split(self, sep: str | UserString | None = None, maxsplit: int = -1) -> ExtendedList[ExtendedString]: # type: ignore[override] """Split into extended string parts.""" from extended_data.containers.sequences import ExtendedList diff --git 
a/tests/core/test_containers.py b/tests/core/test_containers.py index 95136d5..4499b25 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -26,6 +26,10 @@ def test_extended_string_chains_primitive_transforms() -> None: right_split = ExtendedString("api,gateway,worker").rsplit(",", 1) lines = ExtendedString("api\ngateway").splitlines() joined = ExtendedString(",").join([ExtendedString("api"), "gateway"]) + formatted = ExtendedString("{service}.{component}").format(service="api", component=ExtendedString("worker")) + formatted_map = ExtendedString("{service}.{component}").format_map( + {"service": ExtendedString("api"), "component": "worker"} + ) assert value.to_snake_case().remove_suffix("_value") == "api_response" assert value.to_kebab_case() == "api-response-value" @@ -46,6 +50,10 @@ def test_extended_string_chains_primitive_transforms() -> None: assert lines == ["api", "gateway"] assert isinstance(joined, ExtendedString) assert joined == "api,gateway" + assert isinstance(formatted, ExtendedString) + assert formatted == "api.worker" + assert isinstance(formatted_map, ExtendedString) + assert formatted_map == "api.worker" def test_extended_dict_composes_mapping_primitives() -> None: @@ -155,6 +163,9 @@ def test_extended_set_composes_set_operations() -> None: """ExtendedSet provides chainable set operations.""" value = ExtendedSet({1, 2, 3, None}) + compact_repr = repr(value.compact()) + assert compact_repr.startswith("ExtendedSet(") + assert "object at" not in compact_repr assert value.compact().to_set() == {1, 2, 3} assert value.union({4}).to_set() == {1, 2, 3, 4, None} assert value.intersection({2, 3, 5}).to_set() == {2, 3} From cf3d1fce00b2b1d576feb110a7d19d12975e989e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:54:13 -0500 Subject: [PATCH 115/287] feat: add extended set mutators --- docs/package-surface.md | 4 +++- src/extended_data/containers/sequences.py | 29 +++++++++++++++++++++++ 
tests/core/test_containers.py | 17 +++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/docs/package-surface.md b/docs/package-surface.md index 25a544d..63eb845 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -72,7 +72,9 @@ payload["service"]["name"].upper_first() Mutation and common operator paths are part of that contract: `setdefault()`, in-place dict merge, list in-place concatenation, list in-place repetition, tuple slicing, tuple concatenation, and tuple repetition preserve Tier 2 -containers instead of leaking plain nested values. +containers instead of leaking plain nested values. `ExtendedSet` named +mutators such as `update()`, `intersection_update()`, `difference_update()`, +and `symmetric_difference_update()` preserve promoted values as well. String tokenization and partitioning paths are covered too: `ExtendedString.split()`, `rsplit()`, and `splitlines()` return `ExtendedList` values containing `ExtendedString` parts, while `partition()` and diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index a20d308..e3a3745 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -231,6 +231,12 @@ def add(self, value: T) -> None: """Add a value to the set.""" self._data.add(self._wrap_item(value)) + def update(self, *others: Iterable[T]) -> None: + """Add values from one or more iterables.""" + for other in others: + for value in other: + self.add(value) + def discard(self, value: T) -> None: """Remove a value from the set if present.""" self._data.discard(value) @@ -264,6 +270,29 @@ def difference(self, *others: Iterable[T]) -> ExtendedSet[T]: result.difference_update(other) return ExtendedSet(result) + def symmetric_difference(self, other: Iterable[T]) -> ExtendedSet[T]: + """Return a symmetric difference against another iterable.""" + result = set(self._data) + for value in other: + wrapped = self._wrap_item(value) + 
if wrapped in result: + result.remove(wrapped) + else: + result.add(wrapped) + return ExtendedSet(result) + + def intersection_update(self, *others: Iterable[T]) -> None: + """Keep only values found in all other iterables.""" + self._data = self.intersection(*others)._data + + def difference_update(self, *others: Iterable[T]) -> None: + """Remove values found in other iterables.""" + self._data = self.difference(*others)._data + + def symmetric_difference_update(self, other: Iterable[T]) -> None: + """Replace values with the symmetric difference against another iterable.""" + self._data = self.symmetric_difference(other)._data + def to_set(self) -> set[T]: """Return a plain set copy.""" return set(self._data) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 4499b25..b41d71d 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -183,6 +183,23 @@ def test_extended_set_promotes_string_values() -> None: assert to_builtin(value) == {"api", "worker"} +def test_extended_set_named_mutators_preserve_extended_values() -> None: + """Named set mutation methods keep values in the Tier 2 surface.""" + value = ExtendedSet({"api"}) + + value.update(["worker"], {"scheduler"}) + symmetric = value.symmetric_difference({"worker", "batch"}) + value.intersection_update({"api", "scheduler", "batch"}) + value.difference_update({"scheduler"}) + value.symmetric_difference_update({"api", "batch"}) + + assert isinstance(symmetric, ExtendedSet) + assert symmetric.to_set() == {"api", "scheduler", "batch"} + assert all(isinstance(item, ExtendedString) for item in symmetric) + assert value.to_set() == {"batch"} + assert all(isinstance(item, ExtendedString) for item in value) + + def test_extended_tuple_preserves_immutable_sequence_shape() -> None: """ExtendedTuple composes sequence primitives without becoming an ExtendedList.""" value = ExtendedTuple((1, (2, [3]), "", 2)) From da97c1c6a1b22575985b9ea795dbd7b17ba56dc0 Mon Sep 17 00:00:00 
2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 08:58:33 -0500 Subject: [PATCH 116/287] ci: audit python dependencies --- .github/workflows/ci.yml | 1 + .github/workflows/release.yml | 1 + README.md | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 468b104..f346c5f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,6 +26,7 @@ jobs: version: "0.11.19" enable-cache: true - run: uv sync --python 3.13 --extra tests --extra typing + - run: uv run --with pip-audit==2.10.0 pip-audit --skip-editable - run: uvx ruff check src tests examples README.md docs/package-surface.md - run: uv run mypy src/extended_data - run: uv run pytest tests diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 878a815..63aaa4a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -45,6 +45,7 @@ jobs: version: "0.11.19" enable-cache: true - run: uv sync --python 3.13 --extra tests --extra typing + - run: uv run --with pip-audit==2.10.0 pip-audit --skip-editable - run: uvx ruff check src tests examples README.md docs/package-surface.md - run: uv run mypy src/extended_data - run: uv run pytest tests diff --git a/README.md b/README.md index a319101..ed8d278 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,7 @@ More detail lives in [`docs/package-surface.md`](docs/package-surface.md). 
```bash uv sync --extra tests --extra typing +uv run --with pip-audit==2.10.0 pip-audit --skip-editable uv run pytest uv run ruff check src tests uv run mypy src/extended_data From db3760e468b3dfbe88470803b7ea126914bdd616 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 09:02:55 -0500 Subject: [PATCH 117/287] fix: lower mcp tool results --- src/extended_data/connectors/mcp.py | 23 +++++++++++++++-------- tests/connectors/test_mcp.py | 24 +++++++++++++++++++++++- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index 734bba6..8eeb937 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -25,10 +25,11 @@ import json import sys -from collections.abc import Callable +from collections.abc import Callable, Iterable, Mapping from typing import Any, cast from extended_data.connectors.registry import get_connector, list_connectors +from extended_data.containers import to_builtin def _check_mcp_installed() -> bool: @@ -102,6 +103,18 @@ def _get_public_methods(connector_class: builtins.type[Any]) -> list[tuple[str, return methods +def _jsonable_tool_result(result: Any) -> Any: + """Lower connector tool results to JSON-compatible Python data.""" + if hasattr(result, "model_dump"): + result = result.model_dump() + elif isinstance(result, Iterable) and not isinstance(result, (str, bytes, bytearray, Mapping)): + result = [item.model_dump() if hasattr(item, "model_dump") else item for item in result] + result = to_builtin(result) + if isinstance(result, set | frozenset): + return [to_builtin(item) for item in result] + return result + + def create_server() -> Any: """Create the unified MCP server with all registered connectors.""" try: @@ -181,13 +194,7 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]: if inspect.iscoroutine(result): result = await result - # Convert Pydantic models to dict - if 
hasattr(result, "model_dump"): - result = result.model_dump() - elif hasattr(result, "__iter__") and not isinstance(result, (str, dict)): - result = [r.model_dump() if hasattr(r, "model_dump") else r for r in result] - - return [TextContent(type="text", text=json.dumps(result, indent=2, default=str))] + return [TextContent(type="text", text=json.dumps(_jsonable_tool_result(result), indent=2, default=str))] except Exception as e: return [TextContent(type="text", text=f"Error: {type(e).__name__}: {e}")] diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index 2c2ec02..f4154f7 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -4,7 +4,8 @@ import pytest -from extended_data.connectors.mcp import create_server +from extended_data.connectors.mcp import _jsonable_tool_result, create_server +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet def test_create_server(): @@ -14,3 +15,24 @@ def test_create_server(): assert server.name == "extended-data" # Basic check that server was initialized assert server is not None + + +def test_jsonable_tool_result_lowers_extended_mapping_payloads() -> None: + """MCP result serialization keeps Tier 2 mapping payloads as JSON objects.""" + payload = ExtendedDict({"service": {"name": "api"}}) + + assert _jsonable_tool_result(payload) == {"service": {"name": "api"}} + + +def test_jsonable_tool_result_lowers_extended_sequence_payloads() -> None: + """MCP result serialization keeps Tier 2 sequence payloads as JSON arrays.""" + payload = ExtendedList([{"service": "api"}]) + + assert _jsonable_tool_result(payload) == [{"service": "api"}] + + +def test_jsonable_tool_result_lowers_extended_set_payloads() -> None: + """MCP result serialization turns Tier 2 sets into JSON arrays.""" + payload = ExtendedSet({"api", "worker"}) + + assert sorted(_jsonable_tool_result(payload)) == ["api", "worker"] From e60725812802c78301bb1fd16d964376aa68fb7f Mon Sep 17 00:00:00 2001 From: Jon 
Bogaty Date: Wed, 10 Jun 2026 09:20:57 -0500 Subject: [PATCH 118/287] feat: add tiered data file reader --- README.md | 11 +++++--- docs/package-surface.md | 8 +++--- examples/core/file_operations.py | 8 +++--- src/extended_data/__init__.py | 2 ++ src/extended_data/io/__init__.py | 2 ++ src/extended_data/io/files.py | 36 ++++++++++++++++++++++++- src/extended_data/workflows/__init__.py | 16 +++++------ tests/core/test_file_data_type.py | 33 +++++++++++++++++++++++ tests/core/test_package_surface.py | 1 + 9 files changed, 95 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index ed8d278..611b394 100644 --- a/README.md +++ b/README.md @@ -131,7 +131,7 @@ The package is intentionally tiered: vendor integrations, and workflows. Tier 3 decoders return Tier 2 containers by default, so -decoded files, Base64 payloads, and directed inputs can immediately use +data files, Base64 payloads, and directed inputs can immediately use `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, `ExtendedSet`, and `ExtendedString` methods. String tokenization stays inside the same surface: `ExtendedString.split()` @@ -139,9 +139,12 @@ returns an `ExtendedList` of `ExtendedString` values, and partition operations return `ExtendedTuple` values. Format encoders lower extended containers, including extended mapping keys, at the serialization boundary. -`DataWorkflow` makes those compositions first-class: read or decode data, -apply named transformations, write an output artifact, and keep the step trail -in a `WorkflowResult`. Missing workflow inputs and empty writes fail loudly. +`read_data_file()` is the direct file boundary for one-step read plus decode +workflows; it raises for missing files and promotes structured data into Tier 2 +containers by default. `DataWorkflow` makes those compositions first-class: +read or decode data, apply named transformations, write an output artifact, and +keep the step trail in a `WorkflowResult`. 
Missing workflow inputs and empty +writes fail loudly. `InputProvider` stores its active, frozen, and merged input snapshots as `ExtendedDict` values, so direct input-data access can use Tier 2 container methods. `snapshot_inputs()` returns detached active or frozen snapshots, and diff --git a/docs/package-surface.md b/docs/package-surface.md index 63eb845..dc76138 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -86,14 +86,16 @@ Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict` values, and `ExtendedDict.all_values()` returns an `ExtendedList`. -Tier 3 decode surfaces promote decoded values into Tier 2 containers by -default: +Tier 3 file and decode surfaces promote decoded values into Tier 2 containers +by default: ```python -from extended_data import decode_file +from extended_data import decode_file, read_data_file payload = decode_file('{"service": {"name": "api"}}', suffix="json") +file_payload = read_data_file("config/service.json") assert payload["service"]["name"].upper_first() == "Api" +assert file_payload["service"]["name"].upper_first() == "Api" ``` Pass `as_extended=False` when a decode boundary should return standard Python diff --git a/examples/core/file_operations.py b/examples/core/file_operations.py index 6b45ce2..a8c6c63 100755 --- a/examples/core/file_operations.py +++ b/examples/core/file_operations.py @@ -13,9 +13,9 @@ from extended_data import ( FilePath, - decode_file, file_path_depth, is_url, + read_data_file, read_file, resolve_local_path, write_file, @@ -89,8 +89,7 @@ def demonstrate_file_operations() -> None: """ write_file(yaml_file, yaml_content) - yaml_text = read_file(yaml_file) - data = decode_file(yaml_text, file_path=yaml_file) + data = read_data_file(yaml_file) print(f"\nDecoded YAML file: {data}") print(f"YAML service keys: {data.flatten().keys()}") @@ -99,8 +98,7 @@ def demonstrate_file_operations() -> None: json_content = '{"users": [{"id": 1, "name": "Alice"}]}' 
write_file(json_file, json_content) - json_text = read_file(json_file) - data = decode_file(json_text, file_path=json_file) + data = read_data_file(json_file) print(f"Decoded JSON file: {data}") diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index a16e728..3a3cd64 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -38,6 +38,7 @@ get_tld, is_url, match_file_extensions, + read_data_file, read_file, resolve_local_path, write_file, @@ -317,6 +318,7 @@ def __getattr__(name: str) -> Any: "number_to_words", "ordinalize", "pluralize", + "read_data_file", "read_file", "reconstruct_special_type", "reconstruct_special_types", diff --git a/src/extended_data/io/__init__.py b/src/extended_data/io/__init__.py index 306d65b..fd3e402 100644 --- a/src/extended_data/io/__init__.py +++ b/src/extended_data/io/__init__.py @@ -15,6 +15,7 @@ get_tld, is_url, match_file_extensions, + read_data_file, read_file, resolve_local_path, write_file, @@ -38,6 +39,7 @@ "is_url", "make_raw_data_export_safe", "match_file_extensions", + "read_data_file", "read_file", "resolve_local_path", "unwrap_raw_data_from_import", diff --git a/src/extended_data/io/files.py b/src/extended_data/io/files.py index 7587dd6..3c22c33 100644 --- a/src/extended_data/io/files.py +++ b/src/extended_data/io/files.py @@ -9,7 +9,7 @@ from base64 import b64encode from collections.abc import Mapping from pathlib import Path -from typing import Any, TypeAlias +from typing import Any, TypeAlias, cast import validators @@ -346,6 +346,40 @@ def decode_file( return file_data +def read_data_file( + file_path: FilePath, + *, + suffix: str | None = None, + as_extended: bool = True, + charset: str = "utf-8", + errors: str = "strict", + headers: Mapping[str, str] | None = None, + tld: Path | None = None, +) -> Any: + """Read and decode a local file or URL through the Tier 3 data boundary. + + This composes ``read_file`` and ``decode_file`` for the common data-file + workflow. 
Structured files are decoded from their suffix and promoted to + Tier 2 containers by default. Missing local files fail loudly. + """ + file_data = read_file( + file_path, + charset=charset, + errors=errors, + headers=headers, + tld=tld, + ) + if file_data is None: + raise FileNotFoundError(str(file_path)) + + return decode_file( + cast(str | memoryview | bytes | bytearray, file_data), + file_path=file_path, + suffix=suffix, + as_extended=as_extended, + ) + + def write_file( file_path: FilePath, data: Any, diff --git a/src/extended_data/workflows/__init__.py b/src/extended_data/workflows/__init__.py index 3906f6b..7692a09 100644 --- a/src/extended_data/workflows/__init__.py +++ b/src/extended_data/workflows/__init__.py @@ -5,10 +5,10 @@ from collections.abc import Callable, Iterable from dataclasses import dataclass from pathlib import Path -from typing import Any, TypeAlias, cast +from typing import Any, TypeAlias from extended_data.containers import extend_data, to_builtin -from extended_data.io.files import FilePath, decode_file, read_file, write_file +from extended_data.io.files import FilePath, decode_file, read_data_file, write_file WorkflowAction: TypeAlias = Callable[[Any], Any] @@ -99,15 +99,13 @@ def from_file( tld: Path | None = None, ) -> DataWorkflow: """Read and decode a local file or URL into a workflow.""" - file_data = read_file(file_path, charset=charset, errors=errors, tld=tld) - if file_data is None: - raise FileNotFoundError(str(file_path)) - - decoded = decode_file( - cast(str | memoryview | bytes | bytearray, file_data), - file_path=file_path, + decoded = read_data_file( + file_path, suffix=suffix, as_extended=as_extended, + charset=charset, + errors=errors, + tld=tld, ) return cls(decoded, steps=(f"read:{file_path}",), as_extended=as_extended) diff --git a/tests/core/test_file_data_type.py b/tests/core/test_file_data_type.py index 0485792..2241fa4 100644 --- a/tests/core/test_file_data_type.py +++ b/tests/core/test_file_data_type.py @@ -39,6 
+39,7 @@ get_tld, is_url, match_file_extensions, + read_data_file, read_file, resolve_local_path, write_file, @@ -542,6 +543,38 @@ def test_decode_file_returns_extended_containers_by_default() -> None: assert isinstance(result["ports"], ExtendedList) +def test_read_data_file_reads_and_decodes_extended_data(tmp_path: Path) -> None: + """Data-file reads enter the Tier 2 container layer in one operation.""" + test_file = tmp_path / "service.json" + test_file.write_text('{"service": {"name": "api"}, "ports": [8080]}') + + result = read_data_file(test_file, tld=tmp_path) + + assert isinstance(result, ExtendedDict) + assert isinstance(result["service"], ExtendedDict) + assert isinstance(result["service"]["name"], ExtendedString) + assert isinstance(result["ports"], ExtendedList) + assert result["service"]["name"].upper_first() == "Api" + + +def test_read_data_file_can_return_builtin_data(tmp_path: Path) -> None: + """The composed file-data boundary can explicitly return plain Python values.""" + test_file = tmp_path / "service.json" + test_file.write_text('{"service": {"name": "api"}}') + + result = read_data_file(test_file, as_extended=False, tld=tmp_path) + + assert isinstance(result, dict) + assert not isinstance(result, ExtendedDict) + assert isinstance(result["service"], dict) + + +def test_read_data_file_raises_for_missing_file(tmp_path: Path) -> None: + """Missing data-file reads fail loudly.""" + with pytest.raises(FileNotFoundError, match=r"missing\.json"): + read_data_file("missing.json", tld=tmp_path) + + def test_write_file_json(tmp_path: Path) -> None: """Tests writing data as JSON. 
diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 0b48844..fb1c69c 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -113,6 +113,7 @@ def test_root_exports_first_class_integrated_primitives() -> None: assert extended_data.number_to_words(42) == "forty-two" assert extended_data.to_roman(42) == "XLII" assert extended_data.normalize_data_encoding("YML") == "yaml" + assert callable(extended_data.read_data_file) assert callable(extended_data.get_connector) assert callable(extended_data.list_connector_info) From 7eb42393583f220e506f214592515432e02bd982 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 09:27:22 -0500 Subject: [PATCH 119/287] fix: route extended containers by primitive type --- README.md | 3 +++ docs/package-surface.md | 3 +++ src/extended_data/primitives/types.py | 8 +++++--- tests/core/test_type_utils.py | 13 ++++++++++++- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 611b394..f8b9a28 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,9 @@ Tier 3 decoders return Tier 2 containers by default, so data files, Base64 payloads, and directed inputs can immediately use `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, `ExtendedSet`, and `ExtendedString` methods. +Generic type routing can still ask for plain data roles with +`typeof(value, primitive_only=True)`, which treats Extended containers as their +underlying `str`, `list`, `dict`, and `set` roles. String tokenization stays inside the same surface: `ExtendedString.split()` returns an `ExtendedList` of `ExtendedString` values, and partition operations return `ExtendedTuple` values. 
diff --git a/docs/package-surface.md b/docs/package-surface.md index dc76138..4625fdb 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -85,6 +85,9 @@ Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected `ExtendedDict` values, and `ExtendedDict.all_values()` returns an `ExtendedList`. +Generic type routing can still ask for plain data roles: +`typeof(value, primitive_only=True)` reports Extended strings, lists, tuples, +mappings, and sets as `str`, `list`, `list`, `dict`, and `set`. Tier 3 file and decode surfaces promote decoded values into Tier 2 containers by default: diff --git a/src/extended_data/primitives/types.py b/src/extended_data/primitives/types.py index dce1e91..78d3ca3 100644 --- a/src/extended_data/primitives/types.py +++ b/src/extended_data/primitives/types.py @@ -338,11 +338,13 @@ def get_primitive_type_for_instance_type(value: Any) -> builtins.type[Any]: """Gets the primitive type for a given value.""" if isinstance(value, (bool, int, float, str, bytes, bytearray)): return type(value) - if isinstance(value, (list, tuple)): + if isinstance(value, UserString): + return str + if isinstance(value, (list, tuple, UserList)): return list - if isinstance(value, dict): + if isinstance(value, Mapping): return dict - if isinstance(value, (set, frozenset)): + if isinstance(value, (set, frozenset, AbstractSet)): return set return type(None) if value is None else object diff --git a/tests/core/test_type_utils.py b/tests/core/test_type_utils.py index 485e11a..408cb76 100644 --- a/tests/core/test_type_utils.py +++ b/tests/core/test_type_utils.py @@ -14,7 +14,7 @@ import pytest -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, ExtendedTuple from extended_data.primitives.formats.yaml import YamlPairs, YamlTagged 
from extended_data.primitives.types import ( ConversionError, @@ -453,6 +453,11 @@ def test_get_default_value_for_type(input_type: type, expected: Any) -> None: ((1, 2, 3), list), ({"key": "value"}, dict), ({1, 2}, set), + (ExtendedString("hello"), str), + (ExtendedList([1, 2, 3]), list), + (ExtendedTuple((1, 2, 3)), list), + (ExtendedDict({"key": "value"}), dict), + (ExtendedSet({1, 2}), set), (None, type(None)), (object(), object), ], @@ -472,6 +477,12 @@ def test_get_primitive_type_for_instance_type(value: Any, expected_type: type) - ([1, 2, 3], True, list), ({"key": "value"}, False, dict), ({"key": "value"}, True, dict), + (ExtendedString("hello"), False, ExtendedString), + (ExtendedString("hello"), True, str), + (ExtendedList([1, 2, 3]), False, ExtendedList), + (ExtendedList([1, 2, 3]), True, list), + (ExtendedDict({"key": "value"}), False, ExtendedDict), + (ExtendedDict({"key": "value"}), True, dict), ], ) def test_typeof(item: Any, primitive_only: bool, expected_type: type) -> None: From c34f347f06433f40c371e1313cc9896c93284209 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 09:30:06 -0500 Subject: [PATCH 120/287] test: dogfood data file reader --- tests/core/test_workflows.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index 769c6bb..28a2c15 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -14,11 +14,10 @@ WorkflowStep, base64_decode, base64_encode, - decode_file, decode_hcl2, encode_hcl2, filter_list, - read_file, + read_data_file, write_file, ) from extended_data.primitives.formats.yaml import YamlTagged @@ -55,7 +54,7 @@ def test_data_workflow_layered_config_round_trip(tmp_path: Path) -> None: "ports": [8080, 8081], "features": {"auth": True, "metrics": True}, } - assert decode_file(read_file(result.output_path), file_path=result.output_path) == result.as_builtin() + assert 
read_data_file(result.output_path) == result.as_builtin() def test_data_workflow_runs_named_value_transforms() -> None: @@ -127,7 +126,7 @@ def test_data_workflow_preserves_tuples_until_serialization(tmp_path: Path) -> N result = workflow.write("build/aliases.json", tld=tmp_path) - assert decode_file(read_file(result.output_path), file_path=result.output_path) == {"aliases": ["api", "gateway"]} + assert read_data_file(result.output_path) == {"aliases": ["api", "gateway"]} def test_data_workflow_missing_file_fails_loudly(tmp_path: Path) -> None: @@ -158,8 +157,8 @@ def test_layered_config_workflow_round_trip(tmp_path: Path) -> None: write_file("config/base.yaml", base_config, tld=tmp_path) write_file("config/dev.yaml", env_config, tld=tmp_path) - base_data = decode_file(read_file("config/base.yaml", tld=tmp_path), file_path="config/base.yaml") - env_data = decode_file(read_file("config/dev.yaml", tld=tmp_path), file_path="config/dev.yaml") + base_data = read_data_file("config/base.yaml", tld=tmp_path) + env_data = read_data_file("config/dev.yaml", tld=tmp_path) merged = base_data.deep_merge(env_data) output_path = write_file("build/config.yaml", merged, tld=tmp_path) @@ -167,7 +166,7 @@ def test_layered_config_workflow_round_trip(tmp_path: Path) -> None: assert isinstance(base_data, ExtendedDict) assert isinstance(merged, ExtendedDict) assert output_path == tmp_path / "build" / "config.yaml" - assert decode_file(read_file(output_path), file_path=output_path) == { + assert read_data_file(output_path) == { "service": {"name": "api", "debug": True}, "ports": [8080, 8081], "features": {"auth": True, "metrics": True}, @@ -212,7 +211,7 @@ def test_api_payload_normalization_workflow_round_trip(tmp_path: Path) -> None: assert output_path == tmp_path / "build" / "payload.json" assert isinstance(normalized, ExtendedDict) - assert decode_file(read_file(output_path), file_path=output_path) == { + assert read_data_file(output_path) == { "http_response_code": 200, 
"selected_services": ["api", "worker"], "tags": ["api", "docs"], @@ -228,7 +227,7 @@ def test_api_payload_factory_workflow_round_trip(tmp_path: Path) -> None: } raw_path = write_file("build/raw-payload.json", raw_payload, tld=tmp_path) - decoded = decode_file(read_file(raw_path), file_path=raw_path) + decoded = read_data_file(raw_path) normalized = decoded.deduplicate().unhump() output_path = write_file("build/payload.json", normalized, tld=tmp_path) @@ -236,7 +235,7 @@ def test_api_payload_factory_workflow_round_trip(tmp_path: Path) -> None: assert output_path == tmp_path / "build" / "payload.json" assert isinstance(decoded, ExtendedDict) assert isinstance(normalized, ExtendedDict) - assert decode_file(read_file(output_path), file_path=output_path) == { + assert read_data_file(output_path) == { "http_response_code": 200, "selected_services": ["api", "worker"], "tags": ["api", "docs"], @@ -251,7 +250,7 @@ def test_yaml_native_workflow_round_trip(tmp_path: Path) -> None: } output_path = write_file("template.yaml", template, tld=tmp_path) - decoded = decode_file(read_file(output_path), file_path=output_path) + decoded = read_data_file(output_path) assert output_path == tmp_path / "template.yaml" assert isinstance(decoded, ExtendedDict) From 3e5e235d74279161342c93b3b1d8e1124a79d9d7 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 09:47:18 -0500 Subject: [PATCH 121/287] test: use tiered file reader in workflows --- examples/core/composed_workflows.py | 4 ++-- tests/core/test_integration_workflows.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/core/composed_workflows.py b/examples/core/composed_workflows.py index 9e620ab..4db2576 100644 --- a/examples/core/composed_workflows.py +++ b/examples/core/composed_workflows.py @@ -15,10 +15,10 @@ ExtendedDict, base64_decode, base64_encode, - decode_file, decode_hcl2, encode_hcl2, filter_list, + read_data_file, read_file, write_file, ) @@ -120,7 +120,7 @@ def 
demonstrate_yaml_native_workflow() -> None: tld = Path(tmpdir) write_file("template.yaml", template, tld=tld) rendered = read_file("template.yaml", tld=tld) - decoded = decode_file(rendered, file_path="template.yaml") + decoded = read_data_file("template.yaml", tld=tld) print(rendered) print(f"\nDecoded tag: {decoded['bucket_name'].tag}") diff --git a/tests/core/test_integration_workflows.py b/tests/core/test_integration_workflows.py index 04f0aed..6f3b78a 100644 --- a/tests/core/test_integration_workflows.py +++ b/tests/core/test_integration_workflows.py @@ -26,10 +26,10 @@ def test_integration_workflow_serialization_transformation_export(): } edt.write_file(tmp_path, raw_data) - # 2. Read and Decode - content = edt.read_file(tmp_path) - loaded_data = edt.decode_file(content, file_path=tmp_path) + # 2. Read and decode through the Tier 3 file boundary + loaded_data = edt.read_data_file(tmp_path) assert loaded_data == raw_data + assert isinstance(loaded_data, edt.ExtendedDict) # 3. Transform: Convert types and transform strings transformed = { From 58435829a906850ed911a07fcf8ea2834114fbf3 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 09:49:02 -0500 Subject: [PATCH 122/287] test: pin meshy connector payload contracts --- src/extended_data/connectors/meshy/connector.py | 11 ++++++----- tests/connectors/test_connector_payload_contracts.py | 6 ++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/extended_data/connectors/meshy/connector.py b/src/extended_data/connectors/meshy/connector.py index 5e3abd5..9043a36 100644 --- a/src/extended_data/connectors/meshy/connector.py +++ b/src/extended_data/connectors/meshy/connector.py @@ -10,6 +10,7 @@ from extended_data.connectors.base import VendorConnectorBase from extended_data.connectors.meshy import animate, image3d, retexture, rigging, text3d +from extended_data.containers import ExtendedDict, ExtendedString class MeshyConnector(VendorConnectorBase): @@ -38,7 +39,7 @@ def 
text3d_generate( target_polycount: int = 30000, enable_pbr: bool = True, wait: bool = True, - ) -> Any: + ) -> ExtendedDict | ExtendedString: """Generate a 3D model from text description.""" return text3d.generate( prompt, @@ -56,7 +57,7 @@ def image3d_generate( target_polycount: int = 15000, enable_pbr: bool = True, wait: bool = True, - ) -> Any: + ) -> ExtendedDict | ExtendedString: """Generate a 3D model from an image.""" return image3d.generate( image_url, @@ -66,11 +67,11 @@ def image3d_generate( wait=wait, ) - def rig_model(self, model_id: str, wait: bool = True) -> Any: + def rig_model(self, model_id: str, wait: bool = True) -> ExtendedDict | ExtendedString: """Add skeleton/rig to a static 3D model.""" return rigging.rig(model_id, wait=wait) - def apply_animation(self, model_id: str, animation_id: int, wait: bool = True) -> Any: + def apply_animation(self, model_id: str, animation_id: int, wait: bool = True) -> ExtendedDict | ExtendedString: """Apply animation to a rigged model.""" return animate.apply(model_id, animation_id, wait=wait) @@ -80,7 +81,7 @@ def retexture_model( texture_prompt: str, enable_pbr: bool = True, wait: bool = True, - ) -> Any: + ) -> ExtendedDict | ExtendedString: """Apply new textures to an existing model.""" return retexture.apply( model_id, diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index b863d22..df4e909 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -23,6 +23,7 @@ from extended_data.connectors.google.jules import JulesConnector from extended_data.connectors.google.services import GoogleServicesMixin from extended_data.connectors.google.workspace import GoogleWorkspaceMixin +from extended_data.connectors.meshy.connector import MeshyConnector from extended_data.connectors.slack import SlackConnector from extended_data.connectors.vault import VaultConnector from 
extended_data.connectors.zoom import ZoomConnector @@ -165,6 +166,11 @@ (JulesConnector.approve_plan, ExtendedDict), (JulesConnector.add_user_response, ExtendedDict), (JulesConnector.resume_session, ExtendedDict), + (MeshyConnector.text3d_generate, ExtendedDict | ExtendedString), + (MeshyConnector.image3d_generate, ExtendedDict | ExtendedString), + (MeshyConnector.rig_model, ExtendedDict | ExtendedString), + (MeshyConnector.apply_animation, ExtendedDict | ExtendedString), + (MeshyConnector.retexture_model, ExtendedDict | ExtendedString), (SlackConnector.get_bot_channels, ExtendedDict), (SlackConnector.list_users, ExtendedDict), (SlackConnector.list_usergroups, ExtendedDict), From 3b449138cd0dd6f17f60348f88c5813c9c5cfcc7 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 09:51:13 -0500 Subject: [PATCH 123/287] fix: tighten connector data contracts --- .../connectors/github/__init__.py | 4 +-- .../connectors/slack/__init__.py | 33 ++++++++++++++++--- .../test_connector_payload_contracts.py | 4 ++- tests/connectors/test_slack_connector.py | 33 +++++++++++++++++++ 4 files changed, 67 insertions(+), 7 deletions(-) diff --git a/src/extended_data/connectors/github/__init__.py b/src/extended_data/connectors/github/__init__.py index cb55e45..723eadb 100644 --- a/src/extended_data/connectors/github/__init__.py +++ b/src/extended_data/connectors/github/__init__.py @@ -19,7 +19,7 @@ ) from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase -from extended_data.containers import ExtendedDict +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple from extended_data.logging import Logging @@ -167,7 +167,7 @@ def get_repository_file( charset: str | None = "utf-8", errors: str | None = "strict", raise_on_not_found: bool = False, - ) -> Any: + ) -> ExtendedDict | ExtendedList[Any] | ExtendedString | ExtendedTuple[Any] | None: """Get a file from the repository.""" 
file_path_text = os.fspath(file_path) if self.repo is None: diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index 5c885fc..f916d6a 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -26,7 +26,7 @@ def batched(iterable: Iterable[Any], n: int) -> Iterator[tuple[Any, ...]]: from extended_data import is_nothing, wrap_raw_data_for_export from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase -from extended_data.containers import ExtendedDict, ExtendedList, extend_data, to_builtin +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data, to_builtin from extended_data.logging import Logging @@ -68,6 +68,30 @@ def __init__(self, response: Any) -> None: super().__init__(f"Slack API error: {response}") +def _slack_response_payload(response: Any) -> dict[str, Any]: + """Normalize Slack SDK response objects into a serializable payload.""" + if isinstance(response, Mapping): + return dict(response) + + data = getattr(response, "data", None) + if isinstance(data, Mapping): + return dict(data) + + payload: dict[str, Any] = {} + response_get = getattr(response, "get", None) + if callable(response_get): + for key in ("ok", "error", "warning"): + value = response_get(key) + if value is not None: + payload[key] = value + + status_code = getattr(response, "status_code", None) + if status_code is not None: + payload["status_code"] = status_code + + return payload or {"response": str(response)} + + def get_divider() -> ExtendedDict: """Return a Slack divider block. @@ -239,7 +263,7 @@ def send_message( strike: bool = False, thread_id: str | None = None, raise_on_api_error: bool = True, - ) -> Any: + ) -> ExtendedString | ExtendedDict: """Send a message to a Slack channel using the bot token. 
Args: @@ -254,7 +278,8 @@ def send_message( raise_on_api_error: When True, raise `SlackAPIError` on API failures. Returns: - str | Any: Timestamp string for the posted message or the Slack API response. + Extended timestamp string, or an extended error payload when + `raise_on_api_error=False`. Raises: RuntimeError: If the bot is not a member of the channel. @@ -285,7 +310,7 @@ def send_message( except SlackApiError as exc: if raise_on_api_error: raise SlackAPIError(exc.response) from exc - return exc.response + return self.extend_result(_slack_response_payload(exc.response)) def get_bot_channels(self) -> ExtendedDict: """Return channels the bot account is a member of. diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index df4e909..13375ed 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -27,7 +27,7 @@ from extended_data.connectors.slack import SlackConnector from extended_data.connectors.vault import VaultConnector from extended_data.connectors.zoom import ZoomConnector -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple REPO_ROOT = Path(__file__).resolve().parents[2] @@ -95,6 +95,7 @@ (GitHubConnector.get_repository, ExtendedDict | None), (GitHubConnector.list_teams, ExtendedDict), (GitHubConnector.get_team, ExtendedDict | None), + (GitHubConnector.get_repository_file, ExtendedDict | ExtendedList[Any] | ExtendedString | ExtendedTuple[Any] | None), (GitHubConnector.execute_graphql, ExtendedDict), (GitHubConnector.get_users_with_verified_emails, ExtendedDict), (GitHubConnector.build_workflow, ExtendedDict), @@ -171,6 +172,7 @@ (MeshyConnector.rig_model, ExtendedDict | ExtendedString), (MeshyConnector.apply_animation, ExtendedDict | ExtendedString), (MeshyConnector.retexture_model, ExtendedDict 
| ExtendedString), + (SlackConnector.send_message, ExtendedString | ExtendedDict), (SlackConnector.get_bot_channels, ExtendedDict), (SlackConnector.list_users, ExtendedDict), (SlackConnector.list_usergroups, ExtendedDict), diff --git a/tests/connectors/test_slack_connector.py b/tests/connectors/test_slack_connector.py index 7a521a6..8289e40 100644 --- a/tests/connectors/test_slack_connector.py +++ b/tests/connectors/test_slack_connector.py @@ -122,6 +122,39 @@ def test_send_message_converts_extended_blocks_for_sdk(self, mock_webclient_clas assert not isinstance(kwargs["blocks"][0], ExtendedDict) assert isinstance(kwargs["channel"], str) + @patch("extended_data.connectors.slack.WebClient") + def test_send_message_non_raising_api_error_returns_extended_payload( + self, + mock_webclient_class, + base_connector_kwargs, + ): + """Non-raising Slack send failures should not leak raw SDK response objects.""" + + class FakeSlackApiError(Exception): + def __init__(self, response): + self.response = response + + mock_bot_client = MagicMock() + mock_bot_client.users_conversations.return_value = {"channels": [{"name": "general", "id": "C12345"}]} + mock_bot_client.chat_postMessage.side_effect = FakeSlackApiError({"ok": False, "error": "channel_not_found"}) + + mock_user_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + with patch("extended_data.connectors.slack.SlackApiError", FakeSlackApiError): + result = connector.send_message( + channel_name="general", + text="Test message", + blocks=[], + raise_on_api_error=False, + ) + + assert isinstance(result, ExtendedDict) + assert isinstance(result["error"], ExtendedString) + assert result["error"] == "channel_not_found" + @patch("extended_data.connectors.slack.SlackConnector._call_api") @patch("extended_data.connectors.slack.WebClient") def test_list_users_filters_deleted( From 
63a34bcc86c2270c65a97cf3f194adee77548aac Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:03:03 -0500 Subject: [PATCH 124/287] fix: limit mcp tools to data payload methods --- README.md | 2 + docs/package-surface.md | 3 ++ src/extended_data/connectors/mcp.py | 57 +++++++++++++++++++++++++---- tests/connectors/test_mcp.py | 19 +++++++++- 4 files changed, 71 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index f8b9a28..83b34b0 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,8 @@ use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods immediately. Data-returning AI tool wrappers expose the same `ExtendedDict`/`ExtendedList` payload contract; framework factory functions still return framework tool objects. +The generic MCP bridge exposes only methods that advertise Extended Data +payload returns. Raw SDK/client objects and raw transport responses remain available from the methods that explicitly return them. diff --git a/docs/package-surface.md b/docs/package-surface.md index 4625fdb..8798bd8 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -211,6 +211,9 @@ first-class `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, and handoff boundaries. Data-returning AI tool wrapper functions follow the same contract and annotate their payload returns as `ExtendedDict` or `ExtendedList[ExtendedDict]`. +The generic MCP bridge exposes only connector methods that advertise Extended +Data payload returns, so raw SDK client factories and low-level HTTP helpers do +not leak into MCP tool catalogs. LangChain, CrewAI, Strands, and auto-detection factory functions still return plain framework tool object lists. diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index 8eeb937..818babe 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -1,7 +1,7 @@ """Unified MCP Server for Extended Data Connectors. 
This module provides a single MCP (Model Context Protocol) server that -exposes ALL extended data connectors as tools via the registry. +exposes registered connector data methods as tools via the registry. Usage: # Command line @@ -12,10 +12,10 @@ server = create_server() The server automatically discovers all registered connectors and exposes -their public methods as MCP tools. +methods that advertise Extended Data payload returns as MCP tools. This provides a standard MCP bridge between Python connectors and any MCP-aware -client with zero custom glue code - just standard MCP over stdio. +client without leaking raw SDK client factories or low-level HTTP helpers. """ from __future__ import annotations @@ -26,10 +26,13 @@ import sys from collections.abc import Callable, Iterable, Mapping -from typing import Any, cast +from typing import Any, cast, get_args, get_origin, get_type_hints from extended_data.connectors.registry import get_connector, list_connectors -from extended_data.containers import to_builtin +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, ExtendedTuple, to_builtin + + +_EXTENDED_PAYLOAD_TYPES = (ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, ExtendedTuple) def _check_mcp_installed() -> bool: @@ -92,17 +95,55 @@ def _get_method_schema(method: Callable[..., Any]) -> dict[str, Any]: def _get_public_methods(connector_class: builtins.type[Any]) -> list[tuple[str, Callable[..., Any]]]: - """Get public methods from a connector class (excluding dunder and private).""" + """Get public data methods from a connector class for MCP exposure.""" methods = [] for name in dir(connector_class): if name.startswith("_"): continue attr = getattr(connector_class, name, None) - if callable(attr) and not isinstance(attr, builtins.type): - methods.append((name, attr)) + if _is_mcp_data_method(attr): + methods.append((name, cast(Callable[..., Any], attr))) return methods +def _is_mcp_data_method(method: Any) -> bool: 
+ """Return True when a public callable advertises an Extended Data payload.""" + if not callable(method) or isinstance(method, builtins.type): + return False + + qualname = getattr(method, "__qualname__", "") + if qualname.startswith(("VendorConnectorBase.", "InputProvider.")): + return False + + return _annotation_includes_extended_payload(_return_annotation(method)) + + +def _return_annotation(method: Callable[..., Any]) -> Any: + """Resolve a method return annotation without failing on optional imports.""" + try: + return get_type_hints(method).get("return") + except Exception: + return getattr(method, "__annotations__", {}).get("return") + + +def _annotation_includes_extended_payload(annotation: Any) -> bool: + """Return True when an annotation includes a Tier 2 container type.""" + if annotation is None: + return False + + if isinstance(annotation, str): + return any(payload_type.__name__ in annotation for payload_type in _EXTENDED_PAYLOAD_TYPES) + + if annotation in _EXTENDED_PAYLOAD_TYPES: + return True + + origin = get_origin(annotation) + if origin in _EXTENDED_PAYLOAD_TYPES: + return True + + return any(_annotation_includes_extended_payload(arg) for arg in get_args(annotation)) + + def _jsonable_tool_result(result: Any) -> Any: """Lower connector tool results to JSON-compatible Python data.""" if hasattr(result, "model_dump"): diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index f4154f7..9992306 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -4,11 +4,12 @@ import pytest -from extended_data.connectors.mcp import _jsonable_tool_result, create_server +from extended_data.connectors.mcp import _get_public_methods, _jsonable_tool_result, create_server +from extended_data.connectors.meshy.connector import MeshyConnector from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet -def test_create_server(): +def test_create_server() -> None: """Test that the MCP server can be created and 
has tools.""" pytest.importorskip("mcp") server = create_server() @@ -17,6 +18,20 @@ def test_create_server(): assert server is not None +def test_mcp_public_methods_only_include_extended_payload_boundaries() -> None: + """Generic MCP exposure should skip raw clients and inherited base helpers.""" + method_names = {name for name, _ in _get_public_methods(MeshyConnector)} + + assert "text3d_generate" in method_names + assert "image3d_generate" in method_names + assert "request_data" not in method_names + assert "decode_response" not in method_names + assert "get_ai_tool_definitions" not in method_names + assert "freeze_inputs" not in method_names + assert "merge_inputs" not in method_names + assert "replace_inputs" not in method_names + + def test_jsonable_tool_result_lowers_extended_mapping_payloads() -> None: """MCP result serialization keeps Tier 2 mapping payloads as JSON objects.""" payload = ExtendedDict({"service": {"name": "api"}}) From 306f421c7ec77ad70621049615d2a65b4e62db0b Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:08:57 -0500 Subject: [PATCH 125/287] fix: restrict cli calls to data methods --- README.md | 4 +- docs/package-surface.md | 6 +- src/extended_data/connectors/cli.py | 17 ++--- src/extended_data/connectors/mcp.py | 55 ++-------------- src/extended_data/connectors/surface.py | 63 +++++++++++++++++++ tests/connectors/test_cli.py | 50 +++++++++++---- .../test_connector_payload_contracts.py | 1 + 7 files changed, 120 insertions(+), 76 deletions(-) create mode 100644 src/extended_data/connectors/surface.py diff --git a/README.md b/README.md index 83b34b0..9b544b6 100644 --- a/README.md +++ b/README.md @@ -106,8 +106,8 @@ use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods immediately. Data-returning AI tool wrappers expose the same `ExtendedDict`/`ExtendedList` payload contract; framework factory functions still return framework tool objects. 
-The generic MCP bridge exposes only methods that advertise Extended Data -payload returns. +The generic CLI `call` command and MCP bridge expose only methods that +advertise Extended Data payload returns. Raw SDK/client objects and raw transport responses remain available from the methods that explicitly return them. diff --git a/docs/package-surface.md b/docs/package-surface.md index 8798bd8..77acf32 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -211,9 +211,9 @@ first-class `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, and handoff boundaries. Data-returning AI tool wrapper functions follow the same contract and annotate their payload returns as `ExtendedDict` or `ExtendedList[ExtendedDict]`. -The generic MCP bridge exposes only connector methods that advertise Extended -Data payload returns, so raw SDK client factories and low-level HTTP helpers do -not leak into MCP tool catalogs. +The generic CLI `call` command and MCP bridge expose only connector methods +that advertise Extended Data payload returns, so raw SDK client factories and +low-level HTTP helpers do not leak into serialized tool catalogs. LangChain, CrewAI, Strands, and auto-detection factory functions still return plain framework tool object lists. diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index 9eb213d..dc8ecd5 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -7,7 +7,7 @@ # List available connectors extended-data list - # Call any connector method + # Call any connector data method extended-data call [--arg value ...] 
# Start MCP server @@ -29,6 +29,7 @@ get_connector_info, list_connector_info, ) +from extended_data.connectors.surface import connector_data_methods, is_connector_data_method from extended_data.containers import ExtendedList from extended_data.containers.factory import to_builtin @@ -142,6 +143,12 @@ def cmd_call(args: argparse.Namespace) -> int: i += 1 try: + cls = get_connector_class(connector_name) + class_method = getattr(cls, method_name, None) + if not is_connector_data_method(class_method): + _write_stderr(f"Connector {connector_name!r} has no exposed data method {method_name!r}") + return 1 + connector = get_connector(connector_name) method = getattr(connector, method_name, None) @@ -175,13 +182,7 @@ def cmd_methods(args: argparse.Namespace) -> int: return 1 methods: list[dict[str, str]] = [] - for name in sorted(dir(cls)): - if name.startswith("_"): - continue - attr = getattr(cls, name, None) - if not callable(attr) or isinstance(attr, type): - continue - + for name, attr in connector_data_methods(cls): doc = attr.__doc__.split("\n")[0].strip()[:50] if attr.__doc__ else "No description" methods.append({"name": name, "description": doc}) diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index 818babe..9552b49 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -26,13 +26,11 @@ import sys from collections.abc import Callable, Iterable, Mapping -from typing import Any, cast, get_args, get_origin, get_type_hints +from typing import Any, cast from extended_data.connectors.registry import get_connector, list_connectors -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, ExtendedTuple, to_builtin - - -_EXTENDED_PAYLOAD_TYPES = (ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, ExtendedTuple) +from extended_data.connectors.surface import connector_data_methods +from extended_data.containers import to_builtin def _check_mcp_installed() 
-> bool: @@ -96,52 +94,7 @@ def _get_method_schema(method: Callable[..., Any]) -> dict[str, Any]: def _get_public_methods(connector_class: builtins.type[Any]) -> list[tuple[str, Callable[..., Any]]]: """Get public data methods from a connector class for MCP exposure.""" - methods = [] - for name in dir(connector_class): - if name.startswith("_"): - continue - attr = getattr(connector_class, name, None) - if _is_mcp_data_method(attr): - methods.append((name, cast(Callable[..., Any], attr))) - return methods - - -def _is_mcp_data_method(method: Any) -> bool: - """Return True when a public callable advertises an Extended Data payload.""" - if not callable(method) or isinstance(method, builtins.type): - return False - - qualname = getattr(method, "__qualname__", "") - if qualname.startswith(("VendorConnectorBase.", "InputProvider.")): - return False - - return _annotation_includes_extended_payload(_return_annotation(method)) - - -def _return_annotation(method: Callable[..., Any]) -> Any: - """Resolve a method return annotation without failing on optional imports.""" - try: - return get_type_hints(method).get("return") - except Exception: - return getattr(method, "__annotations__", {}).get("return") - - -def _annotation_includes_extended_payload(annotation: Any) -> bool: - """Return True when an annotation includes a Tier 2 container type.""" - if annotation is None: - return False - - if isinstance(annotation, str): - return any(payload_type.__name__ in annotation for payload_type in _EXTENDED_PAYLOAD_TYPES) - - if annotation in _EXTENDED_PAYLOAD_TYPES: - return True - - origin = get_origin(annotation) - if origin in _EXTENDED_PAYLOAD_TYPES: - return True - - return any(_annotation_includes_extended_payload(arg) for arg in get_args(annotation)) + return connector_data_methods(connector_class) def _jsonable_tool_result(result: Any) -> Any: diff --git a/src/extended_data/connectors/surface.py b/src/extended_data/connectors/surface.py new file mode 100644 index 
0000000..6e722e8 --- /dev/null +++ b/src/extended_data/connectors/surface.py @@ -0,0 +1,63 @@ +"""Public connector data-surface helpers.""" + +from __future__ import annotations + +import builtins + +from collections.abc import Callable +from typing import Any, cast, get_args, get_origin, get_type_hints + +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, ExtendedTuple + + +EXTENDED_PAYLOAD_TYPES = (ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, ExtendedTuple) + + +def connector_data_methods(connector_class: builtins.type[Any]) -> list[tuple[str, Callable[..., Any]]]: + """Return public connector methods that advertise Extended Data payloads.""" + methods: list[tuple[str, Callable[..., Any]]] = [] + for name in dir(connector_class): + if name.startswith("_"): + continue + attr = getattr(connector_class, name, None) + if is_connector_data_method(attr): + methods.append((name, cast(Callable[..., Any], attr))) + return methods + + +def is_connector_data_method(method: Any) -> bool: + """Return True when a callable belongs to the public data payload surface.""" + if not callable(method) or isinstance(method, builtins.type): + return False + + qualname = getattr(method, "__qualname__", "") + if qualname.startswith(("VendorConnectorBase.", "InputProvider.")): + return False + + return annotation_includes_extended_payload(return_annotation(method)) + + +def return_annotation(method: Callable[..., Any]) -> Any: + """Resolve a callable return annotation without failing on optional imports.""" + try: + return get_type_hints(method).get("return") + except Exception: + return getattr(method, "__annotations__", {}).get("return") + + +def annotation_includes_extended_payload(annotation: Any) -> bool: + """Return True when an annotation includes a Tier 2 container type.""" + if annotation is None: + return False + + if isinstance(annotation, str): + return any(payload_type.__name__ in annotation for payload_type in 
EXTENDED_PAYLOAD_TYPES) + + if annotation in EXTENDED_PAYLOAD_TYPES: + return True + + origin = get_origin(annotation) + if origin in EXTENDED_PAYLOAD_TYPES: + return True + + return any(annotation_includes_extended_payload(arg) for arg in get_args(annotation)) diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index 3b50116..1ffa5de 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -13,7 +13,15 @@ from extended_data.containers import ExtendedDict -def test_cli_list(): +class ExampleConnector: + """Tiny connector shell for CLI call-surface tests.""" + + def fetch(self, enabled: bool = False, count: int = 0) -> ExtendedDict: + """Fetch example data.""" + return ExtendedDict({"enabled": enabled, "count": count}) + + +def test_cli_list() -> None: """Test the list command.""" args = argparse.Namespace(json=False, available_only=False) with patch("sys.stdout.write") as mock_write: @@ -26,7 +34,7 @@ def test_cli_list(): assert "google" in output -def test_cli_list_json(): +def test_cli_list_json() -> None: """List command can emit machine-readable connector metadata.""" args = argparse.Namespace(json=True, available_only=False) with patch("sys.stdout.write") as mock_write: @@ -39,7 +47,7 @@ def test_cli_list_json(): assert "api_key_env" not in output -def test_cli_info(): +def test_cli_info() -> None: """Info command prints connector metadata.""" args = argparse.Namespace(connector=" github ", json=False) with patch("sys.stdout.write") as mock_write: @@ -51,28 +59,30 @@ def test_cli_info(): assert "install: pip install extended-data[github]" in output -def test_cli_methods_lists_public_methods(): - """Methods command prints public callable methods with descriptions.""" +def test_cli_methods_lists_public_methods() -> None: + """Methods command prints public data methods with descriptions.""" args = argparse.Namespace(connector="meshy") with patch("sys.stdout.write") as mock_write: exit_code = cmd_methods(args) assert 
exit_code == 0 output = "".join(call.args[0] for call in mock_write.call_args_list if call.args) - assert "request_data" in output - assert "Decode an HTTP response body" in output + assert "text3d_generate" in output + assert "request_data" not in output + assert "decode_response" not in output def test_cli_methods_json_lists_public_methods() -> None: - """Methods command can emit machine-readable method metadata.""" + """Methods command can emit machine-readable data-method metadata.""" args = argparse.Namespace(connector="meshy", json=True) with patch("sys.stdout.write") as mock_write: exit_code = cmd_methods(args) assert exit_code == 0 methods = json.loads(mock_write.call_args.args[0]) - decode_response = next(method for method in methods if method["name"] == "decode_response") - assert decode_response["description"].startswith("Decode an HTTP response body") + method_names = {method["name"] for method in methods} + assert "text3d_generate" in method_names + assert "request_data" not in method_names def test_cli_call_parses_dynamic_keyword_arguments() -> None: @@ -82,6 +92,7 @@ def test_cli_call_parses_dynamic_keyword_arguments() -> None: with ( patch("sys.argv", ["extended-data", "call", "example", "fetch", "--enabled", "true", "--count", "3"]), + patch("extended_data.connectors.cli.get_connector_class", return_value=ExampleConnector), patch("extended_data.connectors.cli.get_connector", return_value=connector), patch("sys.stdout.write") as mock_write, ): @@ -100,6 +111,7 @@ def test_cli_call_accepts_json_flag_after_method() -> None: args = argparse.Namespace(connector="example", method="fetch", extra=["--json"], json=False) with ( + patch("extended_data.connectors.cli.get_connector_class", return_value=ExampleConnector), patch("extended_data.connectors.cli.get_connector", return_value=connector), patch("sys.stdout.write") as mock_write, ): @@ -117,6 +129,7 @@ def test_cli_call_serializes_extended_containers_as_data() -> None: args = 
argparse.Namespace(connector="example", method="fetch", extra=[], json=True) with ( + patch("extended_data.connectors.cli.get_connector_class", return_value=ExampleConnector), patch("extended_data.connectors.cli.get_connector", return_value=connector), patch("sys.stdout.write") as mock_write, ): @@ -132,13 +145,25 @@ def test_cli_call_reports_missing_method() -> None: connector = object() with ( + patch("extended_data.connectors.cli.get_connector_class", return_value=ExampleConnector), patch("extended_data.connectors.cli.get_connector", return_value=connector), patch("sys.stderr.write") as mock_write, ): exit_code = cmd_call(args) assert exit_code == 1 - assert "has no callable method" in mock_write.call_args.args[0] + assert "has no exposed data method" in mock_write.call_args.args[0] + + +def test_cli_call_rejects_raw_connector_helpers() -> None: + """Call command should not expose raw/base helpers at the serialization boundary.""" + args = argparse.Namespace(connector="meshy", method="request_data", extra=[], json=False) + + with patch("sys.stderr.write") as mock_write: + exit_code = cmd_call(args) + + assert exit_code == 1 + assert "has no exposed data method" in mock_write.call_args.args[0] def test_cli_call_reports_connector_errors() -> None: @@ -146,6 +171,7 @@ def test_cli_call_reports_connector_errors() -> None: args = argparse.Namespace(connector="example", method="fetch", extra=[], json=False) with ( + patch("extended_data.connectors.cli.get_connector_class", return_value=ExampleConnector), patch("extended_data.connectors.cli.get_connector", side_effect=RuntimeError("boom")), patch("sys.stderr.write") as mock_write, ): @@ -155,7 +181,7 @@ def test_cli_call_reports_connector_errors() -> None: assert "boom" in mock_write.call_args.args[0] -def test_cli_main_help(): +def test_cli_main_help() -> None: """Test main CLI entry point with help.""" with patch("sys.argv", ["extended-data", "--help"]): with pytest.raises(SystemExit) as exc: diff --git 
a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index 13375ed..2008bb2 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -195,6 +195,7 @@ ("src/extended_data/connectors/base.py", "VendorConnectorBase.get_tools"), ("src/extended_data/connectors/connectors.py", "ConnectorFabric.list_connectors"), ("src/extended_data/connectors/registry.py", "list_connectors"), + ("src/extended_data/connectors/surface.py", "connector_data_methods"), ("src/extended_data/connectors/zoom/__init__.py", "ZoomConnector.get_headers"), } From 21c34487416676e565b69c5bef583e023bdf1d31 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:13:09 -0500 Subject: [PATCH 126/287] docs: clarify connector cli data methods --- src/extended_data/connectors/cli.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index dc8ecd5..d8a5aff 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -116,7 +116,7 @@ def cmd_list(args: argparse.Namespace) -> int: def cmd_call(args: argparse.Namespace) -> int: - """Call a connector method.""" + """Call a connector data method.""" connector_name = args.connector method_name = args.method @@ -172,7 +172,7 @@ def cmd_call(args: argparse.Namespace) -> int: def cmd_methods(args: argparse.Namespace) -> int: - """List methods for a connector.""" + """List connector data methods.""" connector_name = args.connector try: @@ -250,7 +250,7 @@ def main() -> int: epilog=""" Examples: extended-data list # List all connectors - extended-data methods jules # List Jules methods + extended-data methods jules # List Jules data methods extended-data call jules list_sources # Call a method extended-data call cursor list_agents extended-data mcp # Start MCP server @@ -265,7 +265,7 @@ def main() -> 
int: list_parser.set_defaults(func=cmd_list) # Methods command - methods_parser = subparsers.add_parser("methods", help="List methods for a connector") + methods_parser = subparsers.add_parser("methods", help="List connector data methods") methods_parser.add_argument("connector", help="Connector name") methods_parser.add_argument("--json", action="store_true", help="JSON output") methods_parser.set_defaults(func=cmd_methods) @@ -277,7 +277,7 @@ def main() -> int: info_parser.set_defaults(func=cmd_info) # Call command - call_parser = subparsers.add_parser("call", help="Call a connector method") + call_parser = subparsers.add_parser("call", help="Call a connector data method") call_parser.add_argument("--json", action="store_true", help="JSON output") call_parser.add_argument("connector", help="Connector name") call_parser.add_argument("method", help="Method name") From 88c4474af726557a21bc86424c5e9e6d18089d1b Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:18:32 -0500 Subject: [PATCH 127/287] test: harden connector data surface contract --- .../test_connector_payload_contracts.py | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index 2008bb2..e8fcc38 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -9,12 +9,15 @@ import pytest +import extended_data.connectors as connector_exports + from extended_data.connectors.anthropic import AnthropicConnector from extended_data.connectors.aws import AWSConnector from extended_data.connectors.aws.codedeploy import create_codedeploy_deployment, get_aws_codedeploy_deployments from extended_data.connectors.aws.organizations import AWSOrganizationsMixin from extended_data.connectors.aws.s3 import AWSS3Mixin from extended_data.connectors.aws.sso import AWSSSOmixin +from extended_data.connectors.base import 
VendorConnectorBase from extended_data.connectors.cursor import CursorConnector from extended_data.connectors.github import GitHubConnector from extended_data.connectors.google import GoogleConnector @@ -24,10 +27,13 @@ from extended_data.connectors.google.services import GoogleServicesMixin from extended_data.connectors.google.workspace import GoogleWorkspaceMixin from extended_data.connectors.meshy.connector import MeshyConnector +from extended_data.connectors.registry import BUILTIN_CONNECTORS from extended_data.connectors.slack import SlackConnector +from extended_data.connectors.surface import connector_data_methods, is_connector_data_method from extended_data.connectors.vault import VaultConnector from extended_data.connectors.zoom import ZoomConnector from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple +from extended_data.inputs import InputProvider REPO_ROOT = Path(__file__).resolve().parents[2] @@ -199,6 +205,58 @@ ("src/extended_data/connectors/zoom/__init__.py", "ZoomConnector.get_headers"), } +RAW_DATA_SURFACE_METHOD_NAMES = { + "close", + "delete", + "delete_data", + "download", + "extend_result", + "freeze_inputs", + "get", + "get_ai_tool_definitions", + "get_data", + "get_input", + "get_tools", + "handle_ai_tool_call", + "merge_inputs", + "patch", + "patch_data", + "post", + "post_data", + "put", + "put_data", + "replace_inputs", + "request", + "request_data", + "snapshot_inputs", +} + +RAW_DATA_SURFACE_METHODS = ( + VendorConnectorBase.close, + VendorConnectorBase.delete, + VendorConnectorBase.delete_data, + VendorConnectorBase.download, + VendorConnectorBase.extend_result, + VendorConnectorBase.get, + VendorConnectorBase.get_ai_tool_definitions, + VendorConnectorBase.get_data, + VendorConnectorBase.get_tools, + VendorConnectorBase.handle_ai_tool_call, + VendorConnectorBase.patch, + VendorConnectorBase.patch_data, + VendorConnectorBase.post, + VendorConnectorBase.post_data, + VendorConnectorBase.put, + 
VendorConnectorBase.put_data, + VendorConnectorBase.request, + VendorConnectorBase.request_data, + InputProvider.freeze_inputs, + InputProvider.get_input, + InputProvider.merge_inputs, + InputProvider.replace_inputs, + InputProvider.snapshot_inputs, +) + class _RawContainerReturnVisitor(ast.NodeVisitor): def __init__(self, relative_path: str) -> None: @@ -253,6 +311,36 @@ def test_direct_connector_methods_advertise_extended_payloads(method: object, ex assert return_type == expected_return +@pytest.mark.parametrize(("method", "expected_return"), PAYLOAD_METHODS) +def test_payload_methods_are_accepted_by_connector_data_surface(method: object, expected_return: object) -> None: + """Every annotated connector payload method should be eligible for data-surface exposure.""" + assert is_connector_data_method(method), f"{method!r} -> {expected_return!r}" + + +@pytest.mark.parametrize("method", RAW_DATA_SURFACE_METHODS) +def test_inherited_transport_and_input_helpers_are_not_data_surface_methods(method: object) -> None: + """Inherited raw helpers should stay out of CLI and MCP data surfaces.""" + assert not is_connector_data_method(method), getattr(method, "__qualname__", repr(method)) + + +def test_builtin_connector_data_surfaces_do_not_expose_raw_helpers() -> None: + """Built-in connector CLI/MCP surfaces should expose payload methods, not fabric plumbing.""" + offenders: dict[str, list[str]] = {} + empty_surfaces: list[str] = [] + + for name, spec in BUILTIN_CONNECTORS.items(): + connector_class = getattr(connector_exports, spec.class_name) + method_names = {method_name for method_name, _ in connector_data_methods(connector_class)} + leaked = sorted(method_names & RAW_DATA_SURFACE_METHOD_NAMES) + if leaked: + offenders[name] = leaked + if not method_names: + empty_surfaces.append(name) + + assert offenders == {} + assert empty_surfaces == [] + + def test_raw_connector_container_returns_are_explicit_boundaries() -> None: """Public connector payloads should not drift back 
to plain dict/list returns.""" offenders: list[str] = [] From e5b819ee7ec43ba1b90f7b54c87ee3d69d0bb832 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:21:33 -0500 Subject: [PATCH 128/287] refactor: remove prefix suffix compatibility helpers --- examples/core/basic_usage.py | 9 ++-- src/extended_data/__init__.py | 4 -- src/extended_data/containers/strings.py | 6 +-- src/extended_data/primitives/__init__.py | 4 -- src/extended_data/primitives/strings.py | 30 ----------- src/extended_data/primitives/types.py | 3 +- tests/core/test_package_surface.py | 4 ++ tests/core/test_string_data_type.py | 66 ------------------------ 8 files changed, 11 insertions(+), 115 deletions(-) diff --git a/examples/core/basic_usage.py b/examples/core/basic_usage.py index 99504a5..18742b0 100644 --- a/examples/core/basic_usage.py +++ b/examples/core/basic_usage.py @@ -4,6 +4,7 @@ from __future__ import annotations from extended_data import ( + ExtendedString, all_non_empty, any_non_empty, deep_merge, @@ -13,8 +14,6 @@ flatten_list, flatten_map, is_nothing, - removeprefix, - removesuffix, sanitize_key, truncate, ) @@ -60,9 +59,9 @@ def demonstrate_map_utilities() -> None: def demonstrate_string_utilities() -> None: """Demonstrate basic string cleanup helpers.""" print("\n=== String Utilities ===") - text = "prefix_content_suffix" - print("Remove prefix:", removeprefix(text, "prefix_")) - print("Remove suffix:", removesuffix(text, "_suffix")) + text = ExtendedString("prefix_content_suffix") + print("Remove prefix:", text.remove_prefix("prefix_")) + print("Remove suffix:", text.remove_suffix("_suffix")) print("Truncate:", truncate("This value is intentionally too long", 20)) print("Sanitize key:", sanitize_key("User Name (Primary)")) diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 3a3cd64..1818242 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -105,8 +105,6 @@ from extended_data.primitives.strings import ( 
bytestostr, lower_first_char, - removeprefix, - removesuffix, sanitize_key, titleize_name, truncate, @@ -322,8 +320,6 @@ def __getattr__(name: str) -> Any: "read_file", "reconstruct_special_type", "reconstruct_special_types", - "removeprefix", - "removesuffix", "resolve_local_path", "sanitize_key", "singularize", diff --git a/src/extended_data/containers/strings.py b/src/extended_data/containers/strings.py index 03e6f55..81b1806 100644 --- a/src/extended_data/containers/strings.py +++ b/src/extended_data/containers/strings.py @@ -20,8 +20,6 @@ from extended_data.primitives.strings import ( is_url, lower_first_char, - removeprefix, - removesuffix, sanitize_key, titleize_name, truncate, @@ -52,11 +50,11 @@ def upper_first(self) -> ExtendedString: def remove_prefix(self, prefix: str) -> ExtendedString: """Return a copy with a leading prefix removed.""" - return ExtendedString(removeprefix(self.data, prefix)) + return ExtendedString(self.data.removeprefix(str(prefix))) def remove_suffix(self, suffix: str) -> ExtendedString: """Return a copy with a trailing suffix removed.""" - return ExtendedString(removesuffix(self.data, suffix)) + return ExtendedString(self.data.removesuffix(str(suffix))) def sanitize(self, delim: str = "_") -> ExtendedString: """Return a key-safe copy.""" diff --git a/src/extended_data/primitives/__init__.py b/src/extended_data/primitives/__init__.py index 7487fd1..45aae19 100644 --- a/src/extended_data/primitives/__init__.py +++ b/src/extended_data/primitives/__init__.py @@ -67,8 +67,6 @@ from extended_data.primitives.strings import ( bytestostr, lower_first_char, - removeprefix, - removesuffix, sanitize_key, titleize_name, truncate, @@ -145,8 +143,6 @@ "pluralize", "reconstruct_special_type", "reconstruct_special_types", - "removeprefix", - "removesuffix", "sanitize_key", "singularize", "split_dict_by_type", diff --git a/src/extended_data/primitives/strings.py b/src/extended_data/primitives/strings.py index 6ac2b8f..1feaf30 100644 --- 
a/src/extended_data/primitives/strings.py +++ b/src/extended_data/primitives/strings.py @@ -128,33 +128,3 @@ def titleize_name(name: str) -> str: str: The TitleCase name. """ return inflection.titleize(inflection.underscore(str(name))) - - -def removeprefix(string: str, prefix: str) -> str: - """Removes the specified prefix from the string if present. - - Args: - string (str): The string from which to remove the prefix. - prefix (str): The prefix to remove. - - Returns: - str: The string with the prefix removed if it was present, otherwise the original string. - """ - return str(string).removeprefix(str(prefix)) - - -def removesuffix(string: str, suffix: str) -> str: - """Removes the specified suffix from the string if present. - - Args: - string (str): The string from which to remove the suffix. - suffix (str): The suffix to remove. - - Returns: - str: The string with the suffix removed if it was present, otherwise the original string. - """ - string = str(string) - suffix = str(suffix) - if not suffix: - return string - return string.removesuffix(suffix) diff --git a/src/extended_data/primitives/types.py b/src/extended_data/primitives/types.py index 78d3ca3..34649f1 100644 --- a/src/extended_data/primitives/types.py +++ b/src/extended_data/primitives/types.py @@ -47,7 +47,6 @@ from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.formats.json import decode_json from extended_data.primitives.formats.yaml import YamlPairs, YamlTagged, decode_yaml -from extended_data.primitives.strings import removesuffix # Patterns for matching date, datetime, and time strings @@ -366,7 +365,7 @@ def convert_special_type(obj: Any) -> Any: return [convert_special_types(v) for v in obj] if isinstance(obj, (datetime.date, datetime.datetime)): - return removesuffix(obj.isoformat(), "+00:00") + return obj.isoformat().removesuffix("+00:00") if isinstance(obj, pathlib.Path): return str(obj) if isinstance(obj, (int, float, str, bool, type(None), 
UserString)): diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index fb1c69c..c1f3798 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -79,6 +79,10 @@ def test_clean_major_version_public_names() -> None: assert connectors.ConnectorFabric is ConnectorFabric assert not hasattr(inputs, "DirectedInputsClass") assert not hasattr(connectors, "VendorConnectors") + assert not hasattr(primitives, "removeprefix") + assert not hasattr(primitives, "removesuffix") + assert not hasattr(extended_data, "removeprefix") + assert not hasattr(extended_data, "removesuffix") def test_old_monorepo_import_namespaces_are_not_preserved() -> None: diff --git a/tests/core/test_string_data_type.py b/tests/core/test_string_data_type.py index e855157..341fc49 100644 --- a/tests/core/test_string_data_type.py +++ b/tests/core/test_string_data_type.py @@ -20,8 +20,6 @@ - `valid_path_data`: Provides valid input values and expected results for testing path conversion. - `invalid_path_data`: Provides invalid inputs and expected exceptions for testing path conversion with errors. - `silent_invalid_path_data`: Provides invalid inputs for testing path conversion when errors are silenced. - - `removeprefix_data`: Provides input strings, prefixes, and expected results for testing prefix removal. - - `removesuffix_data`: Provides input strings, suffixes, and expected results for testing suffix removal. ### Test Functions The module contains the following test functions: @@ -37,8 +35,6 @@ - `test_strtopath`: Tests converting valid inputs into pathlib.Path objects. - `test_strtopath_invalid`: Tests handling invalid path inputs that should raise exceptions. - `test_strtopath_invalid_silent`: Tests handling invalid path inputs when errors are silenced. - - `test_removeprefix`: Tests removing a prefix from a string. - - `test_removesuffix`: Tests removing a suffix from a string. 
""" from __future__ import annotations @@ -52,8 +48,6 @@ bytestostr, is_url, lower_first_char, - removeprefix, - removesuffix, sanitize_key, titleize_name, truncate, @@ -141,38 +135,6 @@ def titleize_name_data(request: Any) -> tuple[str, str]: return request.param -@pytest.fixture( - params=[ - ("test_string", "test_", "string"), - ("string", "test_", "string"), - ("test_string", "", "test_string"), - ] -) -def removeprefix_data(request: Any) -> tuple[str, str, str]: - """Provides data for testing removeprefix function. - - Yields: - tuple[str, str, str]: A tuple containing the input string, prefix, and expected result. - """ - return request.param - - -@pytest.fixture( - params=[ - ("test_string", "_string", "test"), - ("test", "_string", "test"), - ("test_string", "", "test_string"), - ] -) -def removesuffix_data(request: Any) -> tuple[str, str, str]: - """Provides data for testing removesuffix function. - - Yields: - tuple[str, str, str]: A tuple containing the input string, suffix, and expected result. - """ - return request.param - - @pytest.mark.parametrize( ("input_value", "expected_output"), [ @@ -294,32 +256,6 @@ def test_titleize_name(titleize_name_data: tuple[str, str]) -> None: assert titleize_name(name) == expected -def test_removeprefix(removeprefix_data: tuple[str, str, str]) -> None: - """Tests removing a prefix from a string. - - Args: - removeprefix_data (tuple[str, str, str]): A fixture providing the input string, prefix, and expected result. - - Asserts: - The result of removeprefix matches the expected string with the prefix removed. - """ - string, prefix, expected = removeprefix_data - assert removeprefix(string, prefix) == expected - - -def test_removesuffix(removesuffix_data: tuple[str, str, str]) -> None: - """Tests removing a suffix from a string. - - Args: - removesuffix_data (tuple[str, str, str]): A fixture providing the input string, suffix, and expected result. 
- - Asserts: - The result of removesuffix matches the expected string with the suffix removed. - """ - string, suffix, expected = removesuffix_data - assert removesuffix(string, suffix) == expected - - def test_string_utilities_accept_extended_string_values() -> None: """Tier 1 string utilities compose with Tier 2 ExtendedString values.""" value = ExtendedString("helloWorld") @@ -330,5 +266,3 @@ def test_string_utilities_accept_extended_string_values() -> None: assert upper_first_char(ExtendedString("hello")) == "Hello" assert is_url(ExtendedString("https://example.com")) assert titleize_name(value) == "Hello World" - assert removeprefix(ExtendedString("pre_value"), ExtendedString("pre_")) == "value" - assert removesuffix(ExtendedString("value_suffix"), ExtendedString("_suffix")) == "value" From 98de6811f330703bf1d27ef4265d701c549a6625 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:23:02 -0500 Subject: [PATCH 129/287] test: cover extended string prefix suffix methods --- tests/core/test_containers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index b41d71d..823581b 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -32,6 +32,9 @@ def test_extended_string_chains_primitive_transforms() -> None: ) assert value.to_snake_case().remove_suffix("_value") == "api_response" + assert value.to_snake_case().remove_prefix("api_") == "response_value" + assert ExtendedString("prefix_value").remove_prefix("prefix_") == "value" + assert ExtendedString("value_suffix").remove_suffix("_suffix") == "value" assert value.to_kebab_case() == "api-response-value" assert ExtendedString("1").ordinalize() == "1st" assert ExtendedString("yes").to_bool() is True From 0ba86f87f928093a97fa18c7276f1f3e8697f730 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:25:20 -0500 Subject: [PATCH 130/287] refactor: rename bytes string primitive --- 
src/extended_data/__init__.py | 4 +-- src/extended_data/io/importers.py | 4 +-- src/extended_data/primitives/__init__.py | 4 +-- src/extended_data/primitives/formats/hcl.py | 4 +-- src/extended_data/primitives/formats/toml.py | 4 +-- .../primitives/formats/yaml/utils.py | 4 +-- src/extended_data/primitives/strings.py | 29 +++++++++---------- tests/core/test_package_surface.py | 2 ++ tests/core/test_string_data_type.py | 18 ++++++------ 9 files changed, 36 insertions(+), 37 deletions(-) diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 1818242..38cf3cb 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -103,7 +103,7 @@ to_snake_case, ) from extended_data.primitives.strings import ( - bytestostr, + bytes_to_string, lower_first_char, sanitize_key, titleize_name, @@ -254,7 +254,7 @@ def __getattr__(name: str) -> Any: "are_nothing", "base64_decode", "base64_encode", - "bytestostr", + "bytes_to_string", "clone_repository_to_temp", "convert_special_type", "convert_special_types", diff --git a/src/extended_data/io/importers.py b/src/extended_data/io/importers.py index 7538ddf..5c362c6 100644 --- a/src/extended_data/io/importers.py +++ b/src/extended_data/io/importers.py @@ -10,7 +10,7 @@ from extended_data.primitives.formats.toml import decode_toml from extended_data.primitives.formats.yaml import decode_yaml from extended_data.primitives.serialization import normalize_data_encoding -from extended_data.primitives.strings import bytestostr +from extended_data.primitives.strings import bytes_to_string def unwrap_raw_data_from_import( @@ -43,7 +43,7 @@ def unwrap_raw_data_from_import( elif normalized_encoding == "hcl": decoded = decode_hcl2(wrapped_data) elif normalized_encoding == "raw": - decoded = bytestostr(wrapped_data) + decoded = bytes_to_string(wrapped_data) else: error_message = f"Unsupported encoding format: {encoding}" raise ValueError(error_message) diff --git a/src/extended_data/primitives/__init__.py 
b/src/extended_data/primitives/__init__.py index 45aae19..1e3584b 100644 --- a/src/extended_data/primitives/__init__.py +++ b/src/extended_data/primitives/__init__.py @@ -65,7 +65,7 @@ to_snake_case, ) from extended_data.primitives.strings import ( - bytestostr, + bytes_to_string, lower_first_char, sanitize_key, titleize_name, @@ -99,7 +99,7 @@ "all_values_from_map", "any_non_empty", "are_nothing", - "bytestostr", + "bytes_to_string", "convert_special_type", "convert_special_types", "create_merger", diff --git a/src/extended_data/primitives/formats/hcl.py b/src/extended_data/primitives/formats/hcl.py index 68b9252..340a9cc 100644 --- a/src/extended_data/primitives/formats/hcl.py +++ b/src/extended_data/primitives/formats/hcl.py @@ -15,7 +15,7 @@ from extended_data.primitives.formats._normalization import lower_extended_data from extended_data.primitives.formats.errors import DataDecodeError, invalid_utf8_error -from extended_data.primitives.strings import bytestostr +from extended_data.primitives.strings import bytes_to_string from extended_data.primitives.types import convert_special_types @@ -235,7 +235,7 @@ def decode_hcl2(hcl2_data: str | memoryview | bytes | bytearray) -> Any: UnexpectedToken If the HCL2 data cannot be parsed. 
""" try: - hcl2_data = bytestostr(hcl2_data) + hcl2_data = bytes_to_string(hcl2_data) except UnicodeDecodeError as exc: raise invalid_utf8_error("HCL2") from exc diff --git a/src/extended_data/primitives/formats/toml.py b/src/extended_data/primitives/formats/toml.py index e46ba5b..08f9676 100644 --- a/src/extended_data/primitives/formats/toml.py +++ b/src/extended_data/primitives/formats/toml.py @@ -11,7 +11,7 @@ from extended_data.primitives.formats._normalization import lower_extended_data from extended_data.primitives.formats.errors import DataDecodeError, invalid_utf8_error -from extended_data.primitives.strings import bytestostr +from extended_data.primitives.strings import bytes_to_string from extended_data.primitives.types import convert_special_types @@ -25,7 +25,7 @@ def decode_toml(toml_data: str | memoryview | bytes | bytearray) -> Any: Any: The decoded Python object with any special types processed. """ try: - toml_data = bytestostr(toml_data) + toml_data = bytes_to_string(toml_data) except UnicodeDecodeError as exc: raise invalid_utf8_error("TOML") from exc try: diff --git a/src/extended_data/primitives/formats/yaml/utils.py b/src/extended_data/primitives/formats/yaml/utils.py index 09e9846..446b099 100644 --- a/src/extended_data/primitives/formats/yaml/utils.py +++ b/src/extended_data/primitives/formats/yaml/utils.py @@ -15,7 +15,7 @@ from extended_data.primitives.formats.yaml.dumpers import PureDumper from extended_data.primitives.formats.yaml.loaders import PureLoader from extended_data.primitives.formats.yaml.tag_classes import YamlPairs, YamlTagged -from extended_data.primitives.strings import bytestostr +from extended_data.primitives.strings import bytes_to_string def decode_yaml(yaml_data: str | memoryview | bytes | bytearray) -> Any: @@ -28,7 +28,7 @@ def decode_yaml(yaml_data: str | memoryview | bytes | bytearray) -> Any: Any: The decoded Python object. 
""" try: - yaml_data = bytestostr(yaml_data) + yaml_data = bytes_to_string(yaml_data) except UnicodeDecodeError as exc: raise invalid_utf8_error("YAML") from exc try: diff --git a/src/extended_data/primitives/strings.py b/src/extended_data/primitives/strings.py index 1feaf30..895c3df 100644 --- a/src/extended_data/primitives/strings.py +++ b/src/extended_data/primitives/strings.py @@ -12,34 +12,31 @@ import inflection -def bytestostr(bstr: str | memoryview | bytes | bytearray) -> str: - """Converts bytes, memoryview, or bytearray to a UTF-8 decoded string. +def bytes_to_string(value: object) -> str: + """Convert bytes, memoryview, bytearray, or another object to a string. - This function takes an input which could be a string, memoryview, bytes, or bytearray, - and returns the corresponding UTF-8 decoded string. If the input is already a string, - it returns it unchanged. + Bytes-like values are decoded as UTF-8. Strings are returned unchanged and + all other objects use their standard string representation. Args: - bstr (str | memoryview | bytes | bytearray): The input to convert to a string. - Can be a `str`, `memoryview`, `bytes`, or `bytearray`. + value: The value to convert. Returns: - str: The UTF-8 decoded string representation of the input. + The string representation of the input. Raises: UnicodeDecodeError: If the bytes or bytearray cannot be decoded into a valid UTF-8 string. 
""" - if isinstance(bstr, str): - return bstr + if isinstance(value, str): + return value - if isinstance(bstr, memoryview): - bstr = bstr.tobytes() + if isinstance(value, memoryview): + value = value.tobytes() - if isinstance(bstr, (bytes, bytearray)): - return bstr.decode("utf-8") + if isinstance(value, (bytes, bytearray)): + return value.decode("utf-8") - # This return handles both bytes, bytearray, and memoryview after conversion to bytes - return str(bstr) + return str(value) def sanitize_key(key: str, delim: str = "_") -> str: diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index c1f3798..af0f3ad 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -81,8 +81,10 @@ def test_clean_major_version_public_names() -> None: assert not hasattr(connectors, "VendorConnectors") assert not hasattr(primitives, "removeprefix") assert not hasattr(primitives, "removesuffix") + assert not hasattr(primitives, "bytestostr") assert not hasattr(extended_data, "removeprefix") assert not hasattr(extended_data, "removesuffix") + assert not hasattr(extended_data, "bytestostr") def test_old_monorepo_import_namespaces_are_not_preserved() -> None: diff --git a/tests/core/test_string_data_type.py b/tests/core/test_string_data_type.py index 341fc49..993d79a 100644 --- a/tests/core/test_string_data_type.py +++ b/tests/core/test_string_data_type.py @@ -45,7 +45,7 @@ from extended_data.containers import ExtendedString from extended_data.primitives.strings import ( - bytestostr, + bytes_to_string, is_url, lower_first_char, sanitize_key, @@ -144,7 +144,7 @@ def titleize_name_data(request: Any) -> tuple[str, str]: (memoryview(b"memoryview data"), "memoryview data"), # Memoryview input ], ) -def test_bytestostr(input_value: str | memoryview | bytes | bytearray, expected_output: str) -> None: +def test_bytes_to_string(input_value: str | memoryview | bytes | bytearray, expected_output: str) -> None: """Tests converting 
various byte-like objects and strings into a UTF-8 decoded string. Args: @@ -152,25 +152,25 @@ def test_bytestostr(input_value: str | memoryview | bytes | bytearray, expected_ expected_output (str): The expected UTF-8 decoded string. Asserts: - The result of bytestostr matches the expected UTF-8 decoded string for valid inputs. + The result of bytes_to_string matches the expected UTF-8 decoded string for valid inputs. """ - assert bytestostr(input_value) == expected_output + assert bytes_to_string(input_value) == expected_output -def test_bytestostr_invalid_bytes() -> None: +def test_bytes_to_string_invalid_bytes() -> None: """Tests handling of invalid byte sequences during conversion to string. Asserts: - The bytestostr function raises a ConversionError when invalid bytes cannot be decoded. + The bytes_to_string function raises a ConversionError when invalid bytes cannot be decoded. """ invalid_bytes = b"\x80invalid" with pytest.raises(UnicodeDecodeError): - bytestostr(invalid_bytes) + bytes_to_string(invalid_bytes) -def test_bytestostr_falls_back_to_string_conversion() -> None: +def test_bytes_to_string_falls_back_to_string_conversion() -> None: """Convert non-bytes objects with a plain string fallback.""" - assert bytestostr(123) == "123" + assert bytes_to_string(123) == "123" def test_sanitize_key(test_key: str, sanitized_key: str) -> None: From b4f88cdf07b3214ba2f2fd7436800ef9b06f8249 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:28:20 -0500 Subject: [PATCH 131/287] refactor: rename string conversion primitives --- src/extended_data/__init__.py | 28 +-- src/extended_data/containers/strings.py | 4 +- src/extended_data/inputs/__main__.py | 20 ++- src/extended_data/io/exporters.py | 4 +- src/extended_data/logging/logging.py | 6 +- src/extended_data/primitives/__init__.py | 28 +-- src/extended_data/primitives/types.py | 28 +-- tests/core/test_package_surface.py | 12 ++ tests/core/test_string_data_type.py | 18 +- tests/core/test_type_utils.py 
| 216 +++++++++++------------ 10 files changed, 191 insertions(+), 173 deletions(-) diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 38cf3cb..a105117 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -118,13 +118,13 @@ make_hashable, reconstruct_special_type, reconstruct_special_types, - strtobool, - strtodate, - strtodatetime, - strtofloat, - strtoint, - strtopath, - strtotime, + string_to_bool, + string_to_date, + string_to_datetime, + string_to_float, + string_to_int, + string_to_path, + string_to_time, typeof, ) from extended_data.workflows import DataWorkflow, StepLike, WorkflowAction, WorkflowResult, WorkflowStep @@ -325,13 +325,13 @@ def __getattr__(name: str) -> Any: "singularize", "split_dict_by_type", "split_list_by_type", - "strtobool", - "strtodate", - "strtodatetime", - "strtofloat", - "strtoint", - "strtopath", - "strtotime", + "string_to_bool", + "string_to_date", + "string_to_datetime", + "string_to_float", + "string_to_int", + "string_to_path", + "string_to_time", "titleize", "titleize_name", "to_builtin", diff --git a/src/extended_data/containers/strings.py b/src/extended_data/containers/strings.py index 81b1806..573ae7e 100644 --- a/src/extended_data/containers/strings.py +++ b/src/extended_data/containers/strings.py @@ -25,7 +25,7 @@ truncate, upper_first_char, ) -from extended_data.primitives.types import strtobool +from extended_data.primitives.types import string_to_bool if TYPE_CHECKING: @@ -154,4 +154,4 @@ def is_url(self) -> bool: def to_bool(self, *, raise_on_error: bool = False) -> bool | None: """Return a boolean parsed from the string.""" - return strtobool(self.data, raise_on_error=raise_on_error) + return string_to_bool(self.data, raise_on_error=raise_on_error) diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index 6c3e9d0..2e062d9 100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -25,7 
+25,13 @@ from extended_data.primitives.formats.json import decode_json from extended_data.primitives.formats.yaml import decode_yaml from extended_data.primitives.state import is_nothing -from extended_data.primitives.types import strtobool, strtodatetime, strtofloat, strtoint, strtopath +from extended_data.primitives.types import ( + string_to_bool, + string_to_datetime, + string_to_float, + string_to_int, + string_to_path, +) if TYPE_CHECKING: @@ -76,7 +82,7 @@ def __init__( env_inputs = self._filtered_environment(os.environ, env_prefix=env_prefix, strip_prefix=strip_env_prefix) current_inputs = self._merge_inputs(env_inputs, current_inputs) - if from_stdin and not strtobool(os.getenv("OVERRIDE_STDIN", "False")): + if from_stdin and not string_to_bool(os.getenv("OVERRIDE_STDIN", "False")): stdin_inputs = self._load_from_stdin() current_inputs = self._merge_inputs(stdin_inputs, current_inputs) @@ -192,35 +198,35 @@ def get_input( if is_bool and not isinstance(inp, bool): try: - inp = strtobool(str(inp), raise_on_error=True) + inp = string_to_bool(str(inp), raise_on_error=True) except (TypeError, ValueError) as exc: message = f"Input {k} cannot be converted to boolean." raise RuntimeError(message) from exc if is_integer and inp is not None and not isinstance(inp, int): try: - inp = strtoint(str(inp), raise_on_error=True) + inp = string_to_int(str(inp), raise_on_error=True) except (TypeError, ValueError) as exc: message = f"Input {k} cannot be converted to integer." raise RuntimeError(message) from exc if is_float and inp is not None and not isinstance(inp, float): try: - inp = strtofloat(str(inp), raise_on_error=True) + inp = string_to_float(str(inp), raise_on_error=True) except (TypeError, ValueError) as exc: message = f"Input {k} cannot be converted to float." 
raise RuntimeError(message) from exc if is_path and inp is not None: try: - inp = strtopath(str(inp), raise_on_error=True) + inp = string_to_path(str(inp), raise_on_error=True) except (TypeError, ValueError) as exc: message = f"Input {k} cannot be converted to Path." raise RuntimeError(message) from exc if is_datetime and inp is not None: try: - inp = strtodatetime(str(inp), raise_on_error=True) + inp = string_to_datetime(str(inp), raise_on_error=True) except (TypeError, ValueError) as exc: message = f"Input {k} cannot be converted to datetime." raise RuntimeError(message) from exc diff --git a/src/extended_data/io/exporters.py b/src/extended_data/io/exporters.py index c0e3e21..52880a4 100644 --- a/src/extended_data/io/exporters.py +++ b/src/extended_data/io/exporters.py @@ -20,7 +20,7 @@ is_yaml_data, ) from extended_data.primitives.serialization import normalize_data_encoding -from extended_data.primitives.types import convert_special_types, strtobool +from extended_data.primitives.types import convert_special_types, string_to_bool def wrap_raw_data_for_export( @@ -61,7 +61,7 @@ def wrap_raw_data_for_export( # Attempt to convert string-based allow_encoding to a boolean try: - allow_encoding_bool = strtobool(allow_encoding, raise_on_error=True) + allow_encoding_bool = string_to_bool(allow_encoding, raise_on_error=True) allow_encoding = allow_encoding_bool if isinstance(allow_encoding_bool, bool) else allow_encoding except ValueError as e: raise ValueError(f"Invalid allow_encoding value: {allow_encoding}") from e diff --git a/src/extended_data/logging/logging.py b/src/extended_data/logging/logging.py index 4cd5415..b628add 100644 --- a/src/extended_data/logging/logging.py +++ b/src/extended_data/logging/logging.py @@ -32,7 +32,7 @@ from extended_data import ( get_unique_signature, is_nothing, - strtobool, + string_to_bool, to_camel_case, to_kebab_case, to_pascal_case, @@ -194,10 +194,10 @@ def _setup_handlers(self, logger: logging.Logger, log_file_name: str) -> 
None: logger.setLevel(gunicorn_logger.level) return - if self.enable_console or strtobool(os.getenv("OVERRIDE_TO_CONSOLE", "False")): + if self.enable_console or string_to_bool(os.getenv("OVERRIDE_TO_CONSOLE", "False")): add_console_handler(logger) - if self.enable_file or strtobool(os.getenv("OVERRIDE_TO_FILE", "False")): + if self.enable_file or string_to_bool(os.getenv("OVERRIDE_TO_FILE", "False")): # Pass the log file name directly add_file_handler(logger, log_file_name) diff --git a/src/extended_data/primitives/__init__.py b/src/extended_data/primitives/__init__.py index 1e3584b..8d22168 100644 --- a/src/extended_data/primitives/__init__.py +++ b/src/extended_data/primitives/__init__.py @@ -80,13 +80,13 @@ make_hashable, reconstruct_special_type, reconstruct_special_types, - strtobool, - strtodate, - strtodatetime, - strtofloat, - strtoint, - strtopath, - strtotime, + string_to_bool, + string_to_date, + string_to_datetime, + string_to_float, + string_to_int, + string_to_path, + string_to_time, typeof, ) @@ -147,13 +147,13 @@ "singularize", "split_dict_by_type", "split_list_by_type", - "strtobool", - "strtodate", - "strtodatetime", - "strtofloat", - "strtoint", - "strtopath", - "strtotime", + "string_to_bool", + "string_to_date", + "string_to_datetime", + "string_to_float", + "string_to_int", + "string_to_path", + "string_to_time", "titleize", "titleize_name", "to_camel_case", diff --git a/src/extended_data/primitives/types.py b/src/extended_data/primitives/types.py index 34649f1..f614bd4 100644 --- a/src/extended_data/primitives/types.py +++ b/src/extended_data/primitives/types.py @@ -114,7 +114,7 @@ def __init__(self, expected_type: builtins.type[Any], value: Any): super().__init__(f"Invalid {type_str} value: {self.value!r}") -def strtobool(val: str | bool | None, raise_on_error: bool = False) -> bool | None: +def string_to_bool(val: str | bool | None, raise_on_error: bool = False) -> bool | None: """Converts a string representation of truth to boolean. 
Args: @@ -145,7 +145,7 @@ def strtobool(val: str | bool | None, raise_on_error: bool = False) -> bool | No return None -def strtofloat(val: str, raise_on_error: bool = False) -> float | None: +def string_to_float(val: str, raise_on_error: bool = False) -> float | None: """Converts a string representation of a float to a float. Args: @@ -172,7 +172,7 @@ def strtofloat(val: str, raise_on_error: bool = False) -> float | None: return None -def strtoint(val: str, raise_on_error: bool = False) -> int | None: +def string_to_int(val: str, raise_on_error: bool = False) -> int | None: """Converts a string representation of an integer to an int. Args: @@ -187,7 +187,7 @@ def strtoint(val: str, raise_on_error: bool = False) -> int | None: """ val = str(val) try: - float_value = strtofloat(val, raise_on_error=raise_on_error) + float_value = string_to_float(val, raise_on_error=raise_on_error) if float_value is not None: return int(float_value) except ConversionError as exc: @@ -200,7 +200,7 @@ def strtoint(val: str, raise_on_error: bool = False) -> int | None: return None -def strtopath(val: str | bytes | os.PathLike[str] | None, raise_on_error: bool = False) -> Path | None: +def string_to_path(val: str | bytes | os.PathLike[str] | None, raise_on_error: bool = False) -> Path | None: """Converts a string or byte representation of a path to a pathlib.Path object. Args: @@ -234,7 +234,7 @@ def strtopath(val: str | bytes | os.PathLike[str] | None, raise_on_error: bool = return None -def strtodate(val: str, raise_on_error: bool = False) -> datetime.date | None: +def string_to_date(val: str, raise_on_error: bool = False) -> datetime.date | None: """Converts a string representation of a date to a datetime.date object. 
Args: @@ -260,7 +260,7 @@ def strtodate(val: str, raise_on_error: bool = False) -> datetime.date | None: return None -def strtodatetime(val: str, raise_on_error: bool = False) -> datetime.datetime | None: +def string_to_datetime(val: str, raise_on_error: bool = False) -> datetime.datetime | None: """Converts a string representation of a datetime to a datetime.datetime object. Args: @@ -291,7 +291,7 @@ def strtodatetime(val: str, raise_on_error: bool = False) -> datetime.datetime | return None -def strtotime(val: str, raise_on_error: bool = False) -> datetime.time | None: +def string_to_time(val: str, raise_on_error: bool = False) -> datetime.time | None: """Converts a string representation of a time to a datetime.time object. Args: @@ -431,19 +431,19 @@ def reconstruct_special_type(converted_obj: str, fail_silently: bool = False) -> if converted_obj in {"None", "null"}: return None if DATETIME_PATTERN.match(converted_obj): - return strtodatetime(converted_obj) + return string_to_datetime(converted_obj) if DATE_PATTERN.match(converted_obj): - return strtodate(converted_obj) + return string_to_date(converted_obj) if TIME_PATTERN.match(converted_obj): - return strtotime(converted_obj) + return string_to_time(converted_obj) if PATH_PATTERN.match(converted_obj): return pathlib.Path(converted_obj) if TRUTHY_PATTERN.match(converted_obj) or FALSY_PATTERN.match(converted_obj): - return strtobool(converted_obj) + return string_to_bool(converted_obj) if NUMBER_PATTERN.match(converted_obj): if INTEGER_PATTERN.match(converted_obj): - return strtoint(converted_obj) - return strtofloat(converted_obj) + return string_to_int(converted_obj) + return string_to_float(converted_obj) if is_potential_yaml(converted_obj): return decode_yaml(converted_obj) diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index af0f3ad..5293408 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -85,6 +85,18 @@ def 
test_clean_major_version_public_names() -> None: assert not hasattr(extended_data, "removeprefix") assert not hasattr(extended_data, "removesuffix") assert not hasattr(extended_data, "bytestostr") + old_type_converters = ( + "strtobool", + "strtodate", + "strtodatetime", + "strtofloat", + "strtoint", + "strtopath", + "strtotime", + ) + for old_name in old_type_converters: + assert not hasattr(primitives, old_name) + assert not hasattr(extended_data, old_name) def test_old_monorepo_import_namespaces_are_not_preserved() -> None: diff --git a/tests/core/test_string_data_type.py b/tests/core/test_string_data_type.py index 993d79a..a3e3fbb 100644 --- a/tests/core/test_string_data_type.py +++ b/tests/core/test_string_data_type.py @@ -14,9 +14,9 @@ - `upper_first_char_data`: Provides input strings and expected results for testing uppercase conversion of the first character. - `url_data`: Provides URLs and expected validation results for testing URL checks. - `titleize_name_data`: Provides camelCase names and expected TitleCase results for testing titleization. - - `strtobool_data`: Provides strings representing truth values for testing boolean conversion. - - `strtofloat_data`: Provides strings representing floats for testing float conversion. - - `strtoint_data`: Provides strings representing integers for testing integer conversion. + - `string_to_bool_data`: Provides strings representing truth values for testing boolean conversion. + - `string_to_float_data`: Provides strings representing floats for testing float conversion. + - `string_to_int_data`: Provides strings representing integers for testing integer conversion. - `valid_path_data`: Provides valid input values and expected results for testing path conversion. - `invalid_path_data`: Provides invalid inputs and expected exceptions for testing path conversion with errors. - `silent_invalid_path_data`: Provides invalid inputs for testing path conversion when errors are silenced. 
@@ -29,12 +29,12 @@ - `test_upper_first_char`: Tests converting the first character of a string to uppercase. - `test_is_url`: Tests checking if a string is a valid URL. - `test_titleize_name`: Tests converting camelCase names to TitleCase. - - `test_strtobool`: Tests converting a string to a boolean value. - - `test_strtofloat`: Tests converting a string to a float value. - - `test_strtoint`: Tests converting a string to an integer value. - - `test_strtopath`: Tests converting valid inputs into pathlib.Path objects. - - `test_strtopath_invalid`: Tests handling invalid path inputs that should raise exceptions. - - `test_strtopath_invalid_silent`: Tests handling invalid path inputs when errors are silenced. + - `test_string_to_bool`: Tests converting a string to a boolean value. + - `test_string_to_float`: Tests converting a string to a float value. + - `test_string_to_int`: Tests converting a string to an integer value. + - `test_string_to_path`: Tests converting valid inputs into pathlib.Path objects. + - `test_string_to_path_invalid`: Tests handling invalid path inputs that should raise exceptions. + - `test_string_to_path_invalid_silent`: Tests handling invalid path inputs when errors are silenced. """ from __future__ import annotations diff --git a/tests/core/test_type_utils.py b/tests/core/test_type_utils.py index 408cb76..d800326 100644 --- a/tests/core/test_type_utils.py +++ b/tests/core/test_type_utils.py @@ -25,13 +25,13 @@ make_hashable, reconstruct_special_type, reconstruct_special_types, - strtobool, - strtodate, - strtodatetime, - strtofloat, - strtoint, - strtopath, - strtotime, + string_to_bool, + string_to_date, + string_to_datetime, + string_to_float, + string_to_int, + string_to_path, + string_to_time, typeof, ) @@ -44,8 +44,8 @@ @pytest.fixture(params=[("yes", True), ("no", False), ("invalid", None)]) -def strtobool_data(request: Any) -> tuple[str, bool | None]: - """Provides data for testing strtobool function. 
+def string_to_bool_data(request: Any) -> tuple[str, bool | None]: + """Provides data for testing string_to_bool function. Yields: tuple[str, bool | None]: A tuple containing the input string and the expected boolean or None result. @@ -54,8 +54,8 @@ def strtobool_data(request: Any) -> tuple[str, bool | None]: @pytest.fixture(params=[("3.14", EXPECTED_FLOAT_1), ("42", EXPECTED_FLOAT_2), ("invalid", None)]) -def strtofloat_data(request: Any) -> tuple[str, float | None]: - """Provides data for testing strtofloat function. +def string_to_float_data(request: Any) -> tuple[str, float | None]: + """Provides data for testing string_to_float function. Yields: tuple[str, float | None]: A tuple containing the input value and the expected float or None result. @@ -64,8 +64,8 @@ def strtofloat_data(request: Any) -> tuple[str, float | None]: @pytest.fixture(params=[("42", EXPECTED_INT_1), ("3.0", EXPECTED_INT_2), ("invalid", None)]) -def strtoint_data(request: Any) -> tuple[str, int | None]: - """Provides data for testing strtoint function. +def string_to_int_data(request: Any) -> tuple[str, int | None]: + """Provides data for testing string_to_int function. Yields: tuple[str, int | None]: A tuple containing the input value and the expected int or None result. @@ -82,7 +82,7 @@ def strtoint_data(request: Any) -> tuple[str, int | None]: ] ) def valid_path_data(request: Any) -> tuple[str | bytes | Path | None, Path | None]: - """Provides valid input and expected output pairs for testing strtopath function. + """Provides valid input and expected output pairs for testing string_to_path function. Yields: tuple[str | bytes | Path | None, Path | None]: A tuple containing the input value and the expected Path or None result. 
@@ -92,7 +92,7 @@ def valid_path_data(request: Any) -> tuple[str | bytes | Path | None, Path | Non @pytest.fixture(params=[("invalid:://path", ValueError, True), (b"\x80invalid", ValueError, True)]) def invalid_path_data(request: Any) -> tuple[str | bytes, type[Exception], bool]: - """Provides invalid input, expected exception type, and raise_on_error flag for testing strtopath. + """Provides invalid input, expected exception type, and raise_on_error flag for testing string_to_path. Yields: tuple[str | bytes, Type[Exception], bool]: A tuple containing the input value, expected exception type, and the raise_on_error flag. @@ -102,7 +102,7 @@ def invalid_path_data(request: Any) -> tuple[str | bytes, type[Exception], bool] @pytest.fixture(params=["invalid:://path", b"\x80invalid"]) def silent_invalid_path_data(request: Any) -> str | bytes: - """Provides invalid input values for testing strtopath when raise_on_error is False. + """Provides invalid input values for testing string_to_path when raise_on_error is False. Yields: str | bytes: The invalid input value to test. @@ -117,8 +117,8 @@ def silent_invalid_path_data(request: Any) -> str | bytes: ("invalid-date", None), ] ) -def strtodate_data(request: Any) -> tuple[str, datetime.date | None]: - """Provides data for testing strtodate function. +def string_to_date_data(request: Any) -> tuple[str, datetime.date | None]: + """Provides data for testing string_to_date function. Yields: tuple[str, datetime.date | None]: A tuple containing the input string and the expected date object or None. @@ -143,8 +143,8 @@ def strtodate_data(request: Any) -> tuple[str, datetime.date | None]: ("invalid-datetime", None), ] ) -def strtodatetime_data(request: Any) -> tuple[str, datetime.datetime | None]: - """Provides data for testing strtodatetime function. +def string_to_datetime_data(request: Any) -> tuple[str, datetime.datetime | None]: + """Provides data for testing string_to_datetime function. 
Yields: tuple[str, datetime.datetime | None]: A tuple containing the input string and the expected datetime object or None. @@ -160,8 +160,8 @@ def strtodatetime_data(request: Any) -> tuple[str, datetime.datetime | None]: ("invalid-time", None), ] ) -def strtotime_data(request: Any) -> tuple[str, datetime.time | None]: - """Provides data for testing strtotime function. +def string_to_time_data(request: Any) -> tuple[str, datetime.time | None]: + """Provides data for testing string_to_time function. Yields: tuple[str, datetime.time | None]: A tuple containing the input string and the expected time object or None. @@ -169,42 +169,42 @@ def strtotime_data(request: Any) -> tuple[str, datetime.time | None]: return request.param -def test_strtobool(strtobool_data: tuple[str, bool | None]) -> None: +def test_string_to_bool(string_to_bool_data: tuple[str, bool | None]) -> None: """Tests converting a string to a boolean value. Args: - strtobool_data (tuple[str, bool | None]): A fixture providing the input string and the expected boolean or None result. + string_to_bool_data (tuple[str, bool | None]): A fixture providing the input string and the expected boolean or None result. Asserts: - The result of strtobool is True for truthy strings, False for falsy strings, and raises a ConversionError for invalid strings if specified. + The result of string_to_bool is True for truthy strings, False for falsy strings, and raises a ConversionError for invalid strings if specified. 
""" - val, expected = strtobool_data - assert strtobool(val) == expected + val, expected = string_to_bool_data + assert string_to_bool(val) == expected if expected is None and val == "invalid": with pytest.raises(ConversionError, match=r"Invalid value: 'invalid'"): - strtobool(val, raise_on_error=True) + string_to_bool(val, raise_on_error=True) -def test_strtobool_passthrough_for_bool_and_none() -> None: +def test_string_to_bool_passthrough_for_bool_and_none() -> None: """Return boolean and None inputs unchanged.""" - assert strtobool(True) is True - assert strtobool(False) is False - assert strtobool(None) is None + assert string_to_bool(True) is True + assert string_to_bool(False) is False + assert string_to_bool(None) is None -def test_strtobool_rejects_non_strings_when_requested() -> None: +def test_string_to_bool_rejects_non_strings_when_requested() -> None: """Reject unsupported non-string inputs when raise_on_error is enabled.""" with pytest.raises(ConversionError, match=r"Invalid value: 123"): - strtobool(123, raise_on_error=True) + string_to_bool(123, raise_on_error=True) def test_string_type_converters_accept_extended_string_values() -> None: """Type conversion primitives compose with Tier 2 ExtendedString values.""" - assert strtobool(ExtendedString("true")) is True - assert strtofloat(ExtendedString("3.14")) == EXPECTED_FLOAT_1 - assert strtoint(ExtendedString("42")) == EXPECTED_INT_1 - assert strtodate(ExtendedString("2023-09-05")) == datetime.date(2023, 9, 5) - assert strtodatetime(ExtendedString("2023-09-05T12:30:00")) == datetime.datetime( + assert string_to_bool(ExtendedString("true")) is True + assert string_to_float(ExtendedString("3.14")) == EXPECTED_FLOAT_1 + assert string_to_int(ExtendedString("42")) == EXPECTED_INT_1 + assert string_to_date(ExtendedString("2023-09-05")) == datetime.date(2023, 9, 5) + assert string_to_datetime(ExtendedString("2023-09-05T12:30:00")) == datetime.datetime( 2023, 9, 5, @@ -213,215 +213,215 @@ def 
test_string_type_converters_accept_extended_string_values() -> None: 0, tzinfo=datetime.timezone.utc, ) - assert strtotime(ExtendedString("12:30")) == datetime.time(12, 30, 0) - assert strtopath(ExtendedString("/valid/path")) == Path("/valid/path") + assert string_to_time(ExtendedString("12:30")) == datetime.time(12, 30, 0) + assert string_to_path(ExtendedString("/valid/path")) == Path("/valid/path") -def test_strtofloat(strtofloat_data: tuple[str, float | None]) -> None: +def test_string_to_float(string_to_float_data: tuple[str, float | None]) -> None: """Tests converting a string to a float value. Args: - strtofloat_data (tuple[str, float | None]): A fixture providing the input value and the expected float or None result. + string_to_float_data (tuple[str, float | None]): A fixture providing the input value and the expected float or None result. Asserts: - The result of strtofloat matches the expected float value and raises a ConversionError for invalid strings if specified. + The result of string_to_float matches the expected float value and raises a ConversionError for invalid strings if specified. 
""" - val, expected = strtofloat_data - assert strtofloat(val) == expected + val, expected = string_to_float_data + assert string_to_float(val) == expected if expected is None and val == "invalid": with pytest.raises(ConversionError, match=r"Invalid value: 'invalid'"): - strtofloat(val, raise_on_error=True) + string_to_float(val, raise_on_error=True) -def test_strtofloat_wraps_float_value_errors(mocker) -> None: +def test_string_to_float_wraps_float_value_errors(mocker) -> None: """Surface float conversion failures as ConversionError when requested.""" mocker.patch("builtins.float", side_effect=ValueError("boom")) with pytest.raises(ConversionError, match=r"Invalid .* value: '3.14'"): - strtofloat("3.14", raise_on_error=True) + string_to_float("3.14", raise_on_error=True) -def test_strtofloat_swallows_float_value_errors_when_not_requested(mocker) -> None: +def test_string_to_float_swallows_float_value_errors_when_not_requested(mocker) -> None: """Return None when float conversion fails and raise_on_error is disabled.""" mocker.patch("builtins.float", side_effect=ValueError("boom")) - assert strtofloat("3.14") is None + assert string_to_float("3.14") is None -def test_strtoint(strtoint_data: tuple[str, int | None]) -> None: +def test_string_to_int(string_to_int_data: tuple[str, int | None]) -> None: """Tests converting a string to an integer value. Args: - strtoint_data (tuple[str, int | None]): A fixture providing the input value and the expected int or None result. + string_to_int_data (tuple[str, int | None]): A fixture providing the input value and the expected int or None result. Asserts: - The result of strtoint matches the expected integer value and raises a ConversionError for invalid strings if specified. + The result of string_to_int matches the expected integer value and raises a ConversionError for invalid strings if specified. 
""" - val, expected = strtoint_data - assert strtoint(val) == expected + val, expected = string_to_int_data + assert string_to_int(val) == expected if expected is None and val == "invalid": with pytest.raises(ConversionError, match=r"Invalid value: 'invalid'"): - strtoint(val, raise_on_error=True) + string_to_int(val, raise_on_error=True) -def test_strtoint_wraps_nested_conversion_errors(mocker) -> None: +def test_string_to_int_wraps_nested_conversion_errors(mocker) -> None: """Map nested float conversion failures to integer conversion failures.""" mocker.patch( - "extended_data.primitives.types.strtofloat", + "extended_data.primitives.types.string_to_float", side_effect=ConversionError(float, "3.14"), ) with pytest.raises(ConversionError, match=r"Invalid value: '3.14'"): - strtoint("3.14", raise_on_error=True) + string_to_int("3.14", raise_on_error=True) -def test_strtoint_swallows_nested_conversion_errors_when_not_requested(mocker) -> None: +def test_string_to_int_swallows_nested_conversion_errors_when_not_requested(mocker) -> None: """Return None when nested conversion fails and raise_on_error is disabled.""" mocker.patch( - "extended_data.primitives.types.strtofloat", + "extended_data.primitives.types.string_to_float", side_effect=ConversionError(float, "3.14"), ) - assert strtoint("3.14") is None + assert string_to_int("3.14") is None -def test_strtoint_raises_when_nested_conversion_returns_none(mocker) -> None: +def test_string_to_int_raises_when_nested_conversion_returns_none(mocker) -> None: """Raise an integer conversion error when nested conversion returns no value.""" - mocker.patch("extended_data.primitives.types.strtofloat", return_value=None) + mocker.patch("extended_data.primitives.types.string_to_float", return_value=None) with pytest.raises(ConversionError, match=r"Invalid value: '3.14'"): - strtoint("3.14", raise_on_error=True) + string_to_int("3.14", raise_on_error=True) -def test_strtopath( +def test_string_to_path( valid_path_data: tuple[str | 
bytes | Path | None, Path | None], ) -> None: - """Tests the strtopath function for converting valid inputs into Path objects. + """Tests the string_to_path function for converting valid inputs into Path objects. Args: valid_path_data (tuple[str | bytes | Path | None, Path | None]): A fixture providing the input value and the expected Path or None result. Asserts: - The result of strtopath matches the expected Path object or None. + The result of string_to_path matches the expected Path object or None. """ value, expected = valid_path_data - assert strtopath(value) == expected + assert string_to_path(value) == expected -def test_strtopath_invalid( +def test_string_to_path_invalid( invalid_path_data: tuple[str | bytes, type[Exception], bool], ) -> None: - """Tests the strtopath function for handling invalid inputs that should raise exceptions. + """Tests the string_to_path function for handling invalid inputs that should raise exceptions. Args: invalid_path_data (tuple[str | bytes, Type[Exception], bool]): A fixture providing the input value, expected exception type, and the raise_on_error flag. Asserts: - The strtopath function raises the expected exception with the correct error message when the raise_on_error flag is set to True. + The string_to_path function raises the expected exception with the correct error message when the raise_on_error flag is set to True. """ value, expected_exception, raise_on_error = invalid_path_data with pytest.raises(expected_exception, match=r"Invalid value"): - strtopath(value, raise_on_error=raise_on_error) + string_to_path(value, raise_on_error=raise_on_error) -def test_strtopath_invalid_silent(silent_invalid_path_data: str | bytes) -> None: - """Tests the strtopath function with invalid inputs when fail_silently is set to True. +def test_string_to_path_invalid_silent(silent_invalid_path_data: str | bytes) -> None: + """Tests the string_to_path function with invalid inputs when fail_silently is set to True. 
Args: silent_invalid_path_data (str | bytes): A fixture providing the invalid input value to test. Asserts: - The strtopath function returns None when the input is invalid and the raise_on_error flag is False. + The string_to_path function returns None when the input is invalid and the raise_on_error flag is False. """ - assert strtopath(silent_invalid_path_data) is None + assert string_to_path(silent_invalid_path_data) is None -def test_strtodate(strtodate_data: tuple[str, datetime.date | None]) -> None: +def test_string_to_date(string_to_date_data: tuple[str, datetime.date | None]) -> None: """Tests converting a string to a date value. Args: - strtodate_data (tuple[str, datetime.date | None]): A fixture providing the input string and the expected date object or None. + string_to_date_data (tuple[str, datetime.date | None]): A fixture providing the input string and the expected date object or None. Asserts: - The result of strtodate matches the expected date value and raises a ConversionError for invalid strings if specified. + The result of string_to_date matches the expected date value and raises a ConversionError for invalid strings if specified. 
""" - val, expected = strtodate_data - assert strtodate(val) == expected + val, expected = string_to_date_data + assert string_to_date(val) == expected if expected is None and val == "invalid-date": with pytest.raises( ConversionError, match=r"Invalid value: 'invalid-date'", ): - strtodate(val, raise_on_error=True) + string_to_date(val, raise_on_error=True) -def test_strtodate_invalid_matching_pattern_raises() -> None: +def test_string_to_date_invalid_matching_pattern_raises() -> None: """Reject impossible calendar dates that still match the date pattern.""" - assert strtodate("2023-13-40") is None + assert string_to_date("2023-13-40") is None with pytest.raises(ConversionError, match=r"Invalid value: '2023-13-40'"): - strtodate("2023-13-40", raise_on_error=True) + string_to_date("2023-13-40", raise_on_error=True) -def test_strtodatetime( - strtodatetime_data: tuple[str, datetime.datetime | None], +def test_string_to_datetime( + string_to_datetime_data: tuple[str, datetime.datetime | None], ) -> None: """Tests converting a string to a datetime value. Args: - strtodatetime_data (tuple[str, datetime.datetime | None]): A fixture providing the input string and the expected datetime object or None. + string_to_datetime_data (tuple[str, datetime.datetime | None]): A fixture providing the input string and the expected datetime object or None. Asserts: - The result of strtodatetime matches the expected datetime value and raises a ConversionError for invalid strings if specified. + The result of string_to_datetime matches the expected datetime value and raises a ConversionError for invalid strings if specified. 
""" - val, expected = strtodatetime_data - assert strtodatetime(val) == expected + val, expected = string_to_datetime_data + assert string_to_datetime(val) == expected if expected is None and val == "invalid-datetime": with pytest.raises( ConversionError, match=r"Invalid value: 'invalid-datetime'", ): - strtodatetime(val, raise_on_error=True) + string_to_datetime(val, raise_on_error=True) -def test_strtodatetime_invalid_matching_pattern_raises() -> None: +def test_string_to_datetime_invalid_matching_pattern_raises() -> None: """Reject impossible datetimes that still match the datetime pattern.""" invalid_value = "2023-13-05T25:61:00" - assert strtodatetime(invalid_value) is None + assert string_to_datetime(invalid_value) is None with pytest.raises(ConversionError, match=r"Invalid value: '2023-13-05T25:61:00'"): - strtodatetime(invalid_value, raise_on_error=True) + string_to_datetime(invalid_value, raise_on_error=True) -def test_strtodatetime_preserves_explicit_timezone() -> None: +def test_string_to_datetime_preserves_explicit_timezone() -> None: """Keep explicit timezone offsets instead of forcing UTC.""" - result = strtodatetime("2023-09-05T12:30:00+02:00") + result = string_to_datetime("2023-09-05T12:30:00+02:00") assert result == datetime.datetime(2023, 9, 5, 12, 30, 0, tzinfo=datetime.timezone(datetime.timedelta(hours=2))) -def test_strtotime(strtotime_data: tuple[str, datetime.time | None]) -> None: +def test_string_to_time(string_to_time_data: tuple[str, datetime.time | None]) -> None: """Tests converting a string to a time value. Args: - strtotime_data (tuple[str, datetime.time | None]): A fixture providing the input string and the expected time object or None. + string_to_time_data (tuple[str, datetime.time | None]): A fixture providing the input string and the expected time object or None. Asserts: - The result of strtotime matches the expected time value and raises a ConversionError for invalid strings if specified. 
+ The result of string_to_time matches the expected time value and raises a ConversionError for invalid strings if specified. """ - val, expected = strtotime_data - assert strtotime(val) == expected + val, expected = string_to_time_data + assert string_to_time(val) == expected if expected is None and val == "invalid-time": with pytest.raises( ConversionError, match=r"Invalid value: 'invalid-time'", ): - strtotime(val, raise_on_error=True) + string_to_time(val, raise_on_error=True) -def test_strtotime_invalid_matching_pattern_raises() -> None: +def test_string_to_time_invalid_matching_pattern_raises() -> None: """Reject impossible times that still match the time pattern.""" invalid_value = "25:61:00" - assert strtotime(invalid_value) is None + assert string_to_time(invalid_value) is None with pytest.raises(ConversionError, match=r"Invalid value: '25:61:00'"): - strtotime(invalid_value, raise_on_error=True) + string_to_time(invalid_value, raise_on_error=True) # Test for get_default_value_for_type function From 4b1663d391cd48eb79be09b47e3b4b4725c6d443 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:30:13 -0500 Subject: [PATCH 132/287] docs: document explicit primitive names --- README.md | 7 +++++++ docs/package-surface.md | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/README.md b/README.md index 9b544b6..c61ec07 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,13 @@ extended_data/ workflows/ Tier 3 higher-order workflow composition ``` +Tier 1 primitive names are explicit in this major version. Use +`bytes_to_string()` for bytes-like coercion and `string_to_bool()`, +`string_to_int()`, `string_to_float()`, `string_to_path()`, +`string_to_date()`, `string_to_datetime()`, and `string_to_time()` for scalar +string conversion. The old `bytestostr` and `strto*` helper names are not +preserved. + Vendor connectors are first-class adapters in the data fabric. 
`ConnectorFabric` uses the registry to resolve connectors by name, injects shared input/logging context, caches connector instances, and lets specialized helpers coexist with diff --git a/docs/package-surface.md b/docs/package-surface.md index 77acf32..7e1c5d4 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -46,6 +46,13 @@ from extended_data import ( - Tier 3 processors use the first two tiers to handle files, imports, exports, inputs, API data, vendor integrations, and workflows. +Clean major-version primitive names prefer explicit Python words over inherited +helper spellings: use `bytes_to_string()` and the `string_to_*()` conversion +family (`string_to_bool()`, `string_to_int()`, `string_to_float()`, +`string_to_path()`, `string_to_date()`, `string_to_datetime()`, and +`string_to_time()`). The old `bytestostr` and `strto*` helper names are +intentionally not preserved. + Direct JSON, YAML, TOML, and HCL decode failures raise `DataDecodeError` with format and position context while preserving the parser exception as the cause; the public error message does not echo the raw payload. 
From 63c4ab44ccb61de96a144efadf00562c70894973 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:37:35 -0500 Subject: [PATCH 133/287] test: enforce workflow action sha pins --- tests/core/test_release_hygiene.py | 35 ++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tests/core/test_release_hygiene.py diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py new file mode 100644 index 0000000..63091c2 --- /dev/null +++ b/tests/core/test_release_hygiene.py @@ -0,0 +1,35 @@ +"""Release hygiene checks for repository automation.""" + +from __future__ import annotations + +import re + +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] +WORKFLOW_ROOT = REPO_ROOT / ".github" / "workflows" +ACTION_REF_RE = re.compile(r"^\s*(?:-\s*)?uses:\s*([^#\s]+)") +PINNED_SHA_RE = re.compile(r"^[0-9a-f]{40}$") + + +def test_workflow_actions_are_pinned_to_exact_shas() -> None: + """Remote workflow actions should use immutable action commit SHAs.""" + offenders: list[str] = [] + + for path in sorted(WORKFLOW_ROOT.glob("*.yml")) + sorted(WORKFLOW_ROOT.glob("*.yaml")): + for line_number, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1): + match = ACTION_REF_RE.match(line) + if match is None: + continue + + uses = match.group(1).strip() + if uses.startswith(("./", "docker://")): + continue + + _, separator, ref = uses.rpartition("@") + if not separator or PINNED_SHA_RE.fullmatch(ref) is None: + relative_path = path.relative_to(REPO_ROOT) + offenders.append(f"{relative_path}:{line_number}: {uses}") + + assert offenders == [] From 7dd1a1ee791f53e1f5638116f50bceaceafbac95 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:49:57 -0500 Subject: [PATCH 134/287] refactor: unify aws and google connector surfaces --- README.md | 9 +++- docs/package-surface.md | 10 +++- examples/connectors/basic_aws.py | 18 +++---- examples/connectors/basic_google.py | 18 
+++---- src/extended_data/connectors/__init__.py | 16 ++----- src/extended_data/connectors/aws/__init__.py | 29 ++++------- src/extended_data/connectors/aws/tools.py | 32 ++++++------- .../connectors/google/__init__.py | 48 ++++++++----------- src/extended_data/connectors/google/tools.py | 24 +++++----- tests/connectors/test_aws_s3.py | 4 +- tests/connectors/test_aws_sso.py | 4 +- tests/connectors/test_aws_tools.py | 4 +- tests/connectors/test_google_cloud.py | 4 +- tests/connectors/test_google_connector.py | 9 ++-- tests/connectors/test_google_services.py | 4 +- tests/connectors/test_google_tools.py | 4 +- tests/connectors/test_google_workspace.py | 4 +- tests/core/test_package_surface.py | 15 ++++-- 18 files changed, 115 insertions(+), 141 deletions(-) diff --git a/README.md b/README.md index c61ec07..7a56b6e 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,10 @@ Vendor connectors are first-class adapters in the data fabric. `ConnectorFabric` uses the registry to resolve connectors by name, injects shared input/logging context, caches connector instances, and lets specialized helpers coexist with generic vendor lookup. +`AWSConnector` and `GoogleConnector` are unified first-class classes: S3, +Organizations, SSO, Workspace, Cloud Resource Manager, Billing, and services +operations live on those connectors directly rather than on separate +`*Full` classes. Connector data payloads are promoted into Tier 2 containers at the boundary, so decoded files, HTTP response data, GraphQL responses, and SDK-shaped maps can use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods immediately. @@ -134,8 +138,9 @@ result = SecretsConnector(prefer_native=False).run_pipeline( The package is intentionally tiered: - Tier 1 functions stay stateless and composable. -- Tier 2 containers inherit Python's user container types and expose ergonomic - methods over Tier 1 functions. 
+- Tier 2 containers inherit `UserString`, `UserDict`, `UserList`, immutable + `tuple`, or `MutableSet`-compatible primitives and expose ergonomic methods + over Tier 1 functions. - Tier 3 processors use the first two tiers to handle files, inputs, API data, vendor integrations, and workflows. diff --git a/docs/package-surface.md b/docs/package-surface.md index 7e1c5d4..5f9885f 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -41,8 +41,9 @@ from extended_data import ( formats. - Tier 2 `extended_data.containers` classes wrap Python container primitives as `ExtendedString`, `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, and - `ExtendedSet` with - ergonomic methods over Tier 1 primitives. + `ExtendedSet` with ergonomic methods over Tier 1 primitives. They use + `UserString`, `UserDict`, `UserList`, immutable `tuple`, or + `MutableSet`-compatible bases depending on the underlying data shape. - Tier 3 processors use the first two tiers to handle files, imports, exports, inputs, API data, vendor integrations, and workflows. @@ -173,6 +174,11 @@ messages. `ConnectorFabric` caches and coordinates vendor connectors while sharing input loading, logging, data normalization, retry behavior, and serialization. +`AWSConnector` and `GoogleConnector` are unified connector classes in this +major version: common S3, Organizations, SSO, Workspace, Cloud Resource +Manager, Billing, and service-discovery operations live directly on those +connectors. The old split between base connector classes and separate `*Full` +connector classes is intentionally not preserved. 
## Connector Fabric diff --git a/examples/connectors/basic_aws.py b/examples/connectors/basic_aws.py index 94c1613..0f04b11 100644 --- a/examples/connectors/basic_aws.py +++ b/examples/connectors/basic_aws.py @@ -27,25 +27,19 @@ def main() -> int: return 1 try: - from extended_data.connectors import AWSConnector, AWSConnectorFull + from extended_data.connectors import AWSConnector except ImportError: print("Error: Could not import extended_data.connectors. Install with: pip install extended-data[aws]") return 1 - # Basic connector - just session management - print("Creating basic AWS connector...") - AWSConnector() - print("Basic connector created successfully.") - - # Full connector with all operations - print("\nCreating full AWS connector...") - full_connector = AWSConnectorFull() - print("Full connector created successfully.") + print("Creating AWS connector...") + connector = AWSConnector() + print("AWS connector created successfully.") # List S3 buckets print("\n--- S3 Buckets ---") try: - buckets = full_connector.list_s3_buckets() + buckets = connector.list_s3_buckets() for bucket_name, bucket in list(buckets.items())[:5]: # Show first 5 created = bucket.get("creation_date") or bucket.get("CreationDate") print(f" Bucket: {bucket_name} ({created})") @@ -57,7 +51,7 @@ def main() -> int: # List organization accounts (if using Organizations) print("\n--- Organization Accounts ---") try: - accounts = full_connector.get_accounts() + accounts = connector.get_accounts() for account_id, account in list(accounts.items())[:5]: name = account.get("name") or account.get("Name") or account_id print(f" Account: {account_id} ({name})") diff --git a/examples/connectors/basic_google.py b/examples/connectors/basic_google.py index 3bc9cc0..cb78291 100644 --- a/examples/connectors/basic_google.py +++ b/examples/connectors/basic_google.py @@ -26,25 +26,19 @@ def main() -> int: return 1 try: - from extended_data.connectors import GoogleConnector, GoogleConnectorFull + from 
extended_data.connectors import GoogleConnector except ImportError: print("Error: Could not import extended_data.connectors. Install with: pip install extended-data[google]") return 1 - # Basic connector - print("Creating basic Google connector...") - GoogleConnector() - print("Basic connector created successfully.") - - # Full connector with all operations - print("\nCreating full Google connector...") - full_connector = GoogleConnectorFull() - print("Full connector created successfully.") + print("Creating Google connector...") + connector = GoogleConnector() + print("Google connector created successfully.") # List projects print("\n--- Google Cloud Projects ---") try: - projects = full_connector.list_projects() + projects = connector.list_projects() for project in projects[:5]: print(f" Project: {project}") if len(projects) > 5: @@ -56,7 +50,7 @@ def main() -> int: if os.getenv("GOOGLE_DOMAIN"): print("\n--- Workspace Users ---") try: - users = full_connector.list_users() + users = connector.list_users() for user in users[:5]: email = user.get("primaryEmail", "Unknown") print(f" User: {email}") diff --git a/src/extended_data/connectors/__init__.py b/src/extended_data/connectors/__init__.py index cf61ea2..8adb225 100644 --- a/src/extended_data/connectors/__init__.py +++ b/src/extended_data/connectors/__init__.py @@ -12,13 +12,9 @@ - Zoom: User and meeting management Usage: - # Basic connector (session management + secrets) + # AWS connector with session management, secrets, Organizations, SSO, and S3 from extended_data.connectors import AWSConnector connector = AWSConnector() - - # Full connector with all operations - from extended_data.connectors.aws import AWSConnectorFull - connector = AWSConnectorFull() accounts = connector.get_accounts() # Cursor AI agents @@ -31,10 +27,10 @@ anthropic = AnthropicConnector() response = anthropic.create_message(...) 
- # Mixin approach for custom connectors - from extended_data.connectors.aws import AWSConnector, AWSOrganizationsMixin + # Custom connector behavior can subclass the unified connector + from extended_data.connectors.aws import AWSConnector - class MyConnector(AWSConnector, AWSOrganizationsMixin): + class MyConnector(AWSConnector): pass # Meshy AI 3D generation (functional interface) @@ -58,7 +54,6 @@ class MyConnector(AWSConnector, AWSOrganizationsMixin): from extended_data.connectors.anthropic import AnthropicConnector from extended_data.connectors.aws import ( AWSConnector, - AWSConnectorFull, AWSOrganizationsMixin, AWSS3Mixin, AWSSSOmixin, @@ -80,7 +75,6 @@ class MyConnector(AWSConnector, AWSOrganizationsMixin): GoogleCloudConnector, GoogleCloudMixin, GoogleConnector, - GoogleConnectorFull, GoogleServicesMixin, GoogleWorkspaceConnector, GoogleWorkspaceMixin, @@ -95,7 +89,6 @@ class MyConnector(AWSConnector, AWSOrganizationsMixin): __all__ = [ "AWSConnector", - "AWSConnectorFull", "AWSOrganizationsMixin", "AWSS3Mixin", "AWSSSOmixin", @@ -109,7 +102,6 @@ class MyConnector(AWSConnector, AWSOrganizationsMixin): "GoogleCloudConnector", "GoogleCloudMixin", "GoogleConnector", - "GoogleConnectorFull", "GoogleServicesMixin", "GoogleWorkspaceConnector", "GoogleWorkspaceMixin", diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index e09807d..a3d60a7 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -4,7 +4,7 @@ - organizations: AWS Organizations and Control Tower account management - sso: IAM Identity Center (SSO) operations - s3: S3 bucket and object operations -- secrets: Secrets Manager operations (in base connector) +- secrets: Secrets Manager operations - ecs: ECS cluster and service operations Usage: @@ -21,6 +21,9 @@ from extended_data import is_nothing from extended_data.connectors._optional import require_extra +from 
extended_data.connectors.aws.organizations import AWSOrganizationsMixin +from extended_data.connectors.aws.s3 import AWSS3Mixin +from extended_data.connectors.aws.sso import AWSSSOmixin from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data, to_builtin from extended_data.logging import Logging @@ -56,15 +59,14 @@ def _load_aws_sdk() -> Any: return boto3 -class AWSConnector(VendorConnectorBase): - """AWS connector for boto3 client and resource management. +class AWSConnector(AWSOrganizationsMixin, AWSSSOmixin, AWSS3Mixin, VendorConnectorBase): + """AWS connector for boto3 client, resource, and vendor data operations. - This is the base connector class providing: + This first-class connector provides: - Session management and role assumption - Client/resource creation with retry configuration - Secrets Manager operations - - Higher-level operations are provided via mixin classes from submodules. + - Organizations, IAM Identity Center, and S3 operations """ def __init__( @@ -604,20 +606,6 @@ def load_vendors_from_asm(prefix: str = "/vendors/") -> ExtendedDict: from extended_data.connectors.aws.codedeploy import create_codedeploy_deployment, get_aws_codedeploy_deployments -from extended_data.connectors.aws.organizations import AWSOrganizationsMixin -from extended_data.connectors.aws.s3 import AWSS3Mixin -from extended_data.connectors.aws.sso import AWSSSOmixin - - -class AWSConnectorFull(AWSConnector, AWSOrganizationsMixin, AWSSSOmixin, AWSS3Mixin): - """Full AWS connector with all operations. - - This class combines the base AWSConnector with all operation mixins. - Use this for full functionality, or use AWSConnector directly and - import specific mixins as needed. 
- """ - - from extended_data.connectors.aws.tools import ( get_crewai_tools, get_langchain_tools, @@ -629,7 +617,6 @@ class AWSConnectorFull(AWSConnector, AWSOrganizationsMixin, AWSSSOmixin, AWSS3Mi __all__ = [ # Core connector classes "AWSConnector", - "AWSConnectorFull", "AWSOrganizationsMixin", "AWSS3Mixin", "AWSSSOmixin", diff --git a/src/extended_data/connectors/aws/tools.py b/src/extended_data/connectors/aws/tools.py index b3d1025..9b49b42 100644 --- a/src/extended_data/connectors/aws/tools.py +++ b/src/extended_data/connectors/aws/tools.py @@ -90,9 +90,9 @@ def get_caller_account_id() -> ExtendedDict: Returns: Dict with account_id field. """ - from extended_data.connectors.aws import AWSConnectorFull + from extended_data.connectors.aws import AWSConnector - connector = AWSConnectorFull() + connector = AWSConnector() account_id = connector.get_caller_account_id() return extend_data({"account_id": account_id}) @@ -103,9 +103,9 @@ def list_s3_buckets() -> ExtendedList[ExtendedDict]: Returns: List of bucket info (name, creation_date, region). """ - from extended_data.connectors.aws import AWSConnectorFull + from extended_data.connectors.aws import AWSConnector - connector = AWSConnectorFull() + connector = AWSConnector() buckets = connector.list_s3_buckets() return extend_data( [ @@ -128,9 +128,9 @@ def list_s3_objects(bucket: str) -> ExtendedList[ExtendedDict]: Returns: List of object info (key, size, last_modified). """ - from extended_data.connectors.aws import AWSConnectorFull + from extended_data.connectors.aws import AWSConnector - connector = AWSConnectorFull() + connector = AWSConnector() objects_raw: Any = connector.list_objects(bucket) if isinstance(objects_raw, Mapping): objects = [{"key": key, **data} for key, data in objects_raw.items()] @@ -157,9 +157,9 @@ def list_accounts() -> ExtendedList[ExtendedDict]: Returns: List of account info (id, name, email, status). 
""" - from extended_data.connectors.aws import AWSConnectorFull + from extended_data.connectors.aws import AWSConnector - connector = AWSConnectorFull() + connector = AWSConnector() accounts = connector.get_accounts() return extend_data( [ @@ -180,9 +180,9 @@ def list_sso_users() -> ExtendedList[ExtendedDict]: Returns: List of user info (user_id, user_name, display_name, email). """ - from extended_data.connectors.aws import AWSConnectorFull + from extended_data.connectors.aws import AWSConnector - connector = AWSConnectorFull() + connector = AWSConnector() users = connector.list_sso_users() return extend_data( [ @@ -203,9 +203,9 @@ def list_sso_groups() -> ExtendedList[ExtendedDict]: Returns: List of group info (group_id, display_name, member_count). """ - from extended_data.connectors.aws import AWSConnectorFull + from extended_data.connectors.aws import AWSConnector - connector = AWSConnectorFull() + connector = AWSConnector() groups = connector.list_sso_groups() return extend_data( [ @@ -232,9 +232,9 @@ def list_secrets( Returns: List of secret info (name, arn, value). """ - from extended_data.connectors.aws import AWSConnectorFull + from extended_data.connectors.aws import AWSConnector - connector = AWSConnectorFull() + connector = AWSConnector() # Align with tests: only pass arguments that match test expectations kwargs: dict[str, Any] = {} if prefix: @@ -264,9 +264,9 @@ def get_secret(secret_id: str) -> ExtendedDict: Returns: Dict with secret_name, secret_value, and status. 
""" - from extended_data.connectors.aws import AWSConnectorFull + from extended_data.connectors.aws import AWSConnector - connector = AWSConnectorFull() + connector = AWSConnector() value = connector.get_secret(secret_id) return extend_data( { diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index c341dab..e31f992 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -9,6 +9,10 @@ from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.google.billing import GoogleBillingMixin +from extended_data.connectors.google.cloud import GoogleCloudMixin +from extended_data.connectors.google.services import GoogleServicesMixin +from extended_data.connectors.google.workspace import GoogleWorkspaceMixin from extended_data.containers import ExtendedDict, ExtendedList from extended_data.logging import Logging @@ -47,15 +51,20 @@ def _load_google_sdk() -> None: ] -class GoogleConnector(VendorConnectorBase): - """Google Cloud and Workspace base connector. +class GoogleConnector( + GoogleWorkspaceMixin, + GoogleCloudMixin, + GoogleBillingMixin, + GoogleServicesMixin, + VendorConnectorBase, +): + """Google Cloud and Workspace connector. - This is the base connector class providing: + This first-class connector provides: - Authentication via service account - Service client creation and caching - Subject impersonation for domain-wide delegation - - Higher-level operations are provided via mixin classes from submodules. 
+ - Workspace, Cloud Resource Manager, Billing, and service discovery operations """ def __init__( @@ -553,9 +562,6 @@ def list_groups( return self.extend_result(filtered_groups) -# Import submodule operations -from extended_data.connectors.google.billing import GoogleBillingMixin -from extended_data.connectors.google.cloud import GoogleCloudMixin from extended_data.connectors.google.constants import ( DEFAULT_DOMAIN, DEFAULT_USER_OUS, @@ -572,37 +578,24 @@ def list_groups( SessionState, Source, ) -from extended_data.connectors.google.services import GoogleServicesMixin from extended_data.connectors.google.tools import ( get_crewai_tools, get_langchain_tools, get_strands_tools, get_tools, ) -from extended_data.connectors.google.workspace import GoogleWorkspaceMixin - - -class GoogleConnectorFull( - GoogleConnector, GoogleWorkspaceMixin, GoogleCloudMixin, GoogleBillingMixin, GoogleServicesMixin -): - """Full Google connector with all operations. - - This class combines the base GoogleConnector with all operation mixins. - Use this for full functionality, or use GoogleConnector directly and - import specific mixins as needed. 
- """ -class GoogleCloudConnector(GoogleConnector, GoogleCloudMixin): - """Google connector focused on Cloud Resource Manager and IAM operations.""" +class GoogleCloudConnector(GoogleConnector): + """Google connector entry point for Cloud Resource Manager and IAM workflows.""" -class GoogleWorkspaceConnector(GoogleConnector, GoogleWorkspaceMixin): - """Google connector focused on Admin Directory user and group operations.""" +class GoogleWorkspaceConnector(GoogleConnector): + """Google connector entry point for Admin Directory user and group workflows.""" -class GoogleBillingConnector(GoogleConnector, GoogleBillingMixin): - """Google connector focused on Cloud Billing account and project billing operations.""" +class GoogleBillingConnector(GoogleConnector): + """Google connector entry point for Cloud Billing account and project billing workflows.""" __all__ = [ @@ -619,7 +612,6 @@ class GoogleBillingConnector(GoogleConnector, GoogleBillingMixin): "GoogleCloudConnector", "GoogleCloudMixin", "GoogleConnector", - "GoogleConnectorFull", "GoogleServicesMixin", "GoogleWorkspaceConnector", "GoogleWorkspaceMixin", diff --git a/src/extended_data/connectors/google/tools.py b/src/extended_data/connectors/google/tools.py index 464f141..6b507b1 100644 --- a/src/extended_data/connectors/google/tools.py +++ b/src/extended_data/connectors/google/tools.py @@ -100,9 +100,9 @@ def list_projects( Returns: List of project info (project_id, name, state, parent). """ - from extended_data.connectors.google import GoogleConnectorFull + from extended_data.connectors.google import GoogleConnector - connector = GoogleConnectorFull() + connector = GoogleConnector() projects = connector.list_projects(parent=parent or None) # Limit results and extract key fields @@ -133,9 +133,9 @@ def list_folders( Returns: List of folder info (name, display_name, state, parent). 
""" - from extended_data.connectors.google import GoogleConnectorFull + from extended_data.connectors.google import GoogleConnector - connector = GoogleConnectorFull() + connector = GoogleConnector() folders = connector.list_folders(parent=parent) # Limit results and extract key fields @@ -166,9 +166,9 @@ def list_enabled_services( Returns: List of service info (name, title, state). """ - from extended_data.connectors.google import GoogleConnectorFull + from extended_data.connectors.google import GoogleConnector - connector = GoogleConnectorFull() + connector = GoogleConnector() services = connector.list_enabled_services(project_id=project_id) # Limit results and extract key fields @@ -196,9 +196,9 @@ def list_billing_accounts( Returns: List of billing account info (name, display_name, open, master_billing_account). """ - from extended_data.connectors.google import GoogleConnectorFull + from extended_data.connectors.google import GoogleConnector - connector = GoogleConnectorFull() + connector = GoogleConnector() accounts = connector.list_billing_accounts() # Limit results and extract key fields @@ -229,9 +229,9 @@ def list_workspace_users( Returns: List of user info (email, name, full_name, suspended, org_unit_path). """ - from extended_data.connectors.google import GoogleConnectorFull + from extended_data.connectors.google import GoogleConnector - connector = GoogleConnectorFull() + connector = GoogleConnector() users_raw: Any = connector.list_users( domain=domain or None, flatten_names=True, @@ -271,9 +271,9 @@ def list_workspace_groups( Returns: List of group info (email, name, description, direct_members_count). 
""" - from extended_data.connectors.google import GoogleConnectorFull + from extended_data.connectors.google import GoogleConnector - connector = GoogleConnectorFull() + connector = GoogleConnector() groups_raw: Any = connector.list_groups( domain=domain or None, key_by_email=False, diff --git a/tests/connectors/test_aws_s3.py b/tests/connectors/test_aws_s3.py index de79fa6..2ad0c2f 100644 --- a/tests/connectors/test_aws_s3.py +++ b/tests/connectors/test_aws_s3.py @@ -16,14 +16,14 @@ from botocore.exceptions import ClientError from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data -from extended_data.connectors.aws import AWSConnectorFull +from extended_data.connectors.aws import AWSConnector @pytest.fixture def aws_connector(): """Create AWS connector with mocked clients.""" with patch("extended_data.connectors.aws.boto3"): - connector = AWSConnectorFull() + connector = AWSConnector() connector.logger = MagicMock() return connector diff --git a/tests/connectors/test_aws_sso.py b/tests/connectors/test_aws_sso.py index bfd59dc..4077c0c 100644 --- a/tests/connectors/test_aws_sso.py +++ b/tests/connectors/test_aws_sso.py @@ -13,14 +13,14 @@ from botocore.exceptions import ClientError from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString -from extended_data.connectors.aws import AWSConnectorFull +from extended_data.connectors.aws import AWSConnector @pytest.fixture def aws_connector(): """Create AWS connector with mocked clients.""" with patch("extended_data.connectors.aws.boto3"): - connector = AWSConnectorFull() + connector = AWSConnector() connector.logger = MagicMock() return connector diff --git a/tests/connectors/test_aws_tools.py b/tests/connectors/test_aws_tools.py index ca77f1d..012dda4 100644 --- a/tests/connectors/test_aws_tools.py +++ b/tests/connectors/test_aws_tools.py @@ -11,8 +11,8 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data -# Patch 
target for AWSConnectorFull - must patch where it's imported -AWS_CONNECTOR_PATCH = "extended_data.connectors.aws.AWSConnectorFull" +# Patch where the tool functions instantiate the first-class connector. +AWS_CONNECTOR_PATCH = "extended_data.connectors.aws.AWSConnector" def test_aws_connector_requires_boto3_when_constructed_without_extra() -> None: diff --git a/tests/connectors/test_google_cloud.py b/tests/connectors/test_google_cloud.py index efecdc5..0650e61 100644 --- a/tests/connectors/test_google_cloud.py +++ b/tests/connectors/test_google_cloud.py @@ -11,7 +11,7 @@ pytest.importorskip("googleapiclient") from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data -from extended_data.connectors.google import GoogleConnectorFull +from extended_data.connectors.google import GoogleConnector @pytest.fixture @@ -25,7 +25,7 @@ def google_connector(): "project_id": "test-project", } with patch("googleapiclient.discovery.build"): - connector = GoogleConnectorFull(service_account_info=service_account) + connector = GoogleConnector(service_account_info=service_account) connector.logger = MagicMock() return connector diff --git a/tests/connectors/test_google_connector.py b/tests/connectors/test_google_connector.py index ddc7078..ded1215 100644 --- a/tests/connectors/test_google_connector.py +++ b/tests/connectors/test_google_connector.py @@ -15,7 +15,6 @@ GoogleBillingConnector, GoogleCloudConnector, GoogleConnector, - GoogleConnectorFull, GoogleWorkspaceConnector, ) @@ -248,12 +247,12 @@ def test_specialized_connector_exports_match_available_operations(self, base_con cloud = GoogleCloudConnector(service_account_info=service_account, **base_connector_kwargs) workspace = GoogleWorkspaceConnector(service_account_info=service_account, **base_connector_kwargs) billing = GoogleBillingConnector(service_account_info=service_account, **base_connector_kwargs) - full = GoogleConnectorFull(service_account_info=service_account, 
**base_connector_kwargs) + connector = GoogleConnector(service_account_info=service_account, **base_connector_kwargs) assert hasattr(cloud, "list_projects") assert hasattr(workspace, "list_users") assert hasattr(billing, "list_billing_accounts") - assert hasattr(full, "list_projects") - assert hasattr(full, "list_users") - assert hasattr(full, "list_billing_accounts") + assert hasattr(connector, "list_projects") + assert hasattr(connector, "list_users") + assert hasattr(connector, "list_billing_accounts") diff --git a/tests/connectors/test_google_services.py b/tests/connectors/test_google_services.py index e668925..1c72685 100644 --- a/tests/connectors/test_google_services.py +++ b/tests/connectors/test_google_services.py @@ -11,7 +11,7 @@ pytest.importorskip("googleapiclient") from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data -from extended_data.connectors.google import GoogleConnectorFull +from extended_data.connectors.google import GoogleConnector @pytest.fixture @@ -25,7 +25,7 @@ def google_connector(): "project_id": "test-project", } with patch("googleapiclient.discovery.build"): - connector = GoogleConnectorFull(service_account_info=service_account) + connector = GoogleConnector(service_account_info=service_account) connector.logger = MagicMock() return connector diff --git a/tests/connectors/test_google_tools.py b/tests/connectors/test_google_tools.py index 7057ea2..f9b57ac 100644 --- a/tests/connectors/test_google_tools.py +++ b/tests/connectors/test_google_tools.py @@ -11,8 +11,8 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data -# Patch target for GoogleConnectorFull - must patch where it's imported -GOOGLE_CONNECTOR_PATCH = "extended_data.connectors.google.GoogleConnectorFull" +# Patch where the tool functions instantiate the first-class connector. 
+GOOGLE_CONNECTOR_PATCH = "extended_data.connectors.google.GoogleConnector" def test_google_connector_requires_google_sdk_when_constructed_without_extra() -> None: diff --git a/tests/connectors/test_google_workspace.py b/tests/connectors/test_google_workspace.py index 3aacfec..7938c3e 100644 --- a/tests/connectors/test_google_workspace.py +++ b/tests/connectors/test_google_workspace.py @@ -11,7 +11,7 @@ pytest.importorskip("googleapiclient") from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data -from extended_data.connectors.google import GoogleConnectorFull +from extended_data.connectors.google import GoogleConnector @pytest.fixture @@ -25,7 +25,7 @@ def google_connector(): "project_id": "test-project", } with patch("googleapiclient.discovery.build"): - connector = GoogleConnectorFull(service_account_info=service_account) + connector = GoogleConnector(service_account_info=service_account) connector.logger = MagicMock() return connector diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 5293408..1cf944a 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -79,6 +79,8 @@ def test_clean_major_version_public_names() -> None: assert connectors.ConnectorFabric is ConnectorFabric assert not hasattr(inputs, "DirectedInputsClass") assert not hasattr(connectors, "VendorConnectors") + assert not hasattr(connectors, "AWSConnectorFull") + assert not hasattr(connectors, "GoogleConnectorFull") assert not hasattr(primitives, "removeprefix") assert not hasattr(primitives, "removesuffix") assert not hasattr(primitives, "bytestostr") @@ -154,11 +156,14 @@ def test_package_root_exports_builtin_connector_classes() -> None: assert root_value is connector_value -def test_aws_full_connector_keeps_operation_mixins_without_aws_extra() -> None: - """AWSConnectorFull should expose real operation mixins even before boto3 is installed.""" - assert 
callable(connectors.AWSConnectorFull.list_s3_buckets) - assert callable(connectors.AWSConnectorFull.get_organization_accounts) - assert callable(connectors.AWSConnectorFull.list_sso_users) +def test_first_class_connectors_keep_operation_mixins_without_optional_extras() -> None: + """Unified connector classes should expose real operation mixins before SDK extras are installed.""" + assert callable(connectors.AWSConnector.list_s3_buckets) + assert callable(connectors.AWSConnector.get_organization_accounts) + assert callable(connectors.AWSConnector.list_sso_users) + assert callable(connectors.GoogleConnector.list_projects) + assert callable(connectors.GoogleConnector.list_users) + assert callable(connectors.GoogleConnector.list_billing_accounts) def test_clean_major_version_does_not_preserve_duplicate_tool_modules() -> None: From 6d24bef66bf6454fa16e6469c4356d6539d57a2d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 10:55:57 -0500 Subject: [PATCH 135/287] docs: remove old project origin references --- .../connectors/aws/codedeploy.py | 5 ++-- .../connectors/aws/organizations.py | 10 +++---- src/extended_data/connectors/connectors.py | 6 ++-- .../connectors/google/constants.py | 6 ++-- .../connectors/vault/__init__.py | 2 +- tests/core/test_release_hygiene.py | 29 +++++++++++++++++++ 6 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/extended_data/connectors/aws/codedeploy.py b/src/extended_data/connectors/aws/codedeploy.py index 3e11583..4d78024 100644 --- a/src/extended_data/connectors/aws/codedeploy.py +++ b/src/extended_data/connectors/aws/codedeploy.py @@ -1,8 +1,7 @@ """AWS CodeDeploy helpers for extended-data. -This module centralizes the CodeDeploy helper functions that previously -lived inside terraform-modules so Terraform stacks and standalone Python -workloads can rely on the same implementation. 
+This module centralizes CodeDeploy helper functions so infrastructure stacks +and standalone Python workloads can rely on the same implementation. """ from __future__ import annotations diff --git a/src/extended_data/connectors/aws/organizations.py b/src/extended_data/connectors/aws/organizations.py index e8d3aad..1f66868 100644 --- a/src/extended_data/connectors/aws/organizations.py +++ b/src/extended_data/connectors/aws/organizations.py @@ -553,7 +553,7 @@ def classify_accounts( return self.extend_result(account_map) # --------------------------------------------------------------------- # - # Terraform-migrated helpers # + # Account labeling and organization preprocessing helpers # # --------------------------------------------------------------------- # def label_aws_accounts( @@ -565,8 +565,6 @@ def label_aws_accounts( ) -> ExtendedDict: """Return normalized metadata for every AWS account. - This mirrors the historical ``label_aws_account`` helper from terraform-modules. - Args: domains: Mapping of environment -> root domain. aws_organization_units: Optional precomputed OU metadata (with tags). @@ -658,7 +656,7 @@ def classify_aws_accounts( caller_account_id: str | None = None, execution_role_arn: str | None = None, ) -> ExtendedDict: - """Group accounts by classification, matching terraform-modules output.""" + """Group accounts by classification for infrastructure data consumers.""" if labeled_accounts is None: if not domains: msg = "domains mapping required when labeled_accounts is not provided" @@ -744,9 +742,9 @@ def preprocess_organization( include_classification: bool = True, execution_role_arn: str | None = None, ) -> ExtendedDict: - """Preprocess AWS Organization data for terraform consumption. + """Preprocess AWS Organization data for infrastructure workflows. - Returns a structured dict suitable for terraform data sources. + Returns a structured dictionary suitable for downstream data sources. Args: include_tags: Include account tags. 
Defaults to True. diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index 831af14..934b7e7 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -1,4 +1,4 @@ -"""ConnectorFabric - Public API with caching like TerraformDataSource.""" +"""ConnectorFabric - cached vendor connector access for Extended Data.""" from __future__ import annotations @@ -47,8 +47,8 @@ class ConnectorFabric(InputProvider): """Public API for extended data connectors with client caching. - This class provides cached access to all extended data connectors, similar to - how TerraformDataSource works in terraform-modules libraries. + This class provides cached access to registered vendor connectors while + sharing input snapshots, lifecycle logging, and data normalization. Usage: vc = ConnectorFabric() diff --git a/src/extended_data/connectors/google/constants.py b/src/extended_data/connectors/google/constants.py index c92d70f..d724f4d 100644 --- a/src/extended_data/connectors/google/constants.py +++ b/src/extended_data/connectors/google/constants.py @@ -1,4 +1,4 @@ -"""Google connector constants for terraform-modules settings. +"""Google connector constants for cloud and Workspace workflows. These constants provide default configurations for Google Cloud and Workspace operations. Override these values with environment-specific configuration. @@ -12,7 +12,7 @@ # Default domain - MUST be overridden via environment variable DEFAULT_DOMAIN = os.getenv("GOOGLE_WORKSPACE_DOMAIN", "example.com") -# Full OAuth scopes matching terraform-modules for maximum compatibility +# Broad OAuth scopes for mixed Workspace, Cloud, billing, and service discovery workflows. DEFAULT_SCOPES = [ "https://mail.google.com/", "https://www.googleapis.com/auth/apps.alerts", @@ -56,7 +56,7 @@ }, } -# KMS configuration for terraform secrets +# KMS configuration defaults for infrastructure secret material. 
GCP_KMS = { "keyring_name": "terraform-secrets", "key_name": "terraform-key", diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index f1fe993..7f36945 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -399,7 +399,7 @@ def write_secret( return False # --------------------------------------------------------------------- - # Vault AWS IAM helpers (migrated from terraform-modules) + # Vault AWS IAM helpers # --------------------------------------------------------------------- def list_aws_iam_roles( diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 63091c2..75e3ef1 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -11,6 +11,13 @@ WORKFLOW_ROOT = REPO_ROOT / ".github" / "workflows" ACTION_REF_RE = re.compile(r"^\s*(?:-\s*)?uses:\s*([^#\s]+)") PINNED_SHA_RE = re.compile(r"^[0-9a-f]{40}$") +PUBLIC_TEXT_ROOTS = ( + REPO_ROOT / "src", + REPO_ROOT / "docs", + REPO_ROOT / "examples", + REPO_ROOT / "README.md", +) +OLD_PROJECT_TERMS = ("terraform-modules", "TerraformDataSource") def test_workflow_actions_are_pinned_to_exact_shas() -> None: @@ -33,3 +40,25 @@ def test_workflow_actions_are_pinned_to_exact_shas() -> None: offenders.append(f"{relative_path}:{line_number}: {uses}") assert offenders == [] + + +def test_public_text_does_not_reference_old_project_origins() -> None: + """Public code/docs should describe current Extended Data surfaces, not origin packages.""" + offenders: list[str] = [] + + paths: list[Path] = [] + for root in PUBLIC_TEXT_ROOTS: + if root.is_file(): + paths.append(root) + else: + paths.extend(path for path in root.rglob("*") if path.is_file()) + + for path in sorted(paths): + if path.suffix in {".pyc", ".png"}: + continue + text = path.read_text(encoding="utf-8") + for term in OLD_PROJECT_TERMS: + if term in text: + 
offenders.append(f"{path.relative_to(REPO_ROOT)}: {term}") + + assert offenders == [] From 56ae5edcc36b44e2b33992484b2abadd257db71e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 11:00:53 -0500 Subject: [PATCH 136/287] refactor: return connector names as data --- README.md | 5 +++++ docs/package-surface.md | 2 ++ src/extended_data/connectors/connectors.py | 4 ++-- src/extended_data/connectors/mcp.py | 4 ++-- src/extended_data/connectors/registry.py | 15 ++++++++++----- .../test_connector_payload_contracts.py | 2 -- tests/connectors/test_connectors.py | 6 +++++- tests/core/test_package_surface.py | 5 +++++ 8 files changed, 31 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 7a56b6e..2da8b50 100644 --- a/README.md +++ b/README.md @@ -70,10 +70,15 @@ requested without its optional extra installed, the registry raises an Inspect connector availability before wiring vendor workflows: ```python +names = connectors.list_connectors() catalog = connectors.list_connector_info() github_info = connectors.get_connector_info("github") ``` +`list_connectors()` returns an `ExtendedList` of available connector names. +Use `list_connector_info()` when a workflow needs availability, extra, install, +class, module, and description metadata. + The same catalog is available from the CLI: ```bash diff --git a/docs/package-surface.md b/docs/package-surface.md index 5f9885f..28497a3 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -255,10 +255,12 @@ Use the catalog helpers when a workflow needs to inspect which integrations can run in the current environment: ```python +names = fabric.list_connectors() catalog = fabric.list_connector_info() github_info = fabric.get_connector_info("github") ``` +`list_connectors()` returns an `ExtendedList` of available connector names. Each catalog entry includes availability, source, extra name, install command, required packages, missing packages, module, class, and description fields. 
The installed CLI exposes the same discovery layer for shell automation: diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index 934b7e7..3e6f040 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -91,8 +91,8 @@ def _set_cached_client(self, client_type: str, client: Any, **kwargs: Any) -> No cache_key = self._get_cache_key(**kwargs) self._client_cache[client_type][cache_key] = client - def list_connectors(self) -> dict[str, Any]: - """List connector classes available in the current environment.""" + def list_connectors(self) -> ExtendedList[Any]: + """List connector names available in the current environment.""" return list_registered_connectors() def list_connector_info(self, *, include_unavailable: bool = True) -> ExtendedList[ExtendedDict]: diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index 9552b49..37c8d3e 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -28,7 +28,7 @@ from collections.abc import Callable, Iterable, Mapping from typing import Any, cast -from extended_data.connectors.registry import get_connector, list_connectors +from extended_data.connectors.registry import _list_connector_classes, get_connector from extended_data.connectors.surface import connector_data_methods from extended_data.containers import to_builtin @@ -124,7 +124,7 @@ def create_server() -> Any: tools: dict[str, dict[str, Any]] = {} # Discover all connectors - connectors = list_connectors() + connectors = _list_connector_classes() for connector_name, connector_class in connectors.items(): # Get public methods diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 30da5b5..e02540c 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -13,7 +13,7 @@ # List available connectors available = 
list_connectors() - # {'jules': , 'cursor': , ...} + # ExtendedList(["anthropic", "aws", "cursor", ...]) # Get a specific connector instance connector = get_connector('jules', api_key='...') @@ -183,13 +183,18 @@ def _raise_unregistered_builtin_connector(name: str) -> NoReturn: ) -def list_connectors() -> dict[str, builtins.type[VendorConnectorBase]]: - """List all available connectors. +def _list_connector_classes() -> dict[str, builtins.type[VendorConnectorBase]]: + """List available connector classes for internal tool registration.""" + return _discover_connectors().copy() + + +def list_connectors() -> ExtendedList[Any]: + """List available connector names. Returns: - Dict mapping connector name to connector class. + ExtendedList of connector registry names. """ - return _discover_connectors().copy() + return extend_data(sorted(_discover_connectors())) def get_connector_class(name: str) -> builtins.type[VendorConnectorBase]: diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index e8fcc38..dc805cf 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -199,8 +199,6 @@ RAW_CONNECTOR_BOUNDARIES = { ("src/extended_data/connectors/ai_tools.py", "build_langchain_tools"), ("src/extended_data/connectors/base.py", "VendorConnectorBase.get_tools"), - ("src/extended_data/connectors/connectors.py", "ConnectorFabric.list_connectors"), - ("src/extended_data/connectors/registry.py", "list_connectors"), ("src/extended_data/connectors/surface.py", "connector_data_methods"), ("src/extended_data/connectors/zoom/__init__.py", "ZoomConnector.get_headers"), } diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index 3456c64..bfcff82 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -145,9 +145,13 @@ def test_connector_fabric_exposes_catalog_info(self): assert 
"cursor" in names assert "github" in names github_info = vc.get_connector_info(" github ") + connector_names = vc.list_connectors() assert isinstance(github_info, ExtendedDict) assert github_info["name"] == "github" - assert isinstance(vc.list_connectors(), dict) + assert isinstance(connector_names, ExtendedList) + assert isinstance(connector_names[0], ExtendedString) + assert "github" in connector_names + assert "cursor" in connector_names @requires_boto3 @patch("extended_data.connectors.aws.AWSConnector") diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 1cf944a..4ca0d33 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -12,6 +12,7 @@ from extended_data import connectors, containers, inputs, io, primitives, secrets, workflows from extended_data.connectors.connectors import ConnectorFabric from extended_data.connectors.registry import BUILTIN_CONNECTORS +from extended_data.containers import ExtendedList, ExtendedString from extended_data.inputs import InputProvider from extended_data.logging import Logging @@ -136,6 +137,10 @@ def test_root_exports_first_class_integrated_primitives() -> None: assert callable(extended_data.read_data_file) assert callable(extended_data.get_connector) assert callable(extended_data.list_connector_info) + connector_names = extended_data.list_connectors() + assert isinstance(connector_names, ExtendedList) + assert isinstance(connector_names[0], ExtendedString) + assert "github" in connector_names def test_connectors_root_exports_builtin_connector_classes() -> None: From 64aa7afa66b5cb474d0cf9a7a01fdfffa1e94800 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 11:05:03 -0500 Subject: [PATCH 137/287] test: enforce clean major-version breakage --- README.md | 3 ++- docs/package-surface.md | 16 +++++++++------- tests/core/test_release_hygiene.py | 21 +++++++++++++++++++++ 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/README.md 
b/README.md index 2da8b50..34e5b77 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,8 @@ Tier 1 primitive names are explicit in this major version. Use `string_to_int()`, `string_to_float()`, `string_to_path()`, `string_to_date()`, `string_to_datetime()`, and `string_to_time()` for scalar string conversion. The old `bytestostr` and `strto*` helper names are not -preserved. +preserved. Old package import namespaces are not shimmed; missing imports are +intentional so remaining migration work fails fast. Vendor connectors are first-class adapters in the data fabric. `ConnectorFabric` uses the registry to resolve connectors by name, injects shared input/logging diff --git a/docs/package-surface.md b/docs/package-surface.md index 28497a3..7429d72 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -290,10 +290,12 @@ All built-in CrewAI tool adapters use incompatible CrewAI installs fail with the same user-managed install guidance. Optional dependency checks live in `extended_data.connectors._optional`; there -are no old package compatibility shims in the public API. When a known built-in -connector is requested without its optional extra installed, the registry raises -an `ImportError` with the exact `extended-data[...]` install target instead of -reporting the connector as unknown. Built-in connectors must also be registered -through the `extended_data.connectors` entry point group; missing entry-point -registration is treated as a package configuration error instead of being -patched over by direct source imports. +are no old package compatibility shims in the public API. Missing old imports +are intentional in this major version so unfinished migration work stays +visible. When a known built-in connector is requested without its optional extra +installed, the registry raises an `ImportError` with the exact +`extended-data[...]` install target instead of reporting the connector as +unknown. 
Built-in connectors must also be registered through the +`extended_data.connectors` entry point group; missing entry-point registration is +treated as a package configuration error instead of being patched over by direct +source imports. diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 75e3ef1..e812448 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -18,6 +18,12 @@ REPO_ROOT / "README.md", ) OLD_PROJECT_TERMS = ("terraform-modules", "TerraformDataSource") +OLD_PACKAGE_NAMESPACES = ( + "directed_inputs_class", + "extended_data_types", + "lifecyclelogging", + "vendor_connectors", +) def test_workflow_actions_are_pinned_to_exact_shas() -> None: @@ -62,3 +68,18 @@ def test_public_text_does_not_reference_old_project_origins() -> None: offenders.append(f"{path.relative_to(REPO_ROOT)}: {term}") assert offenders == [] + + +def test_old_package_namespace_shims_do_not_exist() -> None: + """Clean major-version breaks should not grow old import namespace shims.""" + offenders: list[str] = [] + + for namespace in OLD_PACKAGE_NAMESPACES: + package_path = REPO_ROOT / "src" / namespace + module_path = REPO_ROOT / "src" / f"{namespace}.py" + if package_path.exists(): + offenders.append(str(package_path.relative_to(REPO_ROOT))) + if module_path.exists(): + offenders.append(str(module_path.relative_to(REPO_ROOT))) + + assert offenders == [] From 21bf5f30bc516223ddb41d058b12f59aa8c6adf5 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 11:11:22 -0500 Subject: [PATCH 138/287] refactor: remove functions tool framework alias --- .../connectors/anthropic/tools.py | 2 +- src/extended_data/connectors/aws/tools.py | 2 +- src/extended_data/connectors/cursor/tools.py | 2 +- src/extended_data/connectors/github/tools.py | 2 +- src/extended_data/connectors/google/tools.py | 5 ++-- src/extended_data/connectors/meshy/tools.py | 7 ++--- src/extended_data/connectors/secrets/tools.py | 4 +-- 
src/extended_data/connectors/slack/tools.py | 5 ++-- src/extended_data/connectors/vault/tools.py | 2 +- src/extended_data/connectors/zoom/tools.py | 2 +- tests/connectors/meshy/test_tools.py | 11 +++++-- tests/connectors/test_github_connector.py | 4 +-- tests/connectors/test_github_tools.py | 10 +++---- tests/connectors/test_google_tools.py | 9 +++--- tests/connectors/test_tool_frameworks.py | 30 +++++++++++++++++++ tests/connectors/test_vault_tools.py | 10 +++---- tests/connectors/test_zoom_tools.py | 9 +++--- 17 files changed, 72 insertions(+), 44 deletions(-) create mode 100644 tests/connectors/test_tool_frameworks.py diff --git a/src/extended_data/connectors/anthropic/tools.py b/src/extended_data/connectors/anthropic/tools.py index 3a6be1c..436969e 100644 --- a/src/extended_data/connectors/anthropic/tools.py +++ b/src/extended_data/connectors/anthropic/tools.py @@ -151,7 +151,7 @@ def get_tools(framework: str = "auto") -> list[Any]: return get_langchain_tools() if framework == "crewai": return get_crewai_tools() - if framework in ("strands", "functions"): + if framework == "strands": return get_strands_tools() raise ValueError(f"Unknown framework: {framework}") diff --git a/src/extended_data/connectors/aws/tools.py b/src/extended_data/connectors/aws/tools.py index 9b49b42..5c7e9ee 100644 --- a/src/extended_data/connectors/aws/tools.py +++ b/src/extended_data/connectors/aws/tools.py @@ -383,7 +383,7 @@ def get_tools(framework: str = "auto") -> list[Any]: return get_langchain_tools() if framework == "crewai": return get_crewai_tools() - if framework in ("strands", "functions"): + if framework == "strands": return get_strands_tools() raise ValueError(f"Unknown framework: {framework}") diff --git a/src/extended_data/connectors/cursor/tools.py b/src/extended_data/connectors/cursor/tools.py index f55f203..232477c 100644 --- a/src/extended_data/connectors/cursor/tools.py +++ b/src/extended_data/connectors/cursor/tools.py @@ -154,7 +154,7 @@ def get_tools(framework: 
str = "auto") -> list[Any]: return get_langchain_tools() if framework == "crewai": return get_crewai_tools() - if framework in ("strands", "functions"): + if framework == "strands": return get_strands_tools() raise ValueError(f"Unknown framework: {framework}") diff --git a/src/extended_data/connectors/github/tools.py b/src/extended_data/connectors/github/tools.py index 20a15e4..01f8c15 100644 --- a/src/extended_data/connectors/github/tools.py +++ b/src/extended_data/connectors/github/tools.py @@ -351,7 +351,7 @@ def get_tools(framework: str = "auto") -> list[Any]: return get_langchain_tools() if framework == "crewai": return get_crewai_tools() - if framework in ("strands", "functions"): + if framework == "strands": return get_strands_tools() raise ValueError(f"Unknown framework: {framework}") diff --git a/src/extended_data/connectors/google/tools.py b/src/extended_data/connectors/google/tools.py index 6b507b1..5319444 100644 --- a/src/extended_data/connectors/google/tools.py +++ b/src/extended_data/connectors/google/tools.py @@ -403,7 +403,6 @@ def get_tools(framework: str = "auto") -> list[Any]: - "langchain": Force LangChain StructuredTools - "crewai": Force CrewAI tools - "strands": Force plain functions for Strands - - "functions": Force plain functions (alias for strands) Returns: List of tools in the appropriate format for the framework. @@ -425,10 +424,10 @@ def get_tools(framework: str = "auto") -> list[Any]: return get_langchain_tools() if framework == "crewai": return get_crewai_tools() - if framework in ("strands", "functions"): + if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}. Options: auto, langchain, crewai, strands, functions") + raise ValueError(f"Unknown framework: {framework}. 
Options: auto, langchain, crewai, strands") # ============================================================================= diff --git a/src/extended_data/connectors/meshy/tools.py b/src/extended_data/connectors/meshy/tools.py index 70a3509..2a92f5f 100644 --- a/src/extended_data/connectors/meshy/tools.py +++ b/src/extended_data/connectors/meshy/tools.py @@ -601,7 +601,6 @@ def get_tools(framework: str = "auto") -> list[Any]: - "langchain": Force LangChain StructuredTools - "crewai": Force CrewAI tools - "strands": Force plain functions for Strands - - "functions": Force plain functions (alias for strands) Returns: List of tools in the appropriate format for the framework. @@ -621,7 +620,7 @@ def get_tools(framework: str = "auto") -> list[Any]: from extended_data.connectors._optional import is_available if framework == "auto": - # Priority: CrewAI > LangChain > Strands/functions + # Priority: CrewAI > LangChain > Strands # (CrewAI first since it's more opinionated about tool format) if is_available("crewai"): return get_crewai_tools() @@ -634,10 +633,10 @@ def get_tools(framework: str = "auto") -> list[Any]: return get_langchain_tools() if framework == "crewai": return get_crewai_tools() - if framework in ("strands", "functions"): + if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}. Options: auto, langchain, crewai, strands, functions") + raise ValueError(f"Unknown framework: {framework}. Options: auto, langchain, crewai, strands") # ============================================================================= diff --git a/src/extended_data/connectors/secrets/tools.py b/src/extended_data/connectors/secrets/tools.py index b0b241c..fc6cdc5 100644 --- a/src/extended_data/connectors/secrets/tools.py +++ b/src/extended_data/connectors/secrets/tools.py @@ -290,7 +290,7 @@ def get_tools(framework: str = "auto") -> list[Any]: """Get secrets sync tools for the specified or auto-detected framework. 
Args: - framework: One of 'auto', 'langchain', 'crewai', 'strands', 'functions' + framework: One of 'auto', 'langchain', 'crewai', 'strands' Returns: List of tools in the appropriate format @@ -308,7 +308,7 @@ def get_tools(framework: str = "auto") -> list[Any]: return get_langchain_tools() if framework == "crewai": return get_crewai_tools() - if framework in ("strands", "functions"): + if framework == "strands": return get_strands_tools() raise ValueError(f"Unknown framework: {framework}") diff --git a/src/extended_data/connectors/slack/tools.py b/src/extended_data/connectors/slack/tools.py index 824480c..2c2cc0b 100644 --- a/src/extended_data/connectors/slack/tools.py +++ b/src/extended_data/connectors/slack/tools.py @@ -353,7 +353,6 @@ def get_tools(framework: str = "auto") -> list[Any]: - "langchain": Force LangChain StructuredTools - "crewai": Force CrewAI tools - "strands": Force plain functions for Strands - - "functions": Force plain functions (alias for strands) Returns: List of tools in the appropriate format for the framework. @@ -375,10 +374,10 @@ def get_tools(framework: str = "auto") -> list[Any]: return get_langchain_tools() if framework == "crewai": return get_crewai_tools() - if framework in ("strands", "functions"): + if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}. Options: auto, langchain, crewai, strands, functions") + raise ValueError(f"Unknown framework: {framework}. 
Options: auto, langchain, crewai, strands") # ============================================================================= diff --git a/src/extended_data/connectors/vault/tools.py b/src/extended_data/connectors/vault/tools.py index df06cc4..6f662b6 100644 --- a/src/extended_data/connectors/vault/tools.py +++ b/src/extended_data/connectors/vault/tools.py @@ -170,7 +170,7 @@ def get_tools(framework: str = "auto") -> list[Any]: return get_langchain_tools() if framework == "crewai": return get_crewai_tools() - if framework in ("strands", "functions"): + if framework == "strands": return get_strands_tools() raise ValueError(f"Unknown framework: {framework}") diff --git a/src/extended_data/connectors/zoom/tools.py b/src/extended_data/connectors/zoom/tools.py index 42300dd..4653f27 100644 --- a/src/extended_data/connectors/zoom/tools.py +++ b/src/extended_data/connectors/zoom/tools.py @@ -203,7 +203,7 @@ def get_tools(framework: str = "auto") -> list[Any]: return get_langchain_tools() if framework == "crewai": return get_crewai_tools() - if framework in ("strands", "functions"): + if framework == "strands": return get_strands_tools() raise ValueError(f"Unknown framework: {framework}") diff --git a/tests/connectors/meshy/test_tools.py b/tests/connectors/meshy/test_tools.py index da7ebcc..9943853 100644 --- a/tests/connectors/meshy/test_tools.py +++ b/tests/connectors/meshy/test_tools.py @@ -477,11 +477,18 @@ def test_get_tools_with_explicit_framework(self): """Test get_tools with explicit framework selection.""" from extended_data.connectors.meshy.tools import get_tools - # Strands/functions always works (no deps) - tools = get_tools("functions") + # Strands always works because it returns plain functions. 
+ tools = get_tools("strands") assert isinstance(tools, list) assert all(callable(t) for t in tools) + def test_get_tools_rejects_functions_alias(self): + """Plain-function tools should use the canonical strands framework name.""" + from extended_data.connectors.meshy.tools import get_tools + + with pytest.raises(ValueError, match="Unknown framework"): + get_tools("functions") + def test_get_tools_invalid_framework(self): """Test get_tools raises ValueError for invalid framework.""" from extended_data.connectors.meshy.tools import get_tools diff --git a/tests/connectors/test_github_connector.py b/tests/connectors/test_github_connector.py index ca3183d..df8e036 100644 --- a/tests/connectors/test_github_connector.py +++ b/tests/connectors/test_github_connector.py @@ -1,5 +1,5 @@ # ruff: noqa: I001 -"""Tests for GitHub connector aliases and behavior.""" +"""Tests for GitHub connector exports and behavior.""" from __future__ import annotations @@ -17,7 +17,7 @@ class TestGitHubConnector: """Test suite for GitHub connector behavior.""" - def test_root_export_alias_points_to_same_connector(self): + def test_root_export_points_to_same_connector(self): """The canonical root export and canonical class should resolve to the same class.""" assert RootGitHubConnector is GitHubConnector diff --git a/tests/connectors/test_github_tools.py b/tests/connectors/test_github_tools.py index 809d1ad..90c0c5b 100644 --- a/tests/connectors/test_github_tools.py +++ b/tests/connectors/test_github_tools.py @@ -389,14 +389,12 @@ def test_get_tools_invalid_framework(self): with pytest.raises(ValueError, match="Unknown framework"): get_tools(framework="invalid") - def test_get_tools_strands_alias(self): - """Test get_tools with 'functions' alias for strands.""" + def test_get_tools_rejects_functions_alias(self): + """Plain-function tools should use the canonical strands framework name.""" from extended_data.connectors.github.tools import get_tools - tools = get_tools(framework="functions") - - 
assert len(tools) > 0 - assert all(callable(t) for t in tools) + with pytest.raises(ValueError, match="Unknown framework"): + get_tools(framework="functions") class TestExports: diff --git a/tests/connectors/test_google_tools.py b/tests/connectors/test_google_tools.py index f9b57ac..65d83c2 100644 --- a/tests/connectors/test_google_tools.py +++ b/tests/connectors/test_google_tools.py @@ -451,13 +451,12 @@ def test_get_tools_invalid_framework(self): with pytest.raises(ValueError, match="Unknown framework"): get_tools(framework="invalid") - def test_get_tools_strands_alias(self): - """Test 'functions' is an alias for 'strands'.""" + def test_get_tools_rejects_functions_alias(self): + """Plain-function tools should use the canonical strands framework name.""" from extended_data.connectors.google.tools import get_tools - tools = get_tools(framework="functions") - assert len(tools) == 6 - assert all(callable(t) for t in tools) + with pytest.raises(ValueError, match="Unknown framework"): + get_tools(framework="functions") def test_all_exports_exist(self): """Test that all expected exports are available.""" diff --git a/tests/connectors/test_tool_frameworks.py b/tests/connectors/test_tool_frameworks.py new file mode 100644 index 0000000..b35e3a0 --- /dev/null +++ b/tests/connectors/test_tool_frameworks.py @@ -0,0 +1,30 @@ +"""Shared framework selection tests for connector tool modules.""" + +from __future__ import annotations + +import importlib + +import pytest + + +TOOL_MODULES = ( + "extended_data.connectors.anthropic.tools", + "extended_data.connectors.aws.tools", + "extended_data.connectors.cursor.tools", + "extended_data.connectors.github.tools", + "extended_data.connectors.google.tools", + "extended_data.connectors.meshy.tools", + "extended_data.connectors.secrets.tools", + "extended_data.connectors.slack.tools", + "extended_data.connectors.vault.tools", + "extended_data.connectors.zoom.tools", +) + + +@pytest.mark.parametrize("module_path", TOOL_MODULES) +def 
test_get_tools_rejects_functions_alias(module_path: str) -> None: + """Plain-function tools should use the canonical strands framework name.""" + module = importlib.import_module(module_path) + + with pytest.raises(ValueError, match="Unknown framework"): + module.get_tools("functions") diff --git a/tests/connectors/test_vault_tools.py b/tests/connectors/test_vault_tools.py index 99e7829..ca7a284 100644 --- a/tests/connectors/test_vault_tools.py +++ b/tests/connectors/test_vault_tools.py @@ -220,14 +220,12 @@ def test_get_tools_strands(self): assert len(tools) > 0 assert all(callable(t) for t in tools) - def test_get_tools_functions(self): - """Test get_tools with functions framework (alias for strands).""" + def test_get_tools_rejects_functions_alias(self): + """Plain-function tools should use the canonical strands framework name.""" from extended_data.connectors.vault.tools import get_tools - tools = get_tools(framework="functions") - - assert len(tools) > 0 - assert all(callable(t) for t in tools) + with pytest.raises(ValueError, match="Unknown framework"): + get_tools(framework="functions") def test_get_tools_invalid_framework(self): """Test get_tools with invalid framework raises ValueError.""" diff --git a/tests/connectors/test_zoom_tools.py b/tests/connectors/test_zoom_tools.py index 6944fc3..74d28a3 100644 --- a/tests/connectors/test_zoom_tools.py +++ b/tests/connectors/test_zoom_tools.py @@ -310,13 +310,12 @@ def test_get_tools_strands(self): assert len(tools) == 4 assert all(callable(t) for t in tools) - def test_get_tools_functions(self): - """Test get_tools with functions framework (alias for strands).""" + def test_get_tools_rejects_functions_alias(self): + """Plain-function tools should use the canonical strands framework name.""" from extended_data.connectors.zoom.tools import get_tools - tools = get_tools(framework="functions") - assert len(tools) == 4 - assert all(callable(t) for t in tools) + with pytest.raises(ValueError, match="Unknown 
framework"): + get_tools(framework="functions") def test_get_tools_invalid_framework(self): """Test get_tools with invalid framework raises error.""" From addf5f0bde4fc840dabb40ad294d8974ec128f1f Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 11:18:01 -0500 Subject: [PATCH 139/287] fix: redact connector cli secret output --- src/extended_data/connectors/cli.py | 42 +++++++++++++++++++- tests/connectors/test_cli.py | 59 +++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index d8a5aff..de146ef 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -18,6 +18,7 @@ import argparse import json +import re import sys from collections.abc import Mapping @@ -34,6 +35,43 @@ from extended_data.containers.factory import to_builtin +_SENSITIVE_KEY_PATTERN = ( + r"api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|token|secret|password|passwd|pwd|" + r"authorization|client[_-]?secret|private[_-]?key" +) +_JSON_SECRET_RE = re.compile( + rf"(?i)([\"']?(?:{_SENSITIVE_KEY_PATTERN})[\"']?\s*:\s*)" + rf"([\"'][^\"']*[\"']|Bearer\s+[^\s,;}}\]]+|[^,\s}}\]]+)" +) +_KEY_VALUE_SECRET_RE = re.compile(rf"(?i)(\b(?:{_SENSITIVE_KEY_PATTERN})\b\s*=\s*)([^\s,;]+)") +_CLI_SECRET_RE = re.compile(rf"(?i)(--(?:{_SENSITIVE_KEY_PATTERN})(?:=|\s+))(\S+)") +_BEARER_SECRET_RE = re.compile(r"(?i)(\bBearer\s+)[A-Za-z0-9._~+/=-]+") + + +def _redacted_value(value: str) -> str: + """Return a redacted placeholder while preserving matching quotes.""" + quote = value[:1] if value[:1] in {"'", '"'} else "" + return f"{quote}[REDACTED]{quote}" + + +def _redacted_field(match: re.Match[str]) -> str: + """Return a redacted key/value field while preserving JSON shape.""" + prefix = match.group(1) + value = match.group(2) + if prefix.lstrip().startswith(('"', "'")) and value[:1] not in {"'", '"'}: + return f'{prefix}"[REDACTED]"' + return 
f"{prefix}{_redacted_value(value)}" + + +def _redact_sensitive_text(message: Any) -> str: + """Redact common secret fields before CLI terminal output.""" + text = str(message) + text = _JSON_SECRET_RE.sub(_redacted_field, text) + text = _KEY_VALUE_SECRET_RE.sub(lambda match: f"{match.group(1)}[REDACTED]", text) + text = _CLI_SECRET_RE.sub(lambda match: f"{match.group(1)}[REDACTED]", text) + return _BEARER_SECRET_RE.sub(r"\1[REDACTED]", text) + + def _json_output(data: Any) -> str: """Format data as JSON for output.""" data = to_builtin(data) @@ -82,12 +120,12 @@ def _format_list(values: list[Any] | tuple[Any, ...] | ExtendedList[Any] | None) def _write_stdout(message: str) -> None: """Write one CLI output line.""" - sys.stdout.write(f"{message}\n") + sys.stdout.write(f"{_redact_sensitive_text(message)}\n") def _write_stderr(message: str) -> None: """Write one CLI error line.""" - sys.stderr.write(f"{message}\n") + sys.stderr.write(f"{_redact_sensitive_text(message)}\n") # ============================================================================= diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index 1ffa5de..de66af5 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -20,6 +20,18 @@ def fetch(self, enabled: bool = False, count: int = 0) -> ExtendedDict: """Fetch example data.""" return ExtendedDict({"enabled": enabled, "count": count}) + def secrets(self) -> ExtendedDict: + """Fetch example sensitive data.""" + return ExtendedDict( + { + "password": "hunter2", + "access_token": "tok_123", + "id_token": 12345, + "nested": {"api_key": "key_456"}, + "ok": True, + } + ) + def test_cli_list() -> None: """Test the list command.""" @@ -139,6 +151,31 @@ def test_cli_call_serializes_extended_containers_as_data() -> None: assert json.loads(mock_write.call_args.args[0]) == {"service": {"name": "api"}} +def test_cli_call_redacts_sensitive_json_output() -> None: + """Call command should not write common secret fields to 
stdout.""" + connector = MagicMock() + connector.secrets.return_value = ExampleConnector().secrets() + args = argparse.Namespace(connector="example", method="secrets", extra=[], json=True) + + with ( + patch("extended_data.connectors.cli.get_connector_class", return_value=ExampleConnector), + patch("extended_data.connectors.cli.get_connector", return_value=connector), + patch("sys.stdout.write") as mock_write, + ): + exit_code = cmd_call(args) + + assert exit_code == 0 + output = mock_write.call_args.args[0] + assert "hunter2" not in output + assert "tok_123" not in output + assert "12345" not in output + assert "key_456" not in output + assert json.loads(output)["id_token"] == "[REDACTED]" + assert '"password": "[REDACTED]"' in output + assert '"access_token": "[REDACTED]"' in output + assert '"api_key": "[REDACTED]"' in output + + def test_cli_call_reports_missing_method() -> None: """Call command reports missing methods instead of failing silently.""" args = argparse.Namespace(connector="example", method="missing", extra=[], json=False) @@ -181,6 +218,28 @@ def test_cli_call_reports_connector_errors() -> None: assert "boom" in mock_write.call_args.args[0] +def test_cli_call_redacts_sensitive_error_output() -> None: + """Call command should sanitize common secret values in stderr.""" + args = argparse.Namespace(connector="example", method="fetch", extra=[], json=False) + error = RuntimeError("failed password=hunter2 token: tok_123 Authorization: Bearer raw_token") + + with ( + patch("extended_data.connectors.cli.get_connector_class", return_value=ExampleConnector), + patch("extended_data.connectors.cli.get_connector", side_effect=error), + patch("sys.stderr.write") as mock_write, + ): + exit_code = cmd_call(args) + + assert exit_code == 1 + output = mock_write.call_args.args[0] + assert "hunter2" not in output + assert "tok_123" not in output + assert "raw_token" not in output + assert "password=[REDACTED]" in output + assert "token: [REDACTED]" in output + 
assert "Authorization: [REDACTED]" in output + + def test_cli_main_help() -> None: """Test main CLI entry point with help.""" with patch("sys.argv", ["extended-data", "--help"]): From a46a2950d0a46915126a5ce1354fc1e8070ae8ba Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 11:23:29 -0500 Subject: [PATCH 140/287] refactor: remove zoom user listing alias --- src/extended_data/connectors/zoom/__init__.py | 18 +++---- .../test_connector_payload_contracts.py | 1 - tests/connectors/test_zoom_connector.py | 48 ++++++------------- 3 files changed, 20 insertions(+), 47 deletions(-) diff --git a/src/extended_data/connectors/zoom/__init__.py b/src/extended_data/connectors/zoom/__init__.py index b5a6bfa..4939e30 100644 --- a/src/extended_data/connectors/zoom/__init__.py +++ b/src/extended_data/connectors/zoom/__init__.py @@ -61,8 +61,12 @@ def get_headers(self) -> dict[str, str]: raise RuntimeError(msg) return {"Authorization": f"Bearer {token}", "Content-Type": "application/json"} - def get_zoom_users(self) -> ExtendedDict: - """Get all Zoom users.""" + def list_users(self) -> ExtendedDict: + """List all Zoom users. + + Returns: + Dictionary mapping user emails to user data. + """ url = "https://api.zoom.us/v2/users" headers = self.get_headers() users: dict[str, dict[str, Any]] = {} @@ -121,16 +125,6 @@ def create_zoom_user(self, email: str, first_name: str, last_name: str) -> bool: self.logger.exception(error_msg) return False - def list_users(self) -> ExtendedDict: - """List all Zoom users. - - This is an alias for get_zoom_users() for consistency with AI tools naming. - - Returns: - Dictionary mapping user emails to user data - """ - return self.get_zoom_users() - def get_user(self, user_id: str) -> ExtendedDict: """Get a specific Zoom user by ID or email. 
diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index dc805cf..7a6918c 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -189,7 +189,6 @@ (VaultConnector.list_aws_iam_roles, ExtendedList[ExtendedString]), (VaultConnector.get_aws_iam_role, ExtendedDict | None), (VaultConnector.generate_aws_credentials, ExtendedDict), - (ZoomConnector.get_zoom_users, ExtendedDict), (ZoomConnector.list_users, ExtendedDict), (ZoomConnector.get_user, ExtendedDict), (ZoomConnector.list_meetings, ExtendedList[ExtendedDict]), diff --git a/tests/connectors/test_zoom_connector.py b/tests/connectors/test_zoom_connector.py index ee9b205..77d0207 100644 --- a/tests/connectors/test_zoom_connector.py +++ b/tests/connectors/test_zoom_connector.py @@ -64,8 +64,8 @@ def test_get_access_token_failure(self, mock_post, base_connector_kwargs): @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") - def test_get_zoom_users(self, mock_post, mock_get, base_connector_kwargs): - """Test getting Zoom users.""" + def test_list_users(self, mock_post, mock_get, base_connector_kwargs): + """Test listing Zoom users.""" mock_token_response = MagicMock() mock_token_response.json.return_value = {"access_token": "test-token"} mock_token_response.raise_for_status = MagicMock() @@ -89,7 +89,7 @@ def test_get_zoom_users(self, mock_post, mock_get, base_connector_kwargs): **base_connector_kwargs, ) - users = connector.get_zoom_users() + users = connector.list_users() assert isinstance(users, ExtendedDict) assert isinstance(users["user1@example.com"], ExtendedDict) assert isinstance(users["user1@example.com"]["first_name"], ExtendedString) @@ -97,6 +97,17 @@ def test_get_zoom_users(self, mock_post, mock_get, base_connector_kwargs): assert "user2@example.com" in users assert len(users) == 2 + def 
test_get_zoom_users_alias_is_not_preserved(self, base_connector_kwargs): + """The clean major version should expose only the canonical list_users method.""" + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + assert not hasattr(connector, "get_zoom_users") + @patch("extended_data.connectors.zoom.requests.post") def test_create_zoom_user(self, mock_post, base_connector_kwargs): """Test creating a Zoom user.""" @@ -120,37 +131,6 @@ def test_create_zoom_user(self, mock_post, base_connector_kwargs): assert result is True assert mock_post.call_count == 2 - @patch("extended_data.connectors.zoom.requests.get") - @patch("extended_data.connectors.zoom.requests.post") - def test_list_users(self, mock_post, mock_get, base_connector_kwargs): - """Test list_users method (alias for get_zoom_users).""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - mock_token_response.raise_for_status = MagicMock() - mock_post.return_value = mock_token_response - - mock_users_response = MagicMock() - mock_users_response.json.return_value = { - "users": [ - {"email": "user1@example.com", "id": "123"}, - ], - "next_page_token": None, - } - mock_users_response.raise_for_status = MagicMock() - mock_get.return_value = mock_users_response - - connector = ZoomConnector( - client_id="test-client-id", - client_secret="test-client-secret", - account_id="test-account-id", - **base_connector_kwargs, - ) - - users = connector.list_users() - assert isinstance(users, ExtendedDict) - assert isinstance(users["user1@example.com"], ExtendedDict) - assert "user1@example.com" in users - @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") def test_get_user(self, mock_post, mock_get, base_connector_kwargs): From 3e145bdc07f00dd988d8f733c381a5ac044406df Mon Sep 17 00:00:00 2001 From: Jon 
Bogaty Date: Wed, 10 Jun 2026 11:34:14 -0500 Subject: [PATCH 141/287] fix: redact connector mcp results --- src/extended_data/connectors/cli.py | 43 +------------- src/extended_data/connectors/mcp.py | 5 +- src/extended_data/connectors/redaction.py | 68 +++++++++++++++++++++++ tests/connectors/test_mcp.py | 7 +++ tests/connectors/test_redaction.py | 38 +++++++++++++ 5 files changed, 119 insertions(+), 42 deletions(-) create mode 100644 src/extended_data/connectors/redaction.py create mode 100644 tests/connectors/test_redaction.py diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index de146ef..10e4431 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -18,12 +18,12 @@ import argparse import json -import re import sys from collections.abc import Mapping from typing import Any +from extended_data.connectors.redaction import redact_sensitive_text from extended_data.connectors.registry import ( get_connector, get_connector_class, @@ -35,43 +35,6 @@ from extended_data.containers.factory import to_builtin -_SENSITIVE_KEY_PATTERN = ( - r"api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|token|secret|password|passwd|pwd|" - r"authorization|client[_-]?secret|private[_-]?key" -) -_JSON_SECRET_RE = re.compile( - rf"(?i)([\"']?(?:{_SENSITIVE_KEY_PATTERN})[\"']?\s*:\s*)" - rf"([\"'][^\"']*[\"']|Bearer\s+[^\s,;}}\]]+|[^,\s}}\]]+)" -) -_KEY_VALUE_SECRET_RE = re.compile(rf"(?i)(\b(?:{_SENSITIVE_KEY_PATTERN})\b\s*=\s*)([^\s,;]+)") -_CLI_SECRET_RE = re.compile(rf"(?i)(--(?:{_SENSITIVE_KEY_PATTERN})(?:=|\s+))(\S+)") -_BEARER_SECRET_RE = re.compile(r"(?i)(\bBearer\s+)[A-Za-z0-9._~+/=-]+") - - -def _redacted_value(value: str) -> str: - """Return a redacted placeholder while preserving matching quotes.""" - quote = value[:1] if value[:1] in {"'", '"'} else "" - return f"{quote}[REDACTED]{quote}" - - -def _redacted_field(match: re.Match[str]) -> str: - """Return a redacted key/value field while 
preserving JSON shape.""" - prefix = match.group(1) - value = match.group(2) - if prefix.lstrip().startswith(('"', "'")) and value[:1] not in {"'", '"'}: - return f'{prefix}"[REDACTED]"' - return f"{prefix}{_redacted_value(value)}" - - -def _redact_sensitive_text(message: Any) -> str: - """Redact common secret fields before CLI terminal output.""" - text = str(message) - text = _JSON_SECRET_RE.sub(_redacted_field, text) - text = _KEY_VALUE_SECRET_RE.sub(lambda match: f"{match.group(1)}[REDACTED]", text) - text = _CLI_SECRET_RE.sub(lambda match: f"{match.group(1)}[REDACTED]", text) - return _BEARER_SECRET_RE.sub(r"\1[REDACTED]", text) - - def _json_output(data: Any) -> str: """Format data as JSON for output.""" data = to_builtin(data) @@ -120,12 +83,12 @@ def _format_list(values: list[Any] | tuple[Any, ...] | ExtendedList[Any] | None) def _write_stdout(message: str) -> None: """Write one CLI output line.""" - sys.stdout.write(f"{_redact_sensitive_text(message)}\n") + sys.stdout.write(f"{redact_sensitive_text(message)}\n") def _write_stderr(message: str) -> None: """Write one CLI error line.""" - sys.stderr.write(f"{_redact_sensitive_text(message)}\n") + sys.stderr.write(f"{redact_sensitive_text(message)}\n") # ============================================================================= diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index 37c8d3e..c56ddb7 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -28,6 +28,7 @@ from collections.abc import Callable, Iterable, Mapping from typing import Any, cast +from extended_data.connectors.redaction import redact_sensitive_data from extended_data.connectors.registry import _list_connector_classes, get_connector from extended_data.connectors.surface import connector_data_methods from extended_data.containers import to_builtin @@ -105,8 +106,8 @@ def _jsonable_tool_result(result: Any) -> Any: result = [item.model_dump() if hasattr(item, 
"model_dump") else item for item in result] result = to_builtin(result) if isinstance(result, set | frozenset): - return [to_builtin(item) for item in result] - return result + result = [to_builtin(item) for item in result] + return redact_sensitive_data(result) def create_server() -> Any: diff --git a/src/extended_data/connectors/redaction.py b/src/extended_data/connectors/redaction.py new file mode 100644 index 0000000..bd802ee --- /dev/null +++ b/src/extended_data/connectors/redaction.py @@ -0,0 +1,68 @@ +"""Redaction helpers for connector output boundaries.""" + +from __future__ import annotations + +import re + +from collections.abc import Mapping +from typing import Any + + +SENSITIVE_KEY_PATTERN = ( + r"api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|token|secret|password|passwd|pwd|" + r"authorization|client[_-]?secret|private[_-]?key" +) +SENSITIVE_KEY_RE = re.compile(rf"(?i)^(?:{SENSITIVE_KEY_PATTERN})$") +JSON_SECRET_RE = re.compile( + rf"(?i)([\"']?(?:{SENSITIVE_KEY_PATTERN})[\"']?\s*:\s*)" + rf"([\"'][^\"']*[\"']|Bearer\s+[^\s,;}}\]]+|[^,\s}}\]]+)" +) +KEY_VALUE_SECRET_RE = re.compile(rf"(?i)(\b(?:{SENSITIVE_KEY_PATTERN})\b\s*=\s*)([^\s,;]+)") +CLI_SECRET_RE = re.compile(rf"(?i)(--(?:{SENSITIVE_KEY_PATTERN})(?:=|\s+))(\S+)") +BEARER_SECRET_RE = re.compile(r"(?i)(\bBearer\s+)[A-Za-z0-9._~+/=-]+") +REDACTED = "[REDACTED]" + + +def _redacted_value(value: str) -> str: + """Return a redacted placeholder while preserving matching quotes.""" + quote = value[:1] if value[:1] in {"'", '"'} else "" + return f"{quote}{REDACTED}{quote}" + + +def _redacted_field(match: re.Match[str]) -> str: + """Return a redacted key/value field while preserving JSON shape.""" + prefix = match.group(1) + value = match.group(2) + if prefix.lstrip().startswith(('"', "'")) and value[:1] not in {"'", '"'}: + return f'{prefix}"{REDACTED}"' + return f"{prefix}{_redacted_value(value)}" + + +def redact_sensitive_text(message: Any) -> str: + """Redact common secret fields in 
terminal-oriented text.""" + text = str(message) + text = JSON_SECRET_RE.sub(_redacted_field, text) + text = KEY_VALUE_SECRET_RE.sub(lambda match: f"{match.group(1)}{REDACTED}", text) + text = CLI_SECRET_RE.sub(lambda match: f"{match.group(1)}{REDACTED}", text) + return BEARER_SECRET_RE.sub(rf"\1{REDACTED}", text) + + +def redact_sensitive_data(value: Any) -> Any: + """Recursively redact common secret fields in JSON-like connector data.""" + if isinstance(value, Mapping): + redacted: dict[Any, Any] = {} + for key, item in value.items(): + if isinstance(key, str) and SENSITIVE_KEY_RE.fullmatch(key): + redacted[key] = REDACTED + else: + redacted[key] = redact_sensitive_data(item) + return redacted + if isinstance(value, list): + return [redact_sensitive_data(item) for item in value] + if isinstance(value, tuple): + return tuple(redact_sensitive_data(item) for item in value) + if isinstance(value, set): + return {redact_sensitive_data(item) for item in value} + if isinstance(value, str): + return redact_sensitive_text(value) + return value diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index 9992306..45c68fd 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -39,6 +39,13 @@ def test_jsonable_tool_result_lowers_extended_mapping_payloads() -> None: assert _jsonable_tool_result(payload) == {"service": {"name": "api"}} +def test_jsonable_tool_result_redacts_sensitive_mapping_payloads() -> None: + """MCP result serialization should not bypass connector redaction.""" + payload = ExtendedDict({"password": "hunter2", "nested": {"api_key": "key_123"}}) + + assert _jsonable_tool_result(payload) == {"password": "[REDACTED]", "nested": {"api_key": "[REDACTED]"}} + + def test_jsonable_tool_result_lowers_extended_sequence_payloads() -> None: """MCP result serialization keeps Tier 2 sequence payloads as JSON arrays.""" payload = ExtendedList([{"service": "api"}]) diff --git a/tests/connectors/test_redaction.py 
b/tests/connectors/test_redaction.py new file mode 100644 index 0000000..188ea37 --- /dev/null +++ b/tests/connectors/test_redaction.py @@ -0,0 +1,38 @@ +"""Tests for connector output redaction helpers.""" + +from __future__ import annotations + +from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text + + +def test_redact_sensitive_text_preserves_json_shape() -> None: + """Terminal text redaction should keep JSON-ish values parseable.""" + message = '{"password": "hunter2", "id_token": 12345, "Authorization": Bearer raw_token}' + + redacted = redact_sensitive_text(message) + + assert "hunter2" not in redacted + assert "12345" not in redacted + assert "raw_token" not in redacted + assert '"password": "[REDACTED]"' in redacted + assert '"id_token": "[REDACTED]"' in redacted + assert '"Authorization": "[REDACTED]"' in redacted + + +def test_redact_sensitive_data_recurses_through_json_like_payloads() -> None: + """Structured redaction should handle nested connector data.""" + payload = { + "password": "hunter2", + "nested": [{"api_key": "key_123", "value": "ok"}], + "headers": {"authorization": "Bearer raw_token"}, + "message": "client_secret=secret_123", + } + + redacted = redact_sensitive_data(payload) + + assert redacted == { + "password": "[REDACTED]", + "nested": [{"api_key": "[REDACTED]", "value": "ok"}], + "headers": {"authorization": "[REDACTED]"}, + "message": "client_secret=[REDACTED]", + } From 02ae30e25b6e784d7432c4d337598a12cd1d24b8 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 11:45:15 -0500 Subject: [PATCH 142/287] docs: document connector output redaction --- README.md | 4 ++++ docs/package-surface.md | 5 +++++ tests/connectors/test_mcp.py | 10 ++++++++++ 3 files changed, 19 insertions(+) diff --git a/README.md b/README.md index 34e5b77..9c41cb6 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,10 @@ payload contract; framework factory functions still return framework tool objects. 
The generic CLI `call` command and MCP bridge expose only methods that advertise Extended Data payload returns. +Both serialized boundaries redact common secret-bearing keys and token-shaped +strings before writing terminal or MCP output, so connector data methods can +return structured vendor payloads without making stdout or tool responses a +secret leak by default. Raw SDK/client objects and raw transport responses remain available from the methods that explicitly return them. diff --git a/docs/package-surface.md b/docs/package-surface.md index 7429d72..b1ffba4 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -227,6 +227,11 @@ their payload returns as `ExtendedDict` or `ExtendedList[ExtendedDict]`. The generic CLI `call` command and MCP bridge expose only connector methods that advertise Extended Data payload returns, so raw SDK client factories and low-level HTTP helpers do not leak into serialized tool catalogs. +Those serialized boundaries apply redaction after Tier 2 containers are lowered +to JSON-compatible data. Common secret-bearing keys such as `password`, +`api_key`, `access_token`, `authorization`, and `client_secret`, plus token-like +strings in error text, are replaced with `[REDACTED]` before CLI stdout/stderr +or MCP tool responses are emitted. LangChain, CrewAI, Strands, and auto-detection factory functions still return plain framework tool object lists. 
diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index 45c68fd..e11c316 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -53,6 +53,16 @@ def test_jsonable_tool_result_lowers_extended_sequence_payloads() -> None: assert _jsonable_tool_result(payload) == [{"service": "api"}] +def test_jsonable_tool_result_redacts_sensitive_sequence_payloads() -> None: + """MCP result serialization should redact secrets inside array payloads.""" + payload = ExtendedList([{"name": "api", "access_token": "tok_123"}, {"message": "client_secret=raw"}]) + + assert _jsonable_tool_result(payload) == [ + {"name": "api", "access_token": "[REDACTED]"}, + {"message": "client_secret=[REDACTED]"}, + ] + + def test_jsonable_tool_result_lowers_extended_set_payloads() -> None: """MCP result serialization turns Tier 2 sets into JSON arrays.""" payload = ExtendedSet({"api", "worker"}) From f38a8fab331fa1d2fc061412694142d20f3a5d8e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 11:49:37 -0500 Subject: [PATCH 143/287] refactor: generalize aws secret prefix loading --- README.md | 2 + docs/package-surface.md | 4 + src/extended_data/connectors/aws/__init__.py | 76 ++++++++++--------- tests/connectors/test_aws_connector.py | 46 +++++++---- .../test_connector_payload_contracts.py | 2 +- 5 files changed, 76 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 9c41cb6..39bf2e1 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,8 @@ generic vendor lookup. Organizations, SSO, Workspace, Cloud Resource Manager, Billing, and services operations live on those connectors directly rather than on separate `*Full` classes. +AWS Secrets Manager prefix loading is exposed as the generic +`load_secrets_by_prefix()` data method rather than as a vendor-specific helper. 
Connector data payloads are promoted into Tier 2 containers at the boundary, so decoded files, HTTP response data, GraphQL responses, and SDK-shaped maps can use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods immediately. diff --git a/docs/package-surface.md b/docs/package-surface.md index b1ffba4..c0b1f95 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -179,6 +179,10 @@ major version: common S3, Organizations, SSO, Workspace, Cloud Resource Manager, Billing, and service-discovery operations live directly on those connectors. The old split between base connector classes and separate `*Full` connector classes is intentionally not preserved. +AWS Secrets Manager prefix loading is generic too: use +`AWSConnector.load_secrets_by_prefix()` when a workflow needs a promoted mapping +of secret names to values. The old vendor-specific ASM loader name is +intentionally not preserved. ## Connector Fabric diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index a3d60a7..9c097ea 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -25,7 +25,7 @@ from extended_data.connectors.aws.s3 import AWSS3Mixin from extended_data.connectors.aws.sso import AWSSSOmixin from extended_data.connectors.base import VendorConnectorBase -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data, to_builtin +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.logging import Logging @@ -560,49 +560,51 @@ def copy_secrets_to_s3( self.logger.info(f"Uploaded secrets to {s3_uri}") return self.extend_result(s3_uri) - @staticmethod - def load_vendors_from_asm(prefix: str = "/vendors/") -> ExtendedDict: - """Load vendor secrets from AWS Secrets Manager. 
- - This is used in Lambda environments where vendor credentials are stored - in ASM under a common prefix (e.g., /vendors/). + def load_secrets_by_prefix( + self, + prefix: str, + *, + strip_prefix: bool = True, + uppercase_keys: bool = False, + skip_empty_secrets: bool = True, + execution_role_arn: str | None = None, + role_session_name: str | None = None, + ) -> ExtendedDict: + """Load AWS Secrets Manager values into a mapping keyed by secret name. Args: - prefix: The prefix path for vendor secrets (default: /vendors/) + prefix: AWS Secrets Manager name prefix to load. + strip_prefix: Remove the prefix from returned mapping keys. + uppercase_keys: Uppercase returned mapping keys for env-style use. + skip_empty_secrets: Skip missing or empty secret values. + execution_role_arn: ARN of role to assume for cross-account access. + role_session_name: Session name for assumed role. Returns: - Dictionary mapping secret keys (with prefix removed) to their values. + Mapping of transformed secret names to secret values. 
""" - import os + if not prefix: + msg = "prefix is required to load secrets" + raise ValueError(msg) - vendors: dict[str, str] = {} - prefix = os.getenv("TM_VENDORS_PREFIX", prefix) + secrets = self.list_secrets( + prefix=prefix, + get_secret_values=True, + skip_empty_secrets=skip_empty_secrets, + execution_role_arn=execution_role_arn, + role_session_name=role_session_name, + ) - try: - aws_sdk = _load_aws_sdk() - session = aws_sdk.Session() - secretsmanager = session.client("secretsmanager") - - # List secrets with the prefix - paginator = secretsmanager.get_paginator("list_secrets") - for page in paginator.paginate(Filters=[{"Key": "name", "Values": [prefix]}]): - for secret in page.get("SecretList", []): - secret_name = secret["Name"] - if secret_name.startswith(prefix): - try: - response = secretsmanager.get_secret_value(SecretId=secret_name) - secret_value = response.get("SecretString", "") - # Remove prefix from key name - key = secret_name.removeprefix(prefix).upper() - vendors[key] = secret_value - except ClientError: - # Skip secrets we can't read - pass - except ClientError: - # Return empty dict if we can't access Secrets Manager - pass - - return extend_data(vendors) + loaded: dict[str, AWSSecretValue] = {} + for secret_name, secret_value in secrets.items(): + key = str(secret_name) + if strip_prefix and key.startswith(prefix): + key = key.removeprefix(prefix) + if uppercase_keys: + key = key.upper() + loaded[key] = secret_value + + return self.extend_result(loaded) from extended_data.connectors.aws.codedeploy import create_codedeploy_deployment, get_aws_codedeploy_deployments diff --git a/tests/connectors/test_aws_connector.py b/tests/connectors/test_aws_connector.py index f316400..23abbda 100644 --- a/tests/connectors/test_aws_connector.py +++ b/tests/connectors/test_aws_connector.py @@ -423,22 +423,36 @@ def test_copy_secrets_to_s3_unwraps_extended_data(self, base_connector_kwargs): ContentType="application/json", ) - def 
test_load_vendors_from_asm_returns_extended_mapping(self): - """Ensure load_vendors_from_asm promotes loaded vendor secrets.""" - mock_secretsmanager = MagicMock() - mock_paginator = MagicMock() - mock_paginator.paginate.return_value = [{"SecretList": [{"Name": "/vendors/github_token"}]}] - mock_secretsmanager.get_paginator.return_value = mock_paginator - mock_secretsmanager.get_secret_value.return_value = {"SecretString": "ghp_test"} + def test_load_secrets_by_prefix_returns_extended_mapping(self, base_connector_kwargs): + """Ensure prefix-loaded secrets are promoted without vendor-specific naming.""" + connector = AWSConnector(**base_connector_kwargs) + connector.list_secrets = MagicMock(return_value={"/services/github_token": "ghp_test"}) - mock_session = MagicMock() - mock_session.client.return_value = mock_secretsmanager - mock_sdk = MagicMock() - mock_sdk.Session.return_value = mock_session + secrets = connector.load_secrets_by_prefix( + prefix="/services/", + uppercase_keys=True, + execution_role_arn="arn:role:override", + role_session_name="session", + ) + + assert isinstance(secrets, ExtendedDict) + assert isinstance(secrets["GITHUB_TOKEN"], ExtendedString) + assert secrets == {"GITHUB_TOKEN": "ghp_test"} + connector.list_secrets.assert_called_once_with( + prefix="/services/", + get_secret_values=True, + skip_empty_secrets=True, + execution_role_arn="arn:role:override", + role_session_name="session", + ) + + def test_load_secrets_by_prefix_requires_prefix(self, base_connector_kwargs): + """Ensure prefix loading fails loudly without a prefix.""" + connector = AWSConnector(**base_connector_kwargs) - with patch("extended_data.connectors.aws._load_aws_sdk", return_value=mock_sdk): - vendors = AWSConnector.load_vendors_from_asm(prefix="/vendors/") + with pytest.raises(ValueError, match="prefix is required"): + connector.load_secrets_by_prefix("") - assert isinstance(vendors, ExtendedDict) - assert isinstance(vendors["GITHUB_TOKEN"], ExtendedString) - assert 
vendors == {"GITHUB_TOKEN": "ghp_test"} + def test_aws_connector_does_not_keep_vendor_secret_loader_alias(self): + """Clean major-version surface should not preserve the old vendor loader name.""" + assert not hasattr(AWSConnector, "load_vendors_from_asm") diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index 7a6918c..a673e22 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -50,7 +50,7 @@ (AWSConnector.delete_secret, ExtendedDict), (AWSConnector.delete_secrets_matching, ExtendedList[ExtendedString]), (AWSConnector.copy_secrets_to_s3, ExtendedString), - (AWSConnector.load_vendors_from_asm, ExtendedDict), + (AWSConnector.load_secrets_by_prefix, ExtendedDict), (AWSOrganizationsMixin.get_organization_accounts, ExtendedDict), (AWSOrganizationsMixin.get_controltower_accounts, ExtendedDict), (AWSOrganizationsMixin.get_accounts, ExtendedDict), From f7d161c0bf5c31e8f6149b794e320142b144f3b1 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 11:52:45 -0500 Subject: [PATCH 144/287] test: guard example command paths --- examples/connectors/langchain_tools.py | 4 ++-- examples/connectors/mcp_server.py | 2 +- examples/inputs/decorator_api.py | 2 +- examples/inputs/encoding_decoding.py | 2 +- tests/examples/test_safe_examples.py | 18 ++++++++++++++++++ 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/examples/connectors/langchain_tools.py b/examples/connectors/langchain_tools.py index e22378d..3111172 100644 --- a/examples/connectors/langchain_tools.py +++ b/examples/connectors/langchain_tools.py @@ -6,7 +6,7 @@ Requirements: pip install extended-data[meshy,langchain] - pip install langchain-anthropic # For Claude as the LLM + pip install langchain-anthropic langgraph # For Claude as the LLM and agent loop Environment Variables: MESHY_API_KEY: Your Meshy API key @@ -39,7 +39,7 @@ def main() -> int: from 
extended_data.connectors.meshy.tools import get_tools except ImportError: print("Error: Could not import required packages.") - print("Install with: pip install extended-data[meshy,langchain] langchain-anthropic") + print("Install with: pip install extended-data[meshy,langchain] langchain-anthropic langgraph") return 1 # Get Meshy tools for LangChain diff --git a/examples/connectors/mcp_server.py b/examples/connectors/mcp_server.py index eff1495..d188098 100644 --- a/examples/connectors/mcp_server.py +++ b/examples/connectors/mcp_server.py @@ -13,7 +13,7 @@ Usage: # Run the server (connects via stdio) - python examples/mcp_server.py + python examples/connectors/mcp_server.py # Or use the installed command meshy-mcp diff --git a/examples/inputs/decorator_api.py b/examples/inputs/decorator_api.py index 1b5f7f3..29b1fc3 100644 --- a/examples/inputs/decorator_api.py +++ b/examples/inputs/decorator_api.py @@ -8,7 +8,7 @@ - JSON decoding from inputs Run with: - python -m examples.decorator_api + python examples/inputs/decorator_api.py """ from __future__ import annotations diff --git a/examples/inputs/encoding_decoding.py b/examples/inputs/encoding_decoding.py index 3ea5879..c37e39a 100644 --- a/examples/inputs/encoding_decoding.py +++ b/examples/inputs/encoding_decoding.py @@ -8,7 +8,7 @@ - Combined Base64 + JSON/YAML decoding Run with: - python -m examples.encoding_decoding + python examples/inputs/encoding_decoding.py """ from __future__ import annotations diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index 3a5e648..07246f5 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -37,6 +37,11 @@ "examples/connectors/mcp_server.py", ] ALL_EXAMPLES = SAFE_EXAMPLES + CONNECTOR_EXAMPLES +STALE_EXAMPLE_COMMANDS = ( + "python examples/mcp_server.py", + "python -m examples.decorator_api", + "python -m examples.encoding_decoding", +) def _readme_usage_snippet() -> str: @@ -84,6 +89,19 @@ def 
test_readme_usage_snippet_runs(tmp_path: Path) -> None: assert result.returncode == 0, f"README usage snippet failed\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" +def test_examples_do_not_document_stale_command_paths() -> None: + """Example command snippets should point at the current directory layout.""" + offenders: list[str] = [] + + for example_path in ALL_EXAMPLES: + text = (REPO_ROOT / example_path).read_text(encoding="utf-8") + for command in STALE_EXAMPLE_COMMANDS: + if command in text: + offenders.append(f"{example_path}: {command}") + + assert offenders == [] + + @pytest.mark.parametrize("example_path", ALL_EXAMPLES) def test_example_compiles(example_path: str, tmp_path: Path) -> None: """Every example should at least remain syntactically valid.""" From 89831ac376d62de699b6a7f711cb5e6b8dbc1fd7 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:03:19 -0500 Subject: [PATCH 145/287] refactor: unify google connector registry --- README.md | 2 ++ docs/package-surface.md | 4 +++ pyproject.toml | 3 --- src/extended_data/__init__.py | 9 ------- src/extended_data/connectors/__init__.py | 6 ----- src/extended_data/connectors/_optional.py | 6 ----- .../connectors/google/__init__.py | 15 ----------- src/extended_data/connectors/registry.py | 3 --- tests/connectors/test_google_connector.py | 26 ++++++++----------- tests/core/test_package_surface.py | 16 ++++++++++++ 10 files changed, 33 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 39bf2e1..fe73f65 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,8 @@ generic vendor lookup. Organizations, SSO, Workspace, Cloud Resource Manager, Billing, and services operations live on those connectors directly rather than on separate `*Full` classes. +Google registry names are unified as well: use `google` for Workspace, Cloud, +Billing, and service discovery rather than split `google_*` connector aliases. 
AWS Secrets Manager prefix loading is exposed as the generic `load_secrets_by_prefix()` data method rather than as a vendor-specific helper. Connector data payloads are promoted into Tier 2 containers at the boundary, so diff --git a/docs/package-surface.md b/docs/package-surface.md index c0b1f95..3abe506 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -179,6 +179,10 @@ major version: common S3, Organizations, SSO, Workspace, Cloud Resource Manager, Billing, and service-discovery operations live directly on those connectors. The old split between base connector classes and separate `*Full` connector classes is intentionally not preserved. +The Google registry surface is unified too: `google` is the first-class +connector name for Workspace, Cloud Resource Manager, Billing, and service +discovery operations. Split `google_cloud`, `google_workspace`, and +`google_billing` connector aliases are intentionally not preserved. AWS Secrets Manager prefix loading is generic too: use `AWSConnector.load_secrets_by_prefix()` when a workflow needs a promoted mapping of secret names to values. 
The old vendor-specific ASM loader name is diff --git a/pyproject.toml b/pyproject.toml index 25bd2d5..a98978d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -157,9 +157,6 @@ meshy-mcp = "extended_data.connectors.meshy.mcp:main" [project.entry-points."extended_data.connectors"] jules = "extended_data.connectors.google.jules:JulesConnector" google = "extended_data.connectors.google:GoogleConnector" -google_cloud = "extended_data.connectors.google:GoogleCloudConnector" -google_workspace = "extended_data.connectors.google:GoogleWorkspaceConnector" -google_billing = "extended_data.connectors.google:GoogleBillingConnector" cursor = "extended_data.connectors.cursor:CursorConnector" github = "extended_data.connectors.github:GitHubConnector" meshy = "extended_data.connectors.meshy:MeshyConnector" diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index a105117..dec81b6 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -138,10 +138,7 @@ ConnectorInfo, CursorConnector, GitHubConnector, - GoogleBillingConnector, - GoogleCloudConnector, GoogleConnector, - GoogleWorkspaceConnector, JulesConnector, MeshyConnector, SlackConnector, @@ -167,10 +164,7 @@ "CursorConnector": ("extended_data.connectors", "CursorConnector"), "ExitRunError": ("extended_data.logging", "ExitRunError"), "GitHubConnector": ("extended_data.connectors", "GitHubConnector"), - "GoogleBillingConnector": ("extended_data.connectors", "GoogleBillingConnector"), - "GoogleCloudConnector": ("extended_data.connectors", "GoogleCloudConnector"), "GoogleConnector": ("extended_data.connectors", "GoogleConnector"), - "GoogleWorkspaceConnector": ("extended_data.connectors", "GoogleWorkspaceConnector"), "InputProvider": ("extended_data.inputs", "InputProvider"), "JulesConnector": ("extended_data.connectors", "JulesConnector"), "KeyTransform": ("extended_data.logging", "KeyTransform"), @@ -222,10 +216,7 @@ def __getattr__(name: str) -> Any: "ExtendedTuple", 
"FilePath", "GitHubConnector", - "GoogleBillingConnector", - "GoogleCloudConnector", "GoogleConnector", - "GoogleWorkspaceConnector", "InputProvider", "JulesConnector", "KeyTransform", diff --git a/src/extended_data/connectors/__init__.py b/src/extended_data/connectors/__init__.py index 8adb225..ff2abed 100644 --- a/src/extended_data/connectors/__init__.py +++ b/src/extended_data/connectors/__init__.py @@ -70,13 +70,10 @@ class MyConnector(AWSConnector): from extended_data.connectors.cursor import CursorConnector from extended_data.connectors.github import GitHubConnector from extended_data.connectors.google import ( - GoogleBillingConnector, GoogleBillingMixin, - GoogleCloudConnector, GoogleCloudMixin, GoogleConnector, GoogleServicesMixin, - GoogleWorkspaceConnector, GoogleWorkspaceMixin, JulesConnector, ) @@ -97,13 +94,10 @@ class MyConnector(AWSConnector): "ConnectorInfo", "CursorConnector", "GitHubConnector", - "GoogleBillingConnector", "GoogleBillingMixin", - "GoogleCloudConnector", "GoogleCloudMixin", "GoogleConnector", "GoogleServicesMixin", - "GoogleWorkspaceConnector", "GoogleWorkspaceMixin", "JulesConnector", "MeshyConnector", diff --git a/src/extended_data/connectors/_optional.py b/src/extended_data/connectors/_optional.py index b3d45e0..f761dd9 100644 --- a/src/extended_data/connectors/_optional.py +++ b/src/extended_data/connectors/_optional.py @@ -202,9 +202,6 @@ def get_available_ai_frameworks() -> ExtendedList[ExtendedString]: "anthropic": ["anthropic"], "aws": ["boto3"], "google": ["googleapiclient"], - "google_billing": ["googleapiclient"], - "google_cloud": ["googleapiclient"], - "google_workspace": ["googleapiclient"], "github": ["github"], "jules": ["googleapiclient"], "slack": ["slack_sdk"], @@ -216,9 +213,6 @@ def get_available_ai_frameworks() -> ExtendedList[ExtendedString]: "aws": "aws", "cursor": "cursor", "google": "google", - "google_billing": "google", - "google_cloud": "google", - "google_workspace": "google", "github": "github", 
"jules": "google", "meshy": "meshy", diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index e31f992..3d237d9 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -586,18 +586,6 @@ def list_groups( ) -class GoogleCloudConnector(GoogleConnector): - """Google connector entry point for Cloud Resource Manager and IAM workflows.""" - - -class GoogleWorkspaceConnector(GoogleConnector): - """Google connector entry point for Admin Directory user and group workflows.""" - - -class GoogleBillingConnector(GoogleConnector): - """Google connector entry point for Cloud Billing account and project billing workflows.""" - - __all__ = [ "DEFAULT_DOMAIN", "DEFAULT_SCOPES", @@ -607,13 +595,10 @@ class GoogleBillingConnector(GoogleConnector): "GCP_REQUIRED_ORGANIZATION_ROLES", "GCP_REQUIRED_ROLES", "GCP_SECURITY_PROJECT", - "GoogleBillingConnector", "GoogleBillingMixin", - "GoogleCloudConnector", "GoogleCloudMixin", "GoogleConnector", "GoogleServicesMixin", - "GoogleWorkspaceConnector", "GoogleWorkspaceMixin", "JulesConnector", "JulesError", diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index e02540c..89a78fe 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -96,9 +96,6 @@ def as_dict(self) -> ExtendedDict: # Google connectors "jules": BuiltinConnectorSpec("extended_data.connectors.google.jules", "JulesConnector", "google"), "google": BuiltinConnectorSpec("extended_data.connectors.google", "GoogleConnector", "google"), - "google_cloud": BuiltinConnectorSpec("extended_data.connectors.google", "GoogleCloudConnector", "google"), - "google_workspace": BuiltinConnectorSpec("extended_data.connectors.google", "GoogleWorkspaceConnector", "google"), - "google_billing": BuiltinConnectorSpec("extended_data.connectors.google", "GoogleBillingConnector", "google"), # 
Other connectors "cursor": BuiltinConnectorSpec("extended_data.connectors.cursor", "CursorConnector", "cursor"), "github": BuiltinConnectorSpec("extended_data.connectors.github", "GitHubConnector", "github"), diff --git a/tests/connectors/test_google_connector.py b/tests/connectors/test_google_connector.py index ded1215..7d8883b 100644 --- a/tests/connectors/test_google_connector.py +++ b/tests/connectors/test_google_connector.py @@ -11,12 +11,7 @@ pytest.importorskip("googleapiclient") from extended_data.containers import ExtendedDict, ExtendedString -from extended_data.connectors.google import ( - GoogleBillingConnector, - GoogleCloudConnector, - GoogleConnector, - GoogleWorkspaceConnector, -) +from extended_data.connectors.google import GoogleConnector def _service_account(): @@ -240,19 +235,20 @@ def test_list_groups_key_by_email_and_filters(self, mock_get_service, base_conne assert "team@example.com" in result assert result["team@example.com"]["primaryEmail"] == "team@example.com" - def test_specialized_connector_exports_match_available_operations(self, base_connector_kwargs): - """Specialized Google connectors expose the operations their entry points advertise.""" + def test_unified_connector_exposes_all_google_operations(self, base_connector_kwargs): + """The single Google connector exposes Workspace, Cloud, and Billing operations.""" service_account = _service_account() - cloud = GoogleCloudConnector(service_account_info=service_account, **base_connector_kwargs) - workspace = GoogleWorkspaceConnector(service_account_info=service_account, **base_connector_kwargs) - billing = GoogleBillingConnector(service_account_info=service_account, **base_connector_kwargs) connector = GoogleConnector(service_account_info=service_account, **base_connector_kwargs) - assert hasattr(cloud, "list_projects") - assert hasattr(workspace, "list_users") - assert hasattr(billing, "list_billing_accounts") - assert hasattr(connector, "list_projects") assert hasattr(connector, 
"list_users") assert hasattr(connector, "list_billing_accounts") + + def test_specialized_google_connector_aliases_are_not_preserved(self): + """Clean major-version surface should keep Google operations on GoogleConnector.""" + import extended_data.connectors.google as google_module + + assert not hasattr(google_module, "GoogleCloudConnector") + assert not hasattr(google_module, "GoogleWorkspaceConnector") + assert not hasattr(google_module, "GoogleBillingConnector") diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 4ca0d33..92870fe 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -82,6 +82,12 @@ def test_clean_major_version_public_names() -> None: assert not hasattr(connectors, "VendorConnectors") assert not hasattr(connectors, "AWSConnectorFull") assert not hasattr(connectors, "GoogleConnectorFull") + assert not hasattr(connectors, "GoogleCloudConnector") + assert not hasattr(connectors, "GoogleWorkspaceConnector") + assert not hasattr(connectors, "GoogleBillingConnector") + assert not hasattr(extended_data, "GoogleCloudConnector") + assert not hasattr(extended_data, "GoogleWorkspaceConnector") + assert not hasattr(extended_data, "GoogleBillingConnector") assert not hasattr(primitives, "removeprefix") assert not hasattr(primitives, "removesuffix") assert not hasattr(primitives, "bytestostr") @@ -171,6 +177,16 @@ def test_first_class_connectors_keep_operation_mixins_without_optional_extras() assert callable(connectors.GoogleConnector.list_billing_accounts) +def test_google_registry_uses_single_first_class_connector() -> None: + """Google Workspace, Cloud, and Billing operations should not be split into connector aliases.""" + connector_names = set(connectors.list_connectors()) + + assert "google" in connector_names + assert "google_cloud" not in connector_names + assert "google_workspace" not in connector_names + assert "google_billing" not in connector_names + + def 
test_clean_major_version_does_not_preserve_duplicate_tool_modules() -> None: """Secrets tool factories live on the package root and connector implementation module.""" assert util.find_spec("extended_data.secrets.tools") is None From 6ba4b8522a331f65478f3ec1ae9c02d99243319c Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:07:28 -0500 Subject: [PATCH 146/287] refactor: require aws secret prefix keyword --- README.md | 2 ++ docs/package-surface.md | 2 ++ src/extended_data/connectors/aws/__init__.py | 9 +----- tests/connectors/test_aws_connector.py | 30 ++++++++++++++------ 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index fe73f65..798ccc6 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,8 @@ Google registry names are unified as well: use `google` for Workspace, Cloud, Billing, and service discovery rather than split `google_*` connector aliases. AWS Secrets Manager prefix loading is exposed as the generic `load_secrets_by_prefix()` data method rather than as a vendor-specific helper. +AWS secret listing and deletion APIs use the canonical `prefix` keyword; the old +`name_prefix` convenience keyword is intentionally not preserved. Connector data payloads are promoted into Tier 2 containers at the boundary, so decoded files, HTTP response data, GraphQL responses, and SDK-shaped maps can use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods immediately. diff --git a/docs/package-surface.md b/docs/package-surface.md index 3abe506..1b55c66 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -187,6 +187,8 @@ AWS Secrets Manager prefix loading is generic too: use `AWSConnector.load_secrets_by_prefix()` when a workflow needs a promoted mapping of secret names to values. The old vendor-specific ASM loader name is intentionally not preserved. +AWS secret listing and deletion methods use the canonical `prefix` keyword. 
The +old `name_prefix` convenience keyword is intentionally not preserved. ## Connector Fabric diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index 9c097ea..94339b2 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -287,7 +287,6 @@ def list_secrets( skip_empty_secrets: bool = False, execution_role_arn: str | None = None, role_session_name: str | None = None, - **kwargs: Any, ) -> ExtendedDict: """List secrets from AWS Secrets Manager. @@ -298,7 +297,6 @@ def list_secrets( skip_empty_secrets: If True, skip secrets with empty values. execution_role_arn: ARN of role to assume for cross-account access. role_session_name: Session name for assumed role. - **kwargs: Support for 'name_prefix' alias. Returns: Dict mapping secret names to ARNs or values. @@ -308,8 +306,6 @@ def list_secrets( """ self.logger.info("Listing AWS Secrets Manager secrets") - prefix = prefix or kwargs.get("name_prefix") - if prefix and (".." 
in prefix or "\x00" in prefix): msg = "prefix contains invalid characters" raise ValueError(msg) @@ -471,10 +467,8 @@ def delete_secrets_matching( force_delete: bool = False, dry_run: bool = True, execution_role_arn: str | None = None, - **kwargs: Any, ) -> ExtendedList[ExtendedString]: """Delete all secrets that match the provided name prefix.""" - prefix = prefix or kwargs.get("name_prefix") if not prefix: msg = "prefix is required to delete matching secrets" raise ValueError(msg) @@ -482,9 +476,8 @@ def delete_secrets_matching( self.logger.info(f"Deleting secrets matching prefix: {prefix} (dry_run={dry_run})") role_arn = execution_role_arn or self.execution_role_arn - # Pass name_prefix to satisfy existing tests that mock this call secrets = self.list_secrets( - name_prefix=prefix, + prefix=prefix, execution_role_arn=role_arn, ) diff --git a/tests/connectors/test_aws_connector.py b/tests/connectors/test_aws_connector.py index 23abbda..8321141 100644 --- a/tests/connectors/test_aws_connector.py +++ b/tests/connectors/test_aws_connector.py @@ -138,7 +138,7 @@ def test_list_secrets_returns_arns_with_filters(self, base_connector_kwargs): connector.get_aws_client = MagicMock(return_value=mock_secretsmanager) filters = [{"Key": "description", "Values": ["prod"]}] - secrets = connector.list_secrets(filters=filters, name_prefix="/vendors/") + secrets = connector.list_secrets(filters=filters, prefix="/vendors/") assert isinstance(secrets, ExtendedDict) assert isinstance(secrets["/vendors/foo"], ExtendedString) @@ -218,21 +218,28 @@ def test_list_secrets_fetches_values_and_skips_empty(self, base_connector_kwargs ) def test_list_secrets_rejects_path_traversal(self, base_connector_kwargs): - """Ensure list_secrets rejects path traversal in name_prefix.""" + """Ensure list_secrets rejects path traversal in prefix.""" import pytest connector = AWSConnector(**base_connector_kwargs) # Should reject path traversal attempts with pytest.raises(ValueError, match="invalid 
characters"): - connector.list_secrets(name_prefix="../../../etc/passwd") + connector.list_secrets(prefix="../../../etc/passwd") with pytest.raises(ValueError, match="invalid characters"): - connector.list_secrets(name_prefix="secrets/../admin") + connector.list_secrets(prefix="secrets/../admin") # Should reject null bytes with pytest.raises(ValueError, match="invalid characters"): - connector.list_secrets(name_prefix="secrets\x00admin") + connector.list_secrets(prefix="secrets\x00admin") + + def test_list_secrets_does_not_preserve_name_prefix_alias(self, base_connector_kwargs): + """Clean major-version surface should keep prefix as the only prefix keyword.""" + connector = AWSConnector(**base_connector_kwargs) + + with pytest.raises(TypeError, match="name_prefix"): + connector.list_secrets(name_prefix="/vendors/") # type: ignore[call-arg] def test_get_secret_returns_extended_string(self, base_connector_kwargs): """Ensure get_secret promotes returned secret strings.""" @@ -352,7 +359,7 @@ def test_delete_secrets_matching_dry_run(self, base_connector_kwargs): connector.delete_secret = MagicMock() to_delete = connector.delete_secrets_matching( - name_prefix="/vendors/", + prefix="/vendors/", dry_run=True, force_delete=False, execution_role_arn="arn:role:override", @@ -363,7 +370,7 @@ def test_delete_secrets_matching_dry_run(self, base_connector_kwargs): assert to_delete == ["arn:a", "arn:b"] connector.delete_secret.assert_not_called() connector.list_secrets.assert_called_once_with( - name_prefix="/vendors/", + prefix="/vendors/", execution_role_arn="arn:role:override", ) @@ -376,7 +383,7 @@ def test_delete_secrets_matching_executes_delete(self, base_connector_kwargs): ) deleted = connector.delete_secrets_matching( - name_prefix="/vendors/", + prefix="/vendors/", dry_run=False, force_delete=True, execution_role_arn="arn:role:override", @@ -402,6 +409,13 @@ def test_delete_secrets_matching_executes_delete(self, base_connector_kwargs): ] ) + def 
test_delete_secrets_matching_does_not_preserve_name_prefix_alias(self, base_connector_kwargs): + """Clean major-version surface should keep prefix as the only deletion keyword.""" + connector = AWSConnector(**base_connector_kwargs) + + with pytest.raises(TypeError, match="name_prefix"): + connector.delete_secrets_matching(name_prefix="/vendors/") # type: ignore[call-arg] + def test_copy_secrets_to_s3_unwraps_extended_data(self, base_connector_kwargs): """Ensure copy_secrets_to_s3 uploads JSON built from plain containers.""" connector = AWSConnector(**base_connector_kwargs) From 09f8ea5a6a804d8f931116a4301db8b7538db0d1 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:11:46 -0500 Subject: [PATCH 147/287] test: guard secrets connector data surface --- tests/connectors/test_connector_payload_contracts.py | 9 +++++++++ tests/core/test_package_surface.py | 3 +++ 2 files changed, 12 insertions(+) diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index a673e22..a011e2b 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -28,6 +28,7 @@ from extended_data.connectors.google.workspace import GoogleWorkspaceMixin from extended_data.connectors.meshy.connector import MeshyConnector from extended_data.connectors.registry import BUILTIN_CONNECTORS +from extended_data.connectors.secrets import SecretsConnector from extended_data.connectors.slack import SlackConnector from extended_data.connectors.surface import connector_data_methods, is_connector_data_method from extended_data.connectors.vault import VaultConnector @@ -178,6 +179,14 @@ (MeshyConnector.rig_model, ExtendedDict | ExtendedString), (MeshyConnector.apply_animation, ExtendedDict | ExtendedString), (MeshyConnector.retexture_model, ExtendedDict | ExtendedString), + (SecretsConnector.validate_config, ExtendedDict), + (SecretsConnector.get_config_info, 
ExtendedDict), + (SecretsConnector.run_pipeline, ExtendedDict), + (SecretsConnector.dry_run, ExtendedDict), + (SecretsConnector.merge, ExtendedDict), + (SecretsConnector.sync, ExtendedDict), + (SecretsConnector.get_targets, ExtendedDict), + (SecretsConnector.get_sources, ExtendedDict), (SlackConnector.send_message, ExtendedString | ExtendedDict), (SlackConnector.get_bot_channels, ExtendedDict), (SlackConnector.list_users, ExtendedDict), diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 92870fe..88a6164 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -191,4 +191,7 @@ def test_clean_major_version_does_not_preserve_duplicate_tool_modules() -> None: """Secrets tool factories live on the package root and connector implementation module.""" assert util.find_spec("extended_data.secrets.tools") is None assert callable(secrets.get_tools) + assert callable(secrets.get_langchain_tools) + assert callable(secrets.get_crewai_tools) + assert callable(secrets.get_strands_tools) assert callable(connectors.SecretsConnector) From d51e3463def52c45b22baca0226e4ab08d10681e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:22:50 -0500 Subject: [PATCH 148/287] refactor: keep sorted default dict internal --- README.md | 2 ++ docs/package-surface.md | 3 +++ src/extended_data/__init__.py | 2 -- src/extended_data/primitives/__init__.py | 2 -- tests/core/test_package_surface.py | 2 ++ 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 798ccc6..21c4570 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,8 @@ Tier 1 primitive names are explicit in this major version. Use string conversion. The old `bytestostr` and `strto*` helper names are not preserved. Old package import namespaces are not shimmed; missing imports are intentional so remaining migration work fails fast. 
+Tier 1 public exports stay function-oriented; use `get_default_dict()` for +nested or sorted default mappings instead of importing the internal helper class. Vendor connectors are first-class adapters in the data fabric. `ConnectorFabric` uses the registry to resolve connectors by name, injects shared input/logging diff --git a/docs/package-surface.md b/docs/package-surface.md index 1b55c66..565b566 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -53,6 +53,9 @@ family (`string_to_bool()`, `string_to_int()`, `string_to_float()`, `string_to_path()`, `string_to_date()`, `string_to_datetime()`, and `string_to_time()`). The old `bytestostr` and `strto*` helper names are intentionally not preserved. +Tier 1 public exports stay function-oriented; use `get_default_dict()` when a +workflow needs nested or sorted default mappings rather than importing the +internal sorted-default mapping helper class. Direct JSON, YAML, TOML, and HCL decode failures raise `DataDecodeError` with format and position context while preserving the parser exception as the cause; diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index dec81b6..8b58037 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -58,7 +58,6 @@ update_docstring, ) from extended_data.primitives.mappings import ( - SortedDefaultDict, all_values_from_map, create_merger, deduplicate_map, @@ -225,7 +224,6 @@ def __getattr__(name: str) -> Any: "OutputFormat", "SecretsConnector", "SlackConnector", - "SortedDefaultDict", "StepLike", "SyncOperation", "SyncOptions", diff --git a/src/extended_data/primitives/__init__.py b/src/extended_data/primitives/__init__.py index 8d22168..1562cdc 100644 --- a/src/extended_data/primitives/__init__.py +++ b/src/extended_data/primitives/__init__.py @@ -20,7 +20,6 @@ update_docstring, ) from extended_data.primitives.mappings import ( - SortedDefaultDict, all_values_from_map, create_merger, deduplicate_map, @@ -92,7 +91,6 @@ 
__all__ = [ - "SortedDefaultDict", "all_non_empty", "all_non_empty_in_dict", "all_non_empty_in_list", diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 88a6164..0775fb5 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -88,6 +88,8 @@ def test_clean_major_version_public_names() -> None: assert not hasattr(extended_data, "GoogleCloudConnector") assert not hasattr(extended_data, "GoogleWorkspaceConnector") assert not hasattr(extended_data, "GoogleBillingConnector") + assert not hasattr(primitives, "SortedDefaultDict") + assert not hasattr(extended_data, "SortedDefaultDict") assert not hasattr(primitives, "removeprefix") assert not hasattr(primitives, "removesuffix") assert not hasattr(primitives, "bytestostr") From ebc96c0e44639da4b3aae786067be0c7795ecc29 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:30:50 -0500 Subject: [PATCH 149/287] refactor: require vault role prefix keyword --- README.md | 5 +++-- docs/package-surface.md | 5 +++-- src/extended_data/connectors/vault/__init__.py | 8 ++++---- tests/connectors/test_vault_connector.py | 11 ++++++++++- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 21c4570..8bb34ae 100644 --- a/README.md +++ b/README.md @@ -123,8 +123,9 @@ Google registry names are unified as well: use `google` for Workspace, Cloud, Billing, and service discovery rather than split `google_*` connector aliases. AWS Secrets Manager prefix loading is exposed as the generic `load_secrets_by_prefix()` data method rather than as a vendor-specific helper. -AWS secret listing and deletion APIs use the canonical `prefix` keyword; the old -`name_prefix` convenience keyword is intentionally not preserved. +AWS secret listing/deletion and Vault role filtering APIs use the canonical +`prefix` keyword; the old `name_prefix` convenience keyword is intentionally not +preserved. 
Connector data payloads are promoted into Tier 2 containers at the boundary, so decoded files, HTTP response data, GraphQL responses, and SDK-shaped maps can use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods immediately. diff --git a/docs/package-surface.md b/docs/package-surface.md index 565b566..44016a6 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -190,8 +190,9 @@ AWS Secrets Manager prefix loading is generic too: use `AWSConnector.load_secrets_by_prefix()` when a workflow needs a promoted mapping of secret names to values. The old vendor-specific ASM loader name is intentionally not preserved. -AWS secret listing and deletion methods use the canonical `prefix` keyword. The -old `name_prefix` convenience keyword is intentionally not preserved. +AWS secret listing/deletion and Vault role filtering use the canonical `prefix` +keyword. The old `name_prefix` convenience keyword is intentionally not +preserved. ## Connector Fabric diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index 7f36945..39ee8f3 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -405,13 +405,13 @@ def write_secret( def list_aws_iam_roles( self, mount_point: str = "aws", - name_prefix: str | None = None, + prefix: str | None = None, ) -> ExtendedList[ExtendedString]: """List AWS IAM roles configured in Vault's AWS secrets engine. Args: mount_point: AWS secrets engine mount point (default: "aws"). - name_prefix: Optional prefix filter for role names. + prefix: Optional prefix filter for role names. Returns: List of role names available for credential generation. 
@@ -428,8 +428,8 @@ def list_aws_iam_roles( return self.extend_result([]) role_names = response.get("data", {}).get("keys", []) or [] - if name_prefix: - role_names = [role for role in role_names if role.startswith(name_prefix)] + if prefix: + role_names = [role for role in role_names if role.startswith(prefix)] self.logger.info(f"Found {len(role_names)} AWS IAM roles under mount {mount_point}") return self.extend_result(role_names) diff --git a/tests/connectors/test_vault_connector.py b/tests/connectors/test_vault_connector.py index 6a7c48f..cd321a2 100644 --- a/tests/connectors/test_vault_connector.py +++ b/tests/connectors/test_vault_connector.py @@ -176,13 +176,22 @@ def test_list_aws_iam_roles_filters_prefix(self, base_connector_kwargs): mock_client.secrets.aws.list_roles.return_value = {"data": {"keys": ["prod-sync", "dev-sync"]}} - roles = connector.list_aws_iam_roles(name_prefix="prod") + roles = connector.list_aws_iam_roles(prefix="prod") assert isinstance(roles, ExtendedList) assert isinstance(roles[0], ExtendedString) assert roles == ["prod-sync"] mock_client.secrets.aws.list_roles.assert_called_once_with(mount_point="aws") + def test_list_aws_iam_roles_does_not_preserve_name_prefix_alias(self, base_connector_kwargs): + """Clean major-version surface should not preserve the old name_prefix keyword.""" + connector = VaultConnector( + vault_url="https://vault.example.com", vault_token="test-token", **base_connector_kwargs + ) + + with pytest.raises(TypeError, match="name_prefix"): + connector.list_aws_iam_roles(name_prefix="prod") # type: ignore[call-arg] + def test_list_aws_iam_roles_handles_errors(self, base_connector_kwargs): """Vault errors while listing roles should return an empty list.""" connector = VaultConnector( From 1883824b588ccbd7bd72dedd5a6cd6de4e7fcae7 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:34:34 -0500 Subject: [PATCH 150/287] feat: expose extended list value filtering --- README.md | 2 ++ 
docs/package-surface.md | 1 + src/extended_data/containers/sequences.py | 11 ++++++++++- src/extended_data/primitives/sequences.py | 22 +++++++++++++--------- tests/core/test_containers.py | 4 ++++ tests/core/test_list_data_type.py | 6 ++++++ 6 files changed, 36 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 8bb34ae..e4305e2 100644 --- a/README.md +++ b/README.md @@ -167,6 +167,8 @@ Tier 3 decoders return Tier 2 containers by default, so data files, Base64 payloads, and directed inputs can immediately use `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, `ExtendedSet`, and `ExtendedString` methods. +`ExtendedList.filter_values()` exposes the Tier 1 allowlist/denylist list +filtering primitive as a chainable container operation. Generic type routing can still ask for plain data roles with `typeof(value, primitive_only=True)`, which treats Extended containers as their underlying `str`, `list`, `dict`, and `set` roles. diff --git a/docs/package-surface.md b/docs/package-surface.md index 44016a6..51059c5 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -65,6 +65,7 @@ the public error message does not echo the raw payload. 
name = ExtendedString("API Response Value").to_snake_case() payload = ExtendedDict({"outer": {"inner": 1}}).flatten() items = ExtendedList([1, [2, [3]]]).flatten() +services = ExtendedList(["api", "worker", "db"]).filter_values(allowlist=["api", "worker"]) aliases = ExtendedTuple(("api", ("gateway",))).flatten() tags = ExtendedSet({"prod", "prod", ""}).compact() words = number_to_words(42) diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index e3a3745..fdbe86d 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -7,7 +7,7 @@ from operator import index as operator_index from typing import Any, SupportsIndex, TypeVar, cast, overload -from extended_data.primitives.sequences import flatten_list +from extended_data.primitives.sequences import filter_list, flatten_list from extended_data.primitives.state import is_nothing from extended_data.primitives.types import make_hashable @@ -85,6 +85,15 @@ def filter(self, predicate: Callable[[T], bool]) -> ExtendedList[T]: """Return a copy containing items accepted by a predicate.""" return ExtendedList(item for item in self.data if predicate(item)) + def filter_values( + self, + *, + allowlist: Iterable[T] | None = None, + denylist: Iterable[T] | None = None, + ) -> ExtendedList[T]: + """Return a copy filtered by explicit allowed and denied values.""" + return ExtendedList(filter_list(self.data, allowlist=allowlist, denylist=denylist)) + def unique(self) -> ExtendedList[T]: """Return a copy with duplicate values removed while preserving order.""" seen: set[Any] = set() diff --git a/src/extended_data/primitives/sequences.py b/src/extended_data/primitives/sequences.py index 7aee032..54c3375 100644 --- a/src/extended_data/primitives/sequences.py +++ b/src/extended_data/primitives/sequences.py @@ -10,7 +10,11 @@ from __future__ import annotations -from typing import Any +from collections.abc import Iterable +from typing import Any, 
TypeVar + + +T = TypeVar("T") def flatten_list(matrix: list[Any]) -> list[Any]: @@ -44,19 +48,19 @@ def _flatten(lst: list[Any]) -> list[Any]: def filter_list( - items: list[str] | None, - allowlist: list[str] | None = None, - denylist: list[str] | None = None, -) -> list[str]: + items: Iterable[T] | None, + allowlist: Iterable[T] | None = None, + denylist: Iterable[T] | None = None, +) -> list[T]: """Filters a list based on allowlist and denylist. Args: - items (list[str] | None): The list to filter. - allowlist (list[str] | None): The list of allowed items. - denylist (list[str] | None): The list of denied items. + items: The values to filter. + allowlist: The allowed values. + denylist: The denied values. Returns: - list[str]: The filtered list. + The filtered list. """ if items is None: items = [] diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 823581b..89f3814 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -132,6 +132,10 @@ def test_extended_list_composes_sequence_primitives() -> None: assert value.unique() == [1, [2, [3]], "", 2] assert value.filter(lambda item: isinstance(item, int)) == [1, 2] assert ExtendedList([1, 2]).map(lambda item: item * 2) == [2, 4] + assert ExtendedList(["api", "worker", "db"]).filter_values( + allowlist=["api", "worker"], + denylist=["worker"], + ) == ["api"] def test_extended_list_promotes_nested_values_on_mutation() -> None: diff --git a/tests/core/test_list_data_type.py b/tests/core/test_list_data_type.py index f94fe23..5ec5108 100644 --- a/tests/core/test_list_data_type.py +++ b/tests/core/test_list_data_type.py @@ -121,6 +121,12 @@ def test_filter_list_empty_allowlist_behaves_like_no_filter( assert result == test_list +def test_filter_list_handles_non_string_values() -> None: + """Filtering is generic across hashable value types.""" + result = filter_list([1, 2, 3, 4], allowlist={1, 2, 4}, denylist={4}) + assert result == [1, 2] + + def 
test_filter_list_denylist(test_list: list[str], denylist: list[str]) -> None: """Tests filtering a list with a denylist. From d66c91f428cf24681accc021627def1618dc0012 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:35:28 -0500 Subject: [PATCH 151/287] fix: support unhashable list value filters --- src/extended_data/primitives/sequences.py | 13 +++++-------- tests/core/test_list_data_type.py | 12 +++++++++++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/extended_data/primitives/sequences.py b/src/extended_data/primitives/sequences.py index 54c3375..b87ee04 100644 --- a/src/extended_data/primitives/sequences.py +++ b/src/extended_data/primitives/sequences.py @@ -66,21 +66,18 @@ def filter_list( items = [] allowlist_provided = allowlist is not None - allowlist = allowlist or [] - denylist = denylist or [] + allowlist = list(allowlist or []) + denylist = list(denylist or []) - allowed_set = set(allowlist) - denied_set = set(denylist) - - enforce_allowlist = allowlist_provided and bool(allowed_set) + enforce_allowlist = allowlist_provided and bool(allowlist) filtered = [] for elem in items: - if enforce_allowlist and elem not in allowed_set: + if enforce_allowlist and elem not in allowlist: continue - if elem in denied_set: + if elem in denylist: continue filtered.append(elem) diff --git a/tests/core/test_list_data_type.py b/tests/core/test_list_data_type.py index 5ec5108..d81e254 100644 --- a/tests/core/test_list_data_type.py +++ b/tests/core/test_list_data_type.py @@ -122,11 +122,21 @@ def test_filter_list_empty_allowlist_behaves_like_no_filter( def test_filter_list_handles_non_string_values() -> None: - """Filtering is generic across hashable value types.""" + """Filtering is generic across value types.""" result = filter_list([1, 2, 3, 4], allowlist={1, 2, 4}, denylist={4}) assert result == [1, 2] +def test_filter_list_handles_unhashable_values() -> None: + """Filtering should not require set-compatible values.""" + api = 
{"name": "api"} + worker = {"name": "worker"} + db = {"name": "db"} + + result = filter_list([api, worker, db], allowlist=[api, worker], denylist=[worker]) + assert result == [api] + + def test_filter_list_denylist(test_list: list[str], denylist: list[str]) -> None: """Tests filtering a list with a denylist. From 29acc3622d1fe08d8c8b2c4e589fd9678b6f8630 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:36:49 -0500 Subject: [PATCH 152/287] test: dogfood extended list filtering in workflows --- tests/core/test_workflows.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index 28a2c15..07d85c0 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -9,6 +9,7 @@ from extended_data import ( DataWorkflow, ExtendedDict, + ExtendedList, ExtendedTuple, WorkflowResult, WorkflowStep, @@ -61,18 +62,23 @@ def test_data_workflow_runs_named_value_transforms() -> None: """DataWorkflow can normalize in-memory API payloads through named steps.""" raw_payload = { "HTTPResponseCode": 200, - "SelectedServices": filter_list(["api", "worker", "db"], denylist=["db"]), + "SelectedServices": ["api", "worker", "db"], "Tags": ["api", "api", "docs"], } + def select_services(data: ExtendedDict) -> ExtendedDict: + return data | {"SelectedServices": data["SelectedServices"].filter_values(denylist=["db"])} + workflow = DataWorkflow.from_value(raw_payload).run( + ("select-services", select_services), ("deduplicate", lambda data: data.deduplicate()), ("unhump", lambda data: data.unhump()), ) result = workflow.result() - assert workflow.steps == ("value", "deduplicate", "unhump") + assert workflow.steps == ("value", "select-services", "deduplicate", "unhump") assert isinstance(workflow.value, ExtendedDict) + assert isinstance(workflow.value["selected_services"], ExtendedList) assert result.as_builtin() == { "http_response_code": 200, "selected_services": ["api", 
"worker"], From dd0eed3c21bb7409c70aefe280629cd021f98013 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:38:38 -0500 Subject: [PATCH 153/287] refactor: type connector names as extended strings --- src/extended_data/connectors/connectors.py | 4 ++-- src/extended_data/connectors/registry.py | 4 ++-- tests/core/test_package_surface.py | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index 3e6f040..ff046cf 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -22,7 +22,7 @@ list_connectors as list_registered_connectors, ) from extended_data.connectors.zoom import ZoomConnector -from extended_data.containers import ExtendedDict, ExtendedList +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.inputs import InputProvider from extended_data.logging import Logging @@ -91,7 +91,7 @@ def _set_cached_client(self, client_type: str, client: Any, **kwargs: Any) -> No cache_key = self._get_cache_key(**kwargs) self._client_cache[client_type][cache_key] = client - def list_connectors(self) -> ExtendedList[Any]: + def list_connectors(self) -> ExtendedList[ExtendedString]: """List connector names available in the current environment.""" return list_registered_connectors() diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 89a78fe..0d07a97 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -41,7 +41,7 @@ get_extra_for_connector, get_missing_connector_requirements, ) -from extended_data.containers import ExtendedDict, ExtendedList, extend_data +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data if TYPE_CHECKING: @@ -185,7 +185,7 @@ def _list_connector_classes() -> dict[str, builtins.type[VendorConnectorBase]]: 
return _discover_connectors().copy() -def list_connectors() -> ExtendedList[Any]: +def list_connectors() -> ExtendedList[ExtendedString]: """List available connector names. Returns: diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 0775fb5..fe46f3c 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -5,6 +5,7 @@ from importlib import util from importlib.metadata import version from types import ModuleType +from typing import get_type_hints import extended_data import extended_data.logging as lifecycle_logging @@ -148,6 +149,8 @@ def test_root_exports_first_class_integrated_primitives() -> None: connector_names = extended_data.list_connectors() assert isinstance(connector_names, ExtendedList) assert isinstance(connector_names[0], ExtendedString) + assert get_type_hints(connectors.list_connectors)["return"] == ExtendedList[ExtendedString] + assert get_type_hints(ConnectorFabric.list_connectors)["return"] == ExtendedList[ExtendedString] assert "github" in connector_names From 8186290806ccfa000e221d304eeb306e50d95df6 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:49:00 -0500 Subject: [PATCH 154/287] feat: expose split by type on containers --- README.md | 2 ++ docs/package-surface.md | 5 ++++- src/extended_data/containers/mappings.py | 8 ++++++++ src/extended_data/containers/sequences.py | 9 +++++++++ tests/core/test_containers.py | 18 ++++++++++++++++++ 5 files changed, 41 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e4305e2..5da9647 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,8 @@ data files, Base64 payloads, and directed inputs can immediately use `ExtendedString` methods. `ExtendedList.filter_values()` exposes the Tier 1 allowlist/denylist list filtering primitive as a chainable container operation. 
+`ExtendedList.split_by_type()` and `ExtendedDict.split_by_type()` expose the +Tier 1 type-splitting primitives as type-name keyed `ExtendedDict` results. Generic type routing can still ask for plain data roles with `typeof(value, primitive_only=True)`, which treats Extended containers as their underlying `str`, `list`, `dict`, and `set` roles. diff --git a/docs/package-surface.md b/docs/package-surface.md index 51059c5..63e2590 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -66,6 +66,7 @@ name = ExtendedString("API Response Value").to_snake_case() payload = ExtendedDict({"outer": {"inner": 1}}).flatten() items = ExtendedList([1, [2, [3]]]).flatten() services = ExtendedList(["api", "worker", "db"]).filter_values(allowlist=["api", "worker"]) +typed_items = ExtendedList(["api", 2, True]).split_by_type(primitive_only=True) aliases = ExtendedTuple(("api", ("gateway",))).flatten() tags = ExtendedSet({"prod", "prod", ""}).compact() words = number_to_words(42) @@ -96,7 +97,9 @@ values containing `ExtendedString` parts, while `partition()` and Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected `ExtendedDict` values, and `ExtendedDict.all_values()` returns an -`ExtendedList`. +`ExtendedList`. `ExtendedList.split_by_type()` and +`ExtendedDict.split_by_type()` expose the Tier 1 split helpers as type-name +keyed `ExtendedDict` results. Generic type routing can still ask for plain data roles: `typeof(value, primitive_only=True)` reports Extended strings, lists, tuples, mappings, and sets as `str`, `list`, `list`, `dict`, and `set`. 
diff --git a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py index 52f3997..150f91d 100644 --- a/src/extended_data/containers/mappings.py +++ b/src/extended_data/containers/mappings.py @@ -23,6 +23,7 @@ flatten_map, unhump_map, ) +from extended_data.primitives.splitting import split_dict_by_type from extended_data.primitives.state import all_non_empty_in_dict @@ -130,6 +131,13 @@ def all_values(self) -> ExtendedList[Any]: return extend_data(all_values_from_map(to_builtin(self.data))) + def split_by_type(self, *, primitive_only: bool = False) -> ExtendedDict: + """Return mapping entries grouped by value type name.""" + from extended_data.containers.factory import extend_data, to_builtin + + grouped = split_dict_by_type(to_builtin(self.data), primitive_only=primitive_only) + return extend_data({type_key.__name__: values for type_key, values in grouped.items()}) + def first_non_empty_value(self, *keys: str) -> Any: """Return the first non-empty value for the provided keys.""" from extended_data.containers.factory import to_builtin diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index fdbe86d..0501212 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -7,7 +7,9 @@ from operator import index as operator_index from typing import Any, SupportsIndex, TypeVar, cast, overload +from extended_data.containers.mappings import ExtendedDict from extended_data.primitives.sequences import filter_list, flatten_list +from extended_data.primitives.splitting import split_list_by_type from extended_data.primitives.state import is_nothing from extended_data.primitives.types import make_hashable @@ -94,6 +96,13 @@ def filter_values( """Return a copy filtered by explicit allowed and denied values.""" return ExtendedList(filter_list(self.data, allowlist=allowlist, denylist=denylist)) + def split_by_type(self, *, primitive_only: bool = False) -> 
ExtendedDict: + """Return values grouped by type name.""" + from extended_data.containers.factory import extend_data, to_builtin + + grouped = split_list_by_type(to_builtin(self.data), primitive_only=primitive_only) + return extend_data({type_key.__name__: values for type_key, values in grouped.items()}) + def unique(self) -> ExtendedList[T]: """Return a copy with duplicate values removed while preserving order.""" seen: set[Any] = set() diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 89f3814..d96c78c 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -62,16 +62,21 @@ def test_extended_string_chains_primitive_transforms() -> None: def test_extended_dict_composes_mapping_primitives() -> None: """ExtendedDict composes Tier 1 mapping primitives.""" value = ExtendedDict({"outer": {"inner": 1}, "items": [1, 1, 2], "empty": ""}) + typed = ExtendedDict({"service": "api", "retries": 2, "enabled": True, "ports": [80, 443]}) merged = value.deep_merge({"outer": {"other": 2}}) filtered = merged.filter(allowlist=["outer"]) accepted, rejected = filtered all_values = value.all_values() + split = typed.split_by_type(primitive_only=True) assert isinstance(filtered, ExtendedTuple) assert isinstance(accepted, ExtendedDict) assert isinstance(rejected, ExtendedDict) assert isinstance(all_values, ExtendedList) + assert isinstance(split, ExtendedDict) + assert isinstance(split["str"], ExtendedDict) + assert isinstance(split["list"], ExtendedDict) assert merged["outer"] == {"inner": 1, "other": 2} assert value["outer"] == {"inner": 1} assert value.flatten() == {"outer.inner": 1, "items.0": 1, "items.1": 1, "items.2": 2, "empty": ""} @@ -81,6 +86,10 @@ def test_extended_dict_composes_mapping_primitives() -> None: assert "items" in rejected assert all_values == [1, 1, 1, 2, ""] assert isinstance(all_values[-1], ExtendedString) + assert split["str"] == {"service": "api"} + assert split["int"] == {"retries": 2} + assert 
split["bool"] == {"enabled": True} + assert split["list"] == {"ports": [80, 443]} def test_extended_dict_promotes_nested_values_on_mutation() -> None: @@ -126,6 +135,7 @@ def test_extended_dict_promotes_nested_values_on_mutation() -> None: def test_extended_list_composes_sequence_primitives() -> None: """ExtendedList composes Tier 1 sequence primitives.""" value = ExtendedList([1, [2, [3]], "", 2]) + typed = ExtendedList(["api", 2, True, ["nested"]]) assert value.flatten() == [1, 2, 3, "", 2] assert value.compact() == [1, [2, [3]], 2] @@ -136,6 +146,14 @@ def test_extended_list_composes_sequence_primitives() -> None: allowlist=["api", "worker"], denylist=["worker"], ) == ["api"] + split = typed.split_by_type(primitive_only=True) + assert isinstance(split, ExtendedDict) + assert isinstance(split["str"], ExtendedList) + assert isinstance(split["list"], ExtendedList) + assert split["str"] == ["api"] + assert split["int"] == [2] + assert split["bool"] == [True] + assert split["list"] == [["nested"]] def test_extended_list_promotes_nested_values_on_mutation() -> None: From b468d398d5ae618ec34fa3a998b9a6829b161470 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:50:28 -0500 Subject: [PATCH 155/287] feat: expose tuple split by type --- README.md | 5 +++-- docs/package-surface.md | 8 +++++--- src/extended_data/containers/sequences.py | 7 +++++++ tests/core/test_containers.py | 9 +++++++++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5da9647..07a312e 100644 --- a/README.md +++ b/README.md @@ -169,8 +169,9 @@ data files, Base64 payloads, and directed inputs can immediately use `ExtendedString` methods. `ExtendedList.filter_values()` exposes the Tier 1 allowlist/denylist list filtering primitive as a chainable container operation. -`ExtendedList.split_by_type()` and `ExtendedDict.split_by_type()` expose the -Tier 1 type-splitting primitives as type-name keyed `ExtendedDict` results. 
+`ExtendedList.split_by_type()`, `ExtendedTuple.split_by_type()`, and +`ExtendedDict.split_by_type()` expose the Tier 1 type-splitting primitives as +type-name keyed `ExtendedDict` results. Generic type routing can still ask for plain data roles with `typeof(value, primitive_only=True)`, which treats Extended containers as their underlying `str`, `list`, `dict`, and `set` roles. diff --git a/docs/package-surface.md b/docs/package-surface.md index 63e2590..cb475ba 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -67,6 +67,7 @@ payload = ExtendedDict({"outer": {"inner": 1}}).flatten() items = ExtendedList([1, [2, [3]]]).flatten() services = ExtendedList(["api", "worker", "db"]).filter_values(allowlist=["api", "worker"]) typed_items = ExtendedList(["api", 2, True]).split_by_type(primitive_only=True) +typed_aliases = ExtendedTuple(("api", 2, True)).split_by_type(primitive_only=True) aliases = ExtendedTuple(("api", ("gateway",))).flatten() tags = ExtendedSet({"prod", "prod", ""}).compact() words = number_to_words(42) @@ -97,9 +98,10 @@ values containing `ExtendedString` parts, while `partition()` and Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected `ExtendedDict` values, and `ExtendedDict.all_values()` returns an -`ExtendedList`. `ExtendedList.split_by_type()` and -`ExtendedDict.split_by_type()` expose the Tier 1 split helpers as type-name -keyed `ExtendedDict` results. +`ExtendedList`. `ExtendedList.split_by_type()`, +`ExtendedTuple.split_by_type()`, and `ExtendedDict.split_by_type()` expose the +Tier 1 split helpers as type-name keyed `ExtendedDict` results; tuple inputs +keep tuple-shaped grouped values. Generic type routing can still ask for plain data roles: `typeof(value, primitive_only=True)` reports Extended strings, lists, tuples, mappings, and sets as `str`, `list`, `list`, `dict`, and `set`. 
diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index 0501212..ac4fe84 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -208,6 +208,13 @@ def unique(self) -> ExtendedTuple[T]: values.append(item) return ExtendedTuple(values) + def split_by_type(self, *, primitive_only: bool = False) -> ExtendedDict: + """Return values grouped by type name while keeping tuple-shaped groups.""" + from extended_data.containers.factory import extend_data, to_builtin + + grouped = split_list_by_type(list(to_builtin(self)), primitive_only=primitive_only) + return extend_data({type_key.__name__: tuple(values) for type_key, values in grouped.items()}) + def to_tuple(self) -> tuple[T, ...]: """Return a plain tuple copy.""" return tuple(self) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index d96c78c..11f8bad 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -228,12 +228,21 @@ def test_extended_set_named_mutators_preserve_extended_values() -> None: def test_extended_tuple_preserves_immutable_sequence_shape() -> None: """ExtendedTuple composes sequence primitives without becoming an ExtendedList.""" value = ExtendedTuple((1, (2, [3]), "", 2)) + typed = ExtendedTuple(("api", 2, True, ["nested"])) + split = typed.split_by_type(primitive_only=True) assert value.flatten() == (1, 2, 3, "", 2) assert value.compact() == (1, (2, [3]), 2) assert value.unique() == (1, (2, [3]), "", 2) assert value.filter(lambda item: isinstance(item, int)) == (1, 2) assert value.map(lambda item: item * 2 if isinstance(item, int) else item) == (2, (2, [3]), "", 4) + assert isinstance(split, ExtendedDict) + assert isinstance(split["str"], ExtendedTuple) + assert isinstance(split["list"], ExtendedTuple) + assert split["str"] == ("api",) + assert split["int"] == (2,) + assert split["bool"] == (True,) + assert split["list"] == (["nested"],) def 
test_extended_tuple_promotes_nested_values() -> None: From f11bec0c72296b70f8deeb867a3cb8cee561fb5b Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:54:49 -0500 Subject: [PATCH 156/287] feat: expose matching on extended strings --- README.md | 4 +++- docs/package-surface.md | 5 ++++- src/extended_data/containers/strings.py | 10 ++++++++++ tests/core/test_containers.py | 4 ++++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 07a312e..d0ed3e5 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,9 @@ Generic type routing can still ask for plain data roles with underlying `str`, `list`, `dict`, and `set` roles. String tokenization stays inside the same surface: `ExtendedString.split()` returns an `ExtendedList` of `ExtendedString` values, and partition operations -return `ExtendedTuple` values. +return `ExtendedTuple` values. `ExtendedString.is_partial_match()` and +`ExtendedString.is_non_empty_match()` expose the Tier 1 matching primitives +without requiring callers to drop back to function-only utility code. Format encoders lower extended containers, including extended mapping keys, at the serialization boundary. `read_data_file()` is the direct file boundary for one-step read plus decode diff --git a/docs/package-surface.md b/docs/package-surface.md index cb475ba..4e6cab9 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -63,6 +63,7 @@ the public error message does not echo the raw payload. 
```python name = ExtendedString("API Response Value").to_snake_case() +matched = ExtendedString("api-gateway").is_partial_match("gateway") payload = ExtendedDict({"outer": {"inner": 1}}).flatten() items = ExtendedList([1, [2, [3]]]).flatten() services = ExtendedList(["api", "worker", "db"]).filter_values(allowlist=["api", "worker"]) @@ -93,7 +94,9 @@ String tokenization and partitioning paths are covered too: `ExtendedString.split()`, `rsplit()`, and `splitlines()` return `ExtendedList` values containing `ExtendedString` parts, while `partition()` and `rpartition()` return `ExtendedTuple` values. String formatting paths -`format()` and `format_map()` return `ExtendedString`. +`format()` and `format_map()` return `ExtendedString`. String matching paths +`is_partial_match()` and `is_non_empty_match()` expose the Tier 1 matching +helpers through `ExtendedString`. Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected diff --git a/src/extended_data/containers/strings.py b/src/extended_data/containers/strings.py index 573ae7e..b1e4cd9 100644 --- a/src/extended_data/containers/strings.py +++ b/src/extended_data/containers/strings.py @@ -6,6 +6,8 @@ from collections.abc import Iterable, Mapping from typing import TYPE_CHECKING +import extended_data.primitives.matching as primitive_matching + from extended_data.primitives.string_transforms import ( humanize, ordinalize, @@ -148,6 +150,14 @@ def join(self, seq: Iterable[str | UserString]) -> ExtendedString: # type: igno """Join string-like values into an extended string.""" return ExtendedString(self.data.join(_coerce_string_argument(item) for item in seq)) + def is_partial_match(self, other: str | None, *, check_prefix_only: bool = False) -> bool: + """Return whether this string partially matches another string.""" + return primitive_matching.is_partial_match(self.data, other, check_prefix_only=check_prefix_only) + + def 
is_non_empty_match(self, other: object) -> bool: + """Return whether this string matches another non-empty string value.""" + return primitive_matching.is_non_empty_match(self.data, other) + def is_url(self) -> bool: """Return whether the string is a URL.""" return is_url(self.data) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 11f8bad..2100de6 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -38,6 +38,10 @@ def test_extended_string_chains_primitive_transforms() -> None: assert value.to_kebab_case() == "api-response-value" assert ExtendedString("1").ordinalize() == "1st" assert ExtendedString("yes").to_bool() is True + assert ExtendedString("api-gateway").is_partial_match("gateway") is True + assert ExtendedString("api").is_partial_match("gateway", check_prefix_only=True) is False + assert ExtendedString("API").is_non_empty_match("api") is True + assert ExtendedString("").is_non_empty_match("api") is False assert isinstance(partitioned, ExtendedTuple) assert isinstance(partitioned[0], ExtendedString) assert partitioned == ("api", ".", "gateway.worker") From dfc2f658432b3f7005ca72d3a45a6df61cc9f9fe Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:56:50 -0500 Subject: [PATCH 157/287] fix: preserve extended first non-empty values --- README.md | 3 +++ docs/package-surface.md | 4 +++- src/extended_data/containers/mappings.py | 4 ++-- tests/core/test_containers.py | 6 ++++++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d0ed3e5..8e3df39 100644 --- a/README.md +++ b/README.md @@ -172,6 +172,9 @@ filtering primitive as a chainable container operation. `ExtendedList.split_by_type()`, `ExtendedTuple.split_by_type()`, and `ExtendedDict.split_by_type()` expose the Tier 1 type-splitting primitives as type-name keyed `ExtendedDict` results. 
+`ExtendedDict.first_non_empty_value()` returns the first matching non-empty +value as promoted Tier 2 data, so selected nested maps and lists remain +chainable. Generic type routing can still ask for plain data roles with `typeof(value, primitive_only=True)`, which treats Extended containers as their underlying `str`, `list`, `dict`, and `set` roles. diff --git a/docs/package-surface.md b/docs/package-surface.md index 4e6cab9..02a70d1 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -104,7 +104,9 @@ Container methods that return derived collections stay in Tier 2 as well: `ExtendedList`. `ExtendedList.split_by_type()`, `ExtendedTuple.split_by_type()`, and `ExtendedDict.split_by_type()` expose the Tier 1 split helpers as type-name keyed `ExtendedDict` results; tuple inputs -keep tuple-shaped grouped values. +keep tuple-shaped grouped values. `ExtendedDict.first_non_empty_value()` +returns promoted Tier 2 values when it selects nested maps, lists, tuples, sets, +or strings. Generic type routing can still ask for plain data roles: `typeof(value, primitive_only=True)` reports Extended strings, lists, tuples, mappings, and sets as `str`, `list`, `list`, `dict`, and `set`. 
diff --git a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py index 150f91d..2d29ebb 100644 --- a/src/extended_data/containers/mappings.py +++ b/src/extended_data/containers/mappings.py @@ -140,6 +140,6 @@ def split_by_type(self, *, primitive_only: bool = False) -> ExtendedDict: def first_non_empty_value(self, *keys: str) -> Any: """Return the first non-empty value for the provided keys.""" - from extended_data.containers.factory import to_builtin + from extended_data.containers.factory import extend_data, to_builtin - return first_non_empty_value_from_map(to_builtin(self.data), *keys) + return extend_data(first_non_empty_value_from_map(to_builtin(self.data), *keys)) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 2100de6..845b69b 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -73,6 +73,8 @@ def test_extended_dict_composes_mapping_primitives() -> None: accepted, rejected = filtered all_values = value.all_values() split = typed.split_by_type(primitive_only=True) + first_scalar = typed.first_non_empty_value("missing", "service") + first_nested = value.first_non_empty_value("missing", "outer") assert isinstance(filtered, ExtendedTuple) assert isinstance(accepted, ExtendedDict) @@ -81,6 +83,8 @@ def test_extended_dict_composes_mapping_primitives() -> None: assert isinstance(split, ExtendedDict) assert isinstance(split["str"], ExtendedDict) assert isinstance(split["list"], ExtendedDict) + assert isinstance(first_scalar, ExtendedString) + assert isinstance(first_nested, ExtendedDict) assert merged["outer"] == {"inner": 1, "other": 2} assert value["outer"] == {"inner": 1} assert value.flatten() == {"outer.inner": 1, "items.0": 1, "items.1": 1, "items.2": 2, "empty": ""} @@ -94,6 +98,8 @@ def test_extended_dict_composes_mapping_primitives() -> None: assert split["int"] == {"retries": 2} assert split["bool"] == {"enabled": True} assert split["list"] == {"ports": [80, 443]} 
+ assert first_scalar.upper_first() == "Api" + assert first_nested["inner"] == 1 def test_extended_dict_promotes_nested_values_on_mutation() -> None: From 3f943f8b63c2084a94ed2616a61387b505057ea9 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 12:58:33 -0500 Subject: [PATCH 158/287] feat: expose non-empty entry selectors --- README.md | 4 +++- docs/package-surface.md | 4 +++- src/extended_data/containers/mappings.py | 14 +++++++++++++- tests/core/test_containers.py | 8 ++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8e3df39..d53ccae 100644 --- a/README.md +++ b/README.md @@ -174,7 +174,9 @@ filtering primitive as a chainable container operation. type-name keyed `ExtendedDict` results. `ExtendedDict.first_non_empty_value()` returns the first matching non-empty value as promoted Tier 2 data, so selected nested maps and lists remain -chainable. +chainable. Use `ExtendedDict.first_non_empty_entry()` and +`ExtendedDict.non_empty_entries()` when callers need selected key/value entries +instead of just the selected value. Generic type routing can still ask for plain data roles with `typeof(value, primitive_only=True)`, which treats Extended containers as their underlying `str`, `list`, `dict`, and `set` roles. diff --git a/docs/package-surface.md b/docs/package-surface.md index 02a70d1..15e161e 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -106,7 +106,9 @@ Container methods that return derived collections stay in Tier 2 as well: Tier 1 split helpers as type-name keyed `ExtendedDict` results; tuple inputs keep tuple-shaped grouped values. `ExtendedDict.first_non_empty_value()` returns promoted Tier 2 values when it selects nested maps, lists, tuples, sets, -or strings. +or strings. `ExtendedDict.first_non_empty_entry()` and +`ExtendedDict.non_empty_entries()` return promoted keyed entries for workflows +that need to preserve the selected key context. 
Generic type routing can still ask for plain data roles: `typeof(value, primitive_only=True)` reports Extended strings, lists, tuples, mappings, and sets as `str`, `list`, `list`, `dict`, and `set`. diff --git a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py index 2d29ebb..9c35c16 100644 --- a/src/extended_data/containers/mappings.py +++ b/src/extended_data/containers/mappings.py @@ -24,7 +24,7 @@ unhump_map, ) from extended_data.primitives.splitting import split_dict_by_type -from extended_data.primitives.state import all_non_empty_in_dict +from extended_data.primitives.state import all_non_empty_in_dict, any_non_empty, yield_non_empty class ExtendedDict(UserDict[str, Any]): @@ -143,3 +143,15 @@ def first_non_empty_value(self, *keys: str) -> Any: from extended_data.containers.factory import extend_data, to_builtin return extend_data(first_non_empty_value_from_map(to_builtin(self.data), *keys)) + + def first_non_empty_entry(self, *keys: str) -> ExtendedDict: + """Return the first non-empty keyed entry for the provided keys.""" + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(any_non_empty(to_builtin(self.data), *keys)) + + def non_empty_entries(self, *keys: str) -> ExtendedList[ExtendedDict]: + """Return all non-empty keyed entries for the provided keys.""" + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(list(yield_non_empty(to_builtin(self.data), *keys))) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 845b69b..d16bf7f 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -75,6 +75,8 @@ def test_extended_dict_composes_mapping_primitives() -> None: split = typed.split_by_type(primitive_only=True) first_scalar = typed.first_non_empty_value("missing", "service") first_nested = value.first_non_empty_value("missing", "outer") + first_entry = typed.first_non_empty_entry("missing", 
"service", "ports") + entries = typed.non_empty_entries("missing", "service", "ports") assert isinstance(filtered, ExtendedTuple) assert isinstance(accepted, ExtendedDict) @@ -85,6 +87,9 @@ def test_extended_dict_composes_mapping_primitives() -> None: assert isinstance(split["list"], ExtendedDict) assert isinstance(first_scalar, ExtendedString) assert isinstance(first_nested, ExtendedDict) + assert isinstance(first_entry, ExtendedDict) + assert isinstance(entries, ExtendedList) + assert all(isinstance(entry, ExtendedDict) for entry in entries) assert merged["outer"] == {"inner": 1, "other": 2} assert value["outer"] == {"inner": 1} assert value.flatten() == {"outer.inner": 1, "items.0": 1, "items.1": 1, "items.2": 2, "empty": ""} @@ -100,6 +105,9 @@ def test_extended_dict_composes_mapping_primitives() -> None: assert split["list"] == {"ports": [80, 443]} assert first_scalar.upper_first() == "Api" assert first_nested["inner"] == 1 + assert first_entry["service"].upper_first() == "Api" + assert entries == [{"service": "api"}, {"ports": [80, 443]}] + assert isinstance(entries[1]["ports"], ExtendedList) def test_extended_dict_promotes_nested_values_on_mutation() -> None: From 31675f09d7a5a661b32f832fcce4e7cf34737a38 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:00:41 -0500 Subject: [PATCH 159/287] feat: expose first non-empty on ordered containers --- README.md | 2 ++ docs/package-surface.md | 4 +++- src/extended_data/containers/sequences.py | 9 +++++++++ tests/core/test_containers.py | 6 ++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d53ccae..beecbb0 100644 --- a/README.md +++ b/README.md @@ -172,6 +172,8 @@ filtering primitive as a chainable container operation. `ExtendedList.split_by_type()`, `ExtendedTuple.split_by_type()`, and `ExtendedDict.split_by_type()` expose the Tier 1 type-splitting primitives as type-name keyed `ExtendedDict` results. 
+`ExtendedList.first_non_empty()` and `ExtendedTuple.first_non_empty()` expose +ordered non-empty selection while preserving promoted nested values. `ExtendedDict.first_non_empty_value()` returns the first matching non-empty value as promoted Tier 2 data, so selected nested maps and lists remain chainable. Use `ExtendedDict.first_non_empty_entry()` and diff --git a/docs/package-surface.md b/docs/package-surface.md index 15e161e..96a32df 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -104,7 +104,9 @@ Container methods that return derived collections stay in Tier 2 as well: `ExtendedList`. `ExtendedList.split_by_type()`, `ExtendedTuple.split_by_type()`, and `ExtendedDict.split_by_type()` expose the Tier 1 split helpers as type-name keyed `ExtendedDict` results; tuple inputs -keep tuple-shaped grouped values. `ExtendedDict.first_non_empty_value()` +keep tuple-shaped grouped values. `ExtendedList.first_non_empty()` and +`ExtendedTuple.first_non_empty()` return the first ordered non-empty value +without lowering promoted nested data. `ExtendedDict.first_non_empty_value()` returns promoted Tier 2 values when it selects nested maps, lists, tuples, sets, or strings. 
`ExtendedDict.first_non_empty_entry()` and `ExtendedDict.non_empty_entries()` return promoted keyed entries for workflows diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index ac4fe84..81acacc 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -10,6 +10,7 @@ from extended_data.containers.mappings import ExtendedDict from extended_data.primitives.sequences import filter_list, flatten_list from extended_data.primitives.splitting import split_list_by_type +from extended_data.primitives.state import first_non_empty as primitive_first_non_empty from extended_data.primitives.state import is_nothing from extended_data.primitives.types import make_hashable @@ -103,6 +104,10 @@ def split_by_type(self, *, primitive_only: bool = False) -> ExtendedDict: grouped = split_list_by_type(to_builtin(self.data), primitive_only=primitive_only) return extend_data({type_key.__name__: values for type_key, values in grouped.items()}) + def first_non_empty(self) -> T | None: + """Return the first value not considered empty.""" + return cast(T | None, primitive_first_non_empty(*self.data)) + def unique(self) -> ExtendedList[T]: """Return a copy with duplicate values removed while preserving order.""" seen: set[Any] = set() @@ -215,6 +220,10 @@ def split_by_type(self, *, primitive_only: bool = False) -> ExtendedDict: grouped = split_list_by_type(list(to_builtin(self)), primitive_only=primitive_only) return extend_data({type_key.__name__: tuple(values) for type_key, values in grouped.items()}) + def first_non_empty(self) -> T | None: + """Return the first value not considered empty.""" + return cast(T | None, primitive_first_non_empty(*self)) + def to_tuple(self) -> tuple[T, ...]: """Return a plain tuple copy.""" return tuple(self) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index d16bf7f..22e4e85 100644 --- a/tests/core/test_containers.py +++ 
b/tests/core/test_containers.py @@ -154,10 +154,13 @@ def test_extended_list_composes_sequence_primitives() -> None: """ExtendedList composes Tier 1 sequence primitives.""" value = ExtendedList([1, [2, [3]], "", 2]) typed = ExtendedList(["api", 2, True, ["nested"]]) + first_nested = ExtendedList([None, "", {"service": "api"}]).first_non_empty() assert value.flatten() == [1, 2, 3, "", 2] assert value.compact() == [1, [2, [3]], 2] assert value.unique() == [1, [2, [3]], "", 2] + assert isinstance(first_nested, ExtendedDict) + assert first_nested["service"].upper_first() == "Api" assert value.filter(lambda item: isinstance(item, int)) == [1, 2] assert ExtendedList([1, 2]).map(lambda item: item * 2) == [2, 4] assert ExtendedList(["api", "worker", "db"]).filter_values( @@ -247,11 +250,14 @@ def test_extended_tuple_preserves_immutable_sequence_shape() -> None: """ExtendedTuple composes sequence primitives without becoming an ExtendedList.""" value = ExtendedTuple((1, (2, [3]), "", 2)) typed = ExtendedTuple(("api", 2, True, ["nested"])) + first_nested = ExtendedTuple((None, "", {"service": "api"})).first_non_empty() split = typed.split_by_type(primitive_only=True) assert value.flatten() == (1, 2, 3, "", 2) assert value.compact() == (1, (2, [3]), 2) assert value.unique() == (1, (2, [3]), "", 2) + assert isinstance(first_nested, ExtendedDict) + assert first_nested["service"].upper_first() == "Api" assert value.filter(lambda item: isinstance(item, int)) == (1, 2) assert value.map(lambda item: item * 2 if isinstance(item, int) else item) == (2, (2, [3]), "", 4) assert isinstance(split, ExtendedDict) From 0baf4d957cd9e85939443fe5f8e2cdbb7295f0be Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:02:41 -0500 Subject: [PATCH 160/287] feat: expose zipmap on ordered containers --- README.md | 2 ++ docs/package-surface.md | 4 +++- src/extended_data/containers/sequences.py | 17 +++++++++++++++++ tests/core/test_containers.py | 8 ++++++++ 4 files changed, 30 
insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index beecbb0..82386b9 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,8 @@ filtering primitive as a chainable container operation. type-name keyed `ExtendedDict` results. `ExtendedList.first_non_empty()` and `ExtendedTuple.first_non_empty()` expose ordered non-empty selection while preserving promoted nested values. +`ExtendedList.zipmap()` and `ExtendedTuple.zipmap()` compose ordered key +containers with value iterables and return promoted `ExtendedDict` mappings. `ExtendedDict.first_non_empty_value()` returns the first matching non-empty value as promoted Tier 2 data, so selected nested maps and lists remain chainable. Use `ExtendedDict.first_non_empty_entry()` and diff --git a/docs/package-surface.md b/docs/package-surface.md index 96a32df..06fc73a 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -106,7 +106,9 @@ Container methods that return derived collections stay in Tier 2 as well: Tier 1 split helpers as type-name keyed `ExtendedDict` results; tuple inputs keep tuple-shaped grouped values. `ExtendedList.first_non_empty()` and `ExtendedTuple.first_non_empty()` return the first ordered non-empty value -without lowering promoted nested data. `ExtendedDict.first_non_empty_value()` +without lowering promoted nested data. `ExtendedList.zipmap()` and +`ExtendedTuple.zipmap()` return promoted `ExtendedDict` mappings from ordered +key containers and value iterables. `ExtendedDict.first_non_empty_value()` returns promoted Tier 2 values when it selects nested maps, lists, tuples, sets, or strings. 
`ExtendedDict.first_non_empty_entry()` and `ExtendedDict.non_empty_entries()` return promoted keyed entries for workflows diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index 81acacc..3b35181 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -8,6 +8,7 @@ from typing import Any, SupportsIndex, TypeVar, cast, overload from extended_data.containers.mappings import ExtendedDict +from extended_data.primitives.mappings import zipmap as primitive_zipmap from extended_data.primitives.sequences import filter_list, flatten_list from extended_data.primitives.splitting import split_list_by_type from extended_data.primitives.state import first_non_empty as primitive_first_non_empty @@ -108,6 +109,14 @@ def first_non_empty(self) -> T | None: """Return the first value not considered empty.""" return cast(T | None, primitive_first_non_empty(*self.data)) + def zipmap(self, values: Iterable[str]) -> ExtendedDict: + """Return an extended mapping from this list's values to provided values.""" + from extended_data.containers.factory import extend_data, to_builtin + + keys = [str(item) for item in to_builtin(self.data)] + mapped_values = [str(item) for item in to_builtin(list(values))] + return extend_data(primitive_zipmap(keys, mapped_values)) + def unique(self) -> ExtendedList[T]: """Return a copy with duplicate values removed while preserving order.""" seen: set[Any] = set() @@ -224,6 +233,14 @@ def first_non_empty(self) -> T | None: """Return the first value not considered empty.""" return cast(T | None, primitive_first_non_empty(*self)) + def zipmap(self, values: Iterable[str]) -> ExtendedDict: + """Return an extended mapping from this tuple's values to provided values.""" + from extended_data.containers.factory import extend_data, to_builtin + + keys = [str(item) for item in to_builtin(tuple(self))] + mapped_values = [str(item) for item in to_builtin(list(values))] + return 
extend_data(primitive_zipmap(keys, mapped_values)) + def to_tuple(self) -> tuple[T, ...]: """Return a plain tuple copy.""" return tuple(self) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 22e4e85..dbc16ab 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -155,12 +155,16 @@ def test_extended_list_composes_sequence_primitives() -> None: value = ExtendedList([1, [2, [3]], "", 2]) typed = ExtendedList(["api", 2, True, ["nested"]]) first_nested = ExtendedList([None, "", {"service": "api"}]).first_non_empty() + mapped = ExtendedList(["service", "region", "ignored"]).zipmap(["api", "us-east-1"]) assert value.flatten() == [1, 2, 3, "", 2] assert value.compact() == [1, [2, [3]], 2] assert value.unique() == [1, [2, [3]], "", 2] assert isinstance(first_nested, ExtendedDict) assert first_nested["service"].upper_first() == "Api" + assert isinstance(mapped, ExtendedDict) + assert mapped == {"service": "api", "region": "us-east-1"} + assert mapped["service"].upper_first() == "Api" assert value.filter(lambda item: isinstance(item, int)) == [1, 2] assert ExtendedList([1, 2]).map(lambda item: item * 2) == [2, 4] assert ExtendedList(["api", "worker", "db"]).filter_values( @@ -251,6 +255,7 @@ def test_extended_tuple_preserves_immutable_sequence_shape() -> None: value = ExtendedTuple((1, (2, [3]), "", 2)) typed = ExtendedTuple(("api", 2, True, ["nested"])) first_nested = ExtendedTuple((None, "", {"service": "api"})).first_non_empty() + mapped = ExtendedTuple(("service", "region", "ignored")).zipmap(("api", "us-east-1")) split = typed.split_by_type(primitive_only=True) assert value.flatten() == (1, 2, 3, "", 2) @@ -258,6 +263,9 @@ def test_extended_tuple_preserves_immutable_sequence_shape() -> None: assert value.unique() == (1, (2, [3]), "", 2) assert isinstance(first_nested, ExtendedDict) assert first_nested["service"].upper_first() == "Api" + assert isinstance(mapped, ExtendedDict) + assert mapped == {"service": "api", 
"region": "us-east-1"} + assert mapped["service"].upper_first() == "Api" assert value.filter(lambda item: isinstance(item, int)) == (1, 2) assert value.map(lambda item: item * 2 if isinstance(item, int) else item) == (2, (2, [3]), "", 4) assert isinstance(split, ExtendedDict) From b4166cb5d3a6a87a28a66141c5e4e006ee020918 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:04:15 -0500 Subject: [PATCH 161/287] test: guard integrated container method surface --- tests/core/test_package_surface.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index fe46f3c..c122656 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -13,7 +13,7 @@ from extended_data import connectors, containers, inputs, io, primitives, secrets, workflows from extended_data.connectors.connectors import ConnectorFabric from extended_data.connectors.registry import BUILTIN_CONNECTORS -from extended_data.containers import ExtendedList, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple from extended_data.inputs import InputProvider from extended_data.logging import Logging @@ -154,6 +154,25 @@ def test_root_exports_first_class_integrated_primitives() -> None: assert "github" in connector_names +def test_tier2_container_methods_expose_integrated_primitives() -> None: + """Tier 2 containers should expose common primitive operations directly.""" + matched = ExtendedString("api-gateway").is_partial_match("gateway") + typed = ExtendedList(["api", 2]).split_by_type(primitive_only=True) + mapped = ExtendedTuple(("service", "region")).zipmap(("api", "us-east-1")) + first_entry = ExtendedDict({"empty": "", "service": "api"}).first_non_empty_entry("empty", "service") + selected = ExtendedList([None, "", {"service": "api"}]).first_non_empty() + + assert matched is True + assert 
isinstance(typed, ExtendedDict) + assert typed["str"] == ["api"] + assert isinstance(mapped, ExtendedDict) + assert mapped["service"].upper_first() == "Api" + assert isinstance(first_entry, ExtendedDict) + assert first_entry["service"].upper_first() == "Api" + assert isinstance(selected, ExtendedDict) + assert selected["service"].upper_first() == "Api" + + def test_connectors_root_exports_builtin_connector_classes() -> None: """Every built-in registry connector class is exported from the connector package root.""" for spec in BUILTIN_CONNECTORS.values(): From aa185ff66d762dec0c291cb25fe4442c1563913d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:09:21 -0500 Subject: [PATCH 162/287] feat: expose scalar conversions on extended strings --- README.md | 3 ++ docs/package-surface.md | 5 +++- src/extended_data/containers/strings.py | 37 ++++++++++++++++++++++++- tests/core/test_containers.py | 17 ++++++++++++ tests/core/test_package_surface.py | 2 ++ 5 files changed, 62 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 82386b9..c5c7044 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,9 @@ returns an `ExtendedList` of `ExtendedString` values, and partition operations return `ExtendedTuple` values. `ExtendedString.is_partial_match()` and `ExtendedString.is_non_empty_match()` expose the Tier 1 matching primitives without requiring callers to drop back to function-only utility code. +`ExtendedString.to_bool()`, `to_int()`, `to_float()`, `to_path()`, +`to_date()`, `to_datetime()`, and `to_time()` expose the Tier 1 scalar +conversion family as direct string-container methods. Format encoders lower extended containers, including extended mapping keys, at the serialization boundary. 
`read_data_file()` is the direct file boundary for one-step read plus decode diff --git a/docs/package-surface.md b/docs/package-surface.md index 06fc73a..c231173 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -96,7 +96,10 @@ values containing `ExtendedString` parts, while `partition()` and `rpartition()` return `ExtendedTuple` values. String formatting paths `format()` and `format_map()` return `ExtendedString`. String matching paths `is_partial_match()` and `is_non_empty_match()` expose the Tier 1 matching -helpers through `ExtendedString`. +helpers through `ExtendedString`. Scalar conversion paths `to_bool()`, +`to_int()`, `to_float()`, `to_path()`, `to_date()`, `to_datetime()`, and +`to_time()` expose the Tier 1 `string_to_*()` family directly on +`ExtendedString`. Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected diff --git a/src/extended_data/containers/strings.py b/src/extended_data/containers/strings.py index b1e4cd9..583fe3e 100644 --- a/src/extended_data/containers/strings.py +++ b/src/extended_data/containers/strings.py @@ -2,8 +2,11 @@ from __future__ import annotations +import datetime + from collections import UserString from collections.abc import Iterable, Mapping +from pathlib import Path from typing import TYPE_CHECKING import extended_data.primitives.matching as primitive_matching @@ -27,7 +30,15 @@ truncate, upper_first_char, ) -from extended_data.primitives.types import string_to_bool +from extended_data.primitives.types import ( + string_to_bool, + string_to_date, + string_to_datetime, + string_to_float, + string_to_int, + string_to_path, + string_to_time, +) if TYPE_CHECKING: @@ -165,3 +176,27 @@ def is_url(self) -> bool: def to_bool(self, *, raise_on_error: bool = False) -> bool | None: """Return a boolean parsed from the string.""" return string_to_bool(self.data, raise_on_error=raise_on_error) + + def to_float(self, *, 
raise_on_error: bool = False) -> float | None: + """Return a float parsed from the string.""" + return string_to_float(self.data, raise_on_error=raise_on_error) + + def to_int(self, *, raise_on_error: bool = False) -> int | None: + """Return an integer parsed from the string.""" + return string_to_int(self.data, raise_on_error=raise_on_error) + + def to_path(self, *, raise_on_error: bool = False) -> Path | None: + """Return a path parsed from the string.""" + return string_to_path(self.data, raise_on_error=raise_on_error) + + def to_date(self, *, raise_on_error: bool = False) -> datetime.date | None: + """Return a date parsed from the string.""" + return string_to_date(self.data, raise_on_error=raise_on_error) + + def to_datetime(self, *, raise_on_error: bool = False) -> datetime.datetime | None: + """Return a datetime parsed from the string.""" + return string_to_datetime(self.data, raise_on_error=raise_on_error) + + def to_time(self, *, raise_on_error: bool = False) -> datetime.time | None: + """Return a time parsed from the string.""" + return string_to_time(self.data, raise_on_error=raise_on_error) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index dbc16ab..1a14d4b 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -2,6 +2,9 @@ from __future__ import annotations +import datetime + +from pathlib import Path from typing import Any import extended_data @@ -38,6 +41,20 @@ def test_extended_string_chains_primitive_transforms() -> None: assert value.to_kebab_case() == "api-response-value" assert ExtendedString("1").ordinalize() == "1st" assert ExtendedString("yes").to_bool() is True + assert ExtendedString("42").to_int() == 42 + assert ExtendedString("3.14").to_float() == 3.14 + assert ExtendedString("/tmp/service.yaml").to_path() == Path("/tmp/service.yaml") + assert ExtendedString("2026-06-10").to_date() == datetime.date(2026, 6, 10) + assert ExtendedString("2026-06-10T12:30:00").to_datetime() == 
datetime.datetime( + 2026, + 6, + 10, + 12, + 30, + 0, + tzinfo=datetime.timezone.utc, + ) + assert ExtendedString("12:30").to_time() == datetime.time(12, 30) assert ExtendedString("api-gateway").is_partial_match("gateway") is True assert ExtendedString("api").is_partial_match("gateway", check_prefix_only=True) is False assert ExtendedString("API").is_non_empty_match("api") is True diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index c122656..b774ea9 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -157,12 +157,14 @@ def test_root_exports_first_class_integrated_primitives() -> None: def test_tier2_container_methods_expose_integrated_primitives() -> None: """Tier 2 containers should expose common primitive operations directly.""" matched = ExtendedString("api-gateway").is_partial_match("gateway") + parsed_int = ExtendedString("42").to_int() typed = ExtendedList(["api", 2]).split_by_type(primitive_only=True) mapped = ExtendedTuple(("service", "region")).zipmap(("api", "us-east-1")) first_entry = ExtendedDict({"empty": "", "service": "api"}).first_non_empty_entry("empty", "service") selected = ExtendedList([None, "", {"service": "api"}]).first_non_empty() assert matched is True + assert parsed_int == 42 assert isinstance(typed, ExtendedDict) assert typed["str"] == ["api"] assert isinstance(mapped, ExtendedDict) From cc4f4b2f6f7e2402291ffe2d611872e94dd26396 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:14:05 -0500 Subject: [PATCH 163/287] feat: reconstruct special types on containers --- README.md | 3 +++ docs/package-surface.md | 5 ++++- src/extended_data/containers/mappings.py | 7 +++++++ src/extended_data/containers/sequences.py | 20 +++++++++++++++++- src/extended_data/containers/strings.py | 7 +++++++ tests/core/test_containers.py | 25 +++++++++++++++++++++++ tests/core/test_integration_workflows.py | 7 ++++--- tests/core/test_package_surface.py | 3 +++ 8 files 
changed, 72 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index c5c7044..5ca7e14 100644 --- a/README.md +++ b/README.md @@ -192,6 +192,9 @@ without requiring callers to drop back to function-only utility code. `ExtendedString.to_bool()`, `to_int()`, `to_float()`, `to_path()`, `to_date()`, `to_datetime()`, and `to_time()` expose the Tier 1 scalar conversion family as direct string-container methods. +`ExtendedString.reconstruct_special_type()` and the container +`reconstruct_special_types()` methods restore booleans, numbers, dates, times, +paths, and structured JSON/YAML values while staying in promoted Tier 2 data. Format encoders lower extended containers, including extended mapping keys, at the serialization boundary. `read_data_file()` is the direct file boundary for one-step read plus decode diff --git a/docs/package-surface.md b/docs/package-surface.md index c231173..5b3f3a4 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -99,7 +99,10 @@ values containing `ExtendedString` parts, while `partition()` and helpers through `ExtendedString`. Scalar conversion paths `to_bool()`, `to_int()`, `to_float()`, `to_path()`, `to_date()`, `to_datetime()`, and `to_time()` expose the Tier 1 `string_to_*()` family directly on -`ExtendedString`. +`ExtendedString`. `ExtendedString.reconstruct_special_type()` and container +`reconstruct_special_types()` methods restore booleans, numbers, paths, dates, +times, and structured JSON/YAML values while keeping reconstructed collections +inside Tier 2 containers. 
Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected diff --git a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py index 9c35c16..e4b1f8e 100644 --- a/src/extended_data/containers/mappings.py +++ b/src/extended_data/containers/mappings.py @@ -25,6 +25,7 @@ ) from extended_data.primitives.splitting import split_dict_by_type from extended_data.primitives.state import all_non_empty_in_dict, any_non_empty, yield_non_empty +from extended_data.primitives.types import reconstruct_special_types class ExtendedDict(UserDict[str, Any]): @@ -155,3 +156,9 @@ def non_empty_entries(self, *keys: str) -> ExtendedList[ExtendedDict]: from extended_data.containers.factory import extend_data, to_builtin return extend_data(list(yield_non_empty(to_builtin(self.data), *keys))) + + def reconstruct_special_types(self, *, fail_silently: bool = False) -> ExtendedDict: + """Return a copy with string-like special values reconstructed.""" + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(reconstruct_special_types(to_builtin(self.data), fail_silently=fail_silently)) diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index 3b35181..88b7157 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -13,7 +13,7 @@ from extended_data.primitives.splitting import split_list_by_type from extended_data.primitives.state import first_non_empty as primitive_first_non_empty from extended_data.primitives.state import is_nothing -from extended_data.primitives.types import make_hashable +from extended_data.primitives.types import make_hashable, reconstruct_special_types T = TypeVar("T") @@ -117,6 +117,12 @@ def zipmap(self, values: Iterable[str]) -> ExtendedDict: mapped_values = [str(item) for item in to_builtin(list(values))] return 
extend_data(primitive_zipmap(keys, mapped_values)) + def reconstruct_special_types(self, *, fail_silently: bool = False) -> ExtendedList[Any]: + """Return a copy with string-like special values reconstructed.""" + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(reconstruct_special_types(to_builtin(self.data), fail_silently=fail_silently)) + def unique(self) -> ExtendedList[T]: """Return a copy with duplicate values removed while preserving order.""" seen: set[Any] = set() @@ -241,6 +247,12 @@ def zipmap(self, values: Iterable[str]) -> ExtendedDict: mapped_values = [str(item) for item in to_builtin(list(values))] return extend_data(primitive_zipmap(keys, mapped_values)) + def reconstruct_special_types(self, *, fail_silently: bool = False) -> ExtendedTuple[Any]: + """Return a copy with string-like special values reconstructed.""" + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(reconstruct_special_types(to_builtin(tuple(self)), fail_silently=fail_silently)) + def to_tuple(self) -> tuple[T, ...]: """Return a plain tuple copy.""" return tuple(self) @@ -300,6 +312,12 @@ def compact(self) -> ExtendedSet[T]: """Return a copy without values considered empty.""" return ExtendedSet(item for item in self._data if not is_nothing(item)) + def reconstruct_special_types(self, *, fail_silently: bool = False) -> ExtendedSet[Any]: + """Return a copy with string-like special values reconstructed.""" + from extended_data.containers.factory import extend_data, to_builtin + + return extend_data(reconstruct_special_types(to_builtin(self._data), fail_silently=fail_silently)) + def union(self, *others: Iterable[T]) -> ExtendedSet[T]: """Return a union with other iterables.""" result = set(self._data) diff --git a/src/extended_data/containers/strings.py b/src/extended_data/containers/strings.py index 583fe3e..7d0059f 100644 --- a/src/extended_data/containers/strings.py +++ 
b/src/extended_data/containers/strings.py @@ -31,6 +31,7 @@ upper_first_char, ) from extended_data.primitives.types import ( + reconstruct_special_type, string_to_bool, string_to_date, string_to_datetime, @@ -200,3 +201,9 @@ def to_datetime(self, *, raise_on_error: bool = False) -> datetime.datetime | No def to_time(self, *, raise_on_error: bool = False) -> datetime.time | None: """Return a time parsed from the string.""" return string_to_time(self.data, raise_on_error=raise_on_error) + + def reconstruct_special_type(self, *, fail_silently: bool = False) -> object: + """Return the string reconstructed as a known scalar or structured value.""" + from extended_data.containers.factory import extend_data + + return extend_data(reconstruct_special_type(self.data, fail_silently=fail_silently)) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 1a14d4b..582eacb 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -45,6 +45,10 @@ def test_extended_string_chains_primitive_transforms() -> None: assert ExtendedString("3.14").to_float() == 3.14 assert ExtendedString("/tmp/service.yaml").to_path() == Path("/tmp/service.yaml") assert ExtendedString("2026-06-10").to_date() == datetime.date(2026, 6, 10) + assert ExtendedString("2026-06-10").reconstruct_special_type() == datetime.date(2026, 6, 10) + reconstructed_json = ExtendedString('{"service": "api"}').reconstruct_special_type() + assert isinstance(reconstructed_json, ExtendedDict) + assert reconstructed_json["service"].upper_first() == "Api" assert ExtendedString("2026-06-10T12:30:00").to_datetime() == datetime.datetime( 2026, 6, @@ -84,6 +88,9 @@ def test_extended_dict_composes_mapping_primitives() -> None: """ExtendedDict composes Tier 1 mapping primitives.""" value = ExtendedDict({"outer": {"inner": 1}, "items": [1, 1, 2], "empty": ""}) typed = ExtendedDict({"service": "api", "retries": 2, "enabled": True, "ports": [80, 443]}) + reconstructed = ExtendedDict( + 
{"enabled": "true", "retries": "5", "service": {"launched": "2026-06-10"}, "ports": ["80"]} + ).reconstruct_special_types() merged = value.deep_merge({"outer": {"other": 2}}) filtered = merged.filter(allowlist=["outer"]) @@ -107,6 +114,9 @@ def test_extended_dict_composes_mapping_primitives() -> None: assert isinstance(first_entry, ExtendedDict) assert isinstance(entries, ExtendedList) assert all(isinstance(entry, ExtendedDict) for entry in entries) + assert isinstance(reconstructed, ExtendedDict) + assert isinstance(reconstructed["service"], ExtendedDict) + assert isinstance(reconstructed["ports"], ExtendedList) assert merged["outer"] == {"inner": 1, "other": 2} assert value["outer"] == {"inner": 1} assert value.flatten() == {"outer.inner": 1, "items.0": 1, "items.1": 1, "items.2": 2, "empty": ""} @@ -125,6 +135,10 @@ def test_extended_dict_composes_mapping_primitives() -> None: assert first_entry["service"].upper_first() == "Api" assert entries == [{"service": "api"}, {"ports": [80, 443]}] assert isinstance(entries[1]["ports"], ExtendedList) + assert reconstructed["enabled"] is True + assert reconstructed["retries"] == 5 + assert reconstructed["service"]["launched"] == datetime.date(2026, 6, 10) + assert reconstructed["ports"] == [80] def test_extended_dict_promotes_nested_values_on_mutation() -> None: @@ -173,6 +187,7 @@ def test_extended_list_composes_sequence_primitives() -> None: typed = ExtendedList(["api", 2, True, ["nested"]]) first_nested = ExtendedList([None, "", {"service": "api"}]).first_non_empty() mapped = ExtendedList(["service", "region", "ignored"]).zipmap(["api", "us-east-1"]) + reconstructed = ExtendedList(["true", "5", {"launched": "2026-06-10"}]).reconstruct_special_types() assert value.flatten() == [1, 2, 3, "", 2] assert value.compact() == [1, [2, [3]], 2] @@ -182,6 +197,9 @@ def test_extended_list_composes_sequence_primitives() -> None: assert isinstance(mapped, ExtendedDict) assert mapped == {"service": "api", "region": "us-east-1"} assert 
mapped["service"].upper_first() == "Api" + assert isinstance(reconstructed, ExtendedList) + assert isinstance(reconstructed[2], ExtendedDict) + assert reconstructed == [True, 5, {"launched": datetime.date(2026, 6, 10)}] assert value.filter(lambda item: isinstance(item, int)) == [1, 2] assert ExtendedList([1, 2]).map(lambda item: item * 2) == [2, 4] assert ExtendedList(["api", "worker", "db"]).filter_values( @@ -229,10 +247,13 @@ def test_extended_list_promotes_nested_values_on_mutation() -> None: def test_extended_set_composes_set_operations() -> None: """ExtendedSet provides chainable set operations.""" value = ExtendedSet({1, 2, 3, None}) + reconstructed = ExtendedSet({"true", "2026-06-10"}).reconstruct_special_types() compact_repr = repr(value.compact()) assert compact_repr.startswith("ExtendedSet(") assert "object at" not in compact_repr + assert isinstance(reconstructed, ExtendedSet) + assert reconstructed.to_set() == {True, datetime.date(2026, 6, 10)} assert value.compact().to_set() == {1, 2, 3} assert value.union({4}).to_set() == {1, 2, 3, 4, None} assert value.intersection({2, 3, 5}).to_set() == {2, 3} @@ -273,6 +294,7 @@ def test_extended_tuple_preserves_immutable_sequence_shape() -> None: typed = ExtendedTuple(("api", 2, True, ["nested"])) first_nested = ExtendedTuple((None, "", {"service": "api"})).first_non_empty() mapped = ExtendedTuple(("service", "region", "ignored")).zipmap(("api", "us-east-1")) + reconstructed = ExtendedTuple(("true", "5", {"launched": "2026-06-10"})).reconstruct_special_types() split = typed.split_by_type(primitive_only=True) assert value.flatten() == (1, 2, 3, "", 2) @@ -283,6 +305,9 @@ def test_extended_tuple_preserves_immutable_sequence_shape() -> None: assert isinstance(mapped, ExtendedDict) assert mapped == {"service": "api", "region": "us-east-1"} assert mapped["service"].upper_first() == "Api" + assert isinstance(reconstructed, ExtendedTuple) + assert isinstance(reconstructed[2], ExtendedDict) + assert reconstructed == 
(True, 5, {"launched": datetime.date(2026, 6, 10)}) assert value.filter(lambda item: isinstance(item, int)) == (1, 2) assert value.map(lambda item: item * 2 if isinstance(item, int) else item) == (2, (2, [3]), "", 4) assert isinstance(split, ExtendedDict) diff --git a/tests/core/test_integration_workflows.py b/tests/core/test_integration_workflows.py index 6f3b78a..27645e8 100644 --- a/tests/core/test_integration_workflows.py +++ b/tests/core/test_integration_workflows.py @@ -33,12 +33,13 @@ def test_integration_workflow_serialization_transformation_export(): # 3. Transform: Convert types and transform strings transformed = { - "name": edt.to_pascal_case(loaded_data["project_name"]), - "config": edt.reconstruct_special_types(loaded_data["settings"]), - "item_list": [edt.humanize(i) for i in loaded_data["items"]], + "name": loaded_data["project_name"].to_pascal_case(), + "config": loaded_data["settings"].reconstruct_special_types(), + "item_list": [item.humanize() for item in loaded_data["items"]], } assert transformed["name"] == "MyGreatProject" + assert isinstance(transformed["config"], edt.ExtendedDict) assert transformed["config"]["enable_feature"] is True assert transformed["config"]["max_retries"] == 5 assert transformed["item_list"] == ["Item one", "Item two"] diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index b774ea9..04d88fc 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -162,6 +162,7 @@ def test_tier2_container_methods_expose_integrated_primitives() -> None: mapped = ExtendedTuple(("service", "region")).zipmap(("api", "us-east-1")) first_entry = ExtendedDict({"empty": "", "service": "api"}).first_non_empty_entry("empty", "service") selected = ExtendedList([None, "", {"service": "api"}]).first_non_empty() + reconstructed = ExtendedDict({"enabled": "true", "retries": "5"}).reconstruct_special_types() assert matched is True assert parsed_int == 42 @@ -173,6 +174,8 @@ def 
test_tier2_container_methods_expose_integrated_primitives() -> None: assert first_entry["service"].upper_first() == "Api" assert isinstance(selected, ExtendedDict) assert selected["service"].upper_first() == "Api" + assert isinstance(reconstructed, ExtendedDict) + assert reconstructed == {"enabled": True, "retries": 5} def test_connectors_root_exports_builtin_connector_classes() -> None: From 0e0b32bacbab5eba54ba4fb6e0753f6cd6b96727 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:16:50 -0500 Subject: [PATCH 164/287] docs: dogfood container filtering in workflows --- examples/core/composed_workflows.py | 4 ++-- tests/core/test_workflows.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/core/composed_workflows.py b/examples/core/composed_workflows.py index 4db2576..efabe5f 100644 --- a/examples/core/composed_workflows.py +++ b/examples/core/composed_workflows.py @@ -13,11 +13,11 @@ from extended_data import ( DataWorkflow, ExtendedDict, + ExtendedList, base64_decode, base64_encode, decode_hcl2, encode_hcl2, - filter_list, read_data_file, read_file, write_file, @@ -92,7 +92,7 @@ def demonstrate_api_payload_workflow() -> None: payload = ExtendedDict( { "HTTPResponseCode": 200, - "SelectedServices": filter_list(["api", "worker", "db"], denylist=["db"]), + "SelectedServices": ExtendedList(["api", "worker", "db"]).filter_values(denylist=["db"]), "Tags": ["api", "api", "docs"], } ) diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index 07d85c0..144d7cd 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -17,7 +17,6 @@ base64_encode, decode_hcl2, encode_hcl2, - filter_list, read_data_file, write_file, ) @@ -206,7 +205,7 @@ def test_api_payload_normalization_workflow_round_trip(tmp_path: Path) -> None: payload = ExtendedDict( { "HTTPResponseCode": 200, - "SelectedServices": filter_list(["api", "worker", "db"], denylist=["db"]), + "SelectedServices": ExtendedList(["api", 
"worker", "db"]).filter_values(denylist=["db"]), "Tags": ["api", "api", "docs"], } ) @@ -228,7 +227,7 @@ def test_api_payload_factory_workflow_round_trip(tmp_path: Path) -> None: """Promote decoded API payloads into containers before normalization.""" raw_payload = { "HTTPResponseCode": 200, - "SelectedServices": filter_list(["api", "worker", "db"], denylist=["db"]), + "SelectedServices": ExtendedList(["api", "worker", "db"]).filter_values(denylist=["db"]), "Tags": ["api", "api", "docs"], } From 4d537e1617717f2c6b710026274e230cb1f7ba08 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:22:11 -0500 Subject: [PATCH 165/287] feat: expose export boundaries on containers --- README.md | 3 ++ docs/package-surface.md | 4 ++- src/extended_data/containers/mappings.py | 12 ++++++++ src/extended_data/containers/sequences.py | 36 +++++++++++++++++++++++ src/extended_data/containers/strings.py | 14 ++++++++- tests/core/test_containers.py | 15 ++++++++++ tests/core/test_integration_workflows.py | 2 +- tests/core/test_package_surface.py | 2 ++ 8 files changed, 85 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5ca7e14..c47a2ab 100644 --- a/README.md +++ b/README.md @@ -195,6 +195,9 @@ conversion family as direct string-container methods. `ExtendedString.reconstruct_special_type()` and the container `reconstruct_special_types()` methods restore booleans, numbers, dates, times, paths, and structured JSON/YAML values while staying in promoted Tier 2 data. +Container `to_export_safe()` and `wrap_for_export()` methods expose the Tier 3 +export boundary directly from promoted values for JSON, YAML, TOML, HCL, and +raw string output. Format encoders lower extended containers, including extended mapping keys, at the serialization boundary. 
`read_data_file()` is the direct file boundary for one-step read plus decode diff --git a/docs/package-surface.md b/docs/package-surface.md index 5b3f3a4..9b87e05 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -102,7 +102,9 @@ helpers through `ExtendedString`. Scalar conversion paths `to_bool()`, `ExtendedString`. `ExtendedString.reconstruct_special_type()` and container `reconstruct_special_types()` methods restore booleans, numbers, paths, dates, times, and structured JSON/YAML values while keeping reconstructed collections -inside Tier 2 containers. +inside Tier 2 containers. Container `to_export_safe()` and `wrap_for_export()` +methods expose the Tier 3 export boundary directly from promoted values for +JSON, YAML, TOML, HCL, and raw string output. Container methods that return derived collections stay in Tier 2 as well: `ExtendedDict.filter()` returns an `ExtendedTuple` of accepted and rejected diff --git a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py index e4b1f8e..bbf2fad 100644 --- a/src/extended_data/containers/mappings.py +++ b/src/extended_data/containers/mappings.py @@ -162,3 +162,15 @@ def reconstruct_special_types(self, *, fail_silently: bool = False) -> ExtendedD from extended_data.containers.factory import extend_data, to_builtin return extend_data(reconstruct_special_types(to_builtin(self.data), fail_silently=fail_silently)) + + def to_export_safe(self, *, export_to_yaml: bool = False) -> Any: + """Return this mapping converted to export-safe primitive data.""" + from extended_data.io.exporters import make_raw_data_export_safe + + return make_raw_data_export_safe(self.data, export_to_yaml=export_to_yaml) + + def wrap_for_export(self, allow_encoding: bool | str = True, **format_opts: Any) -> str: + """Return this mapping wrapped as an encoded export string.""" + from extended_data.io.exporters import wrap_raw_data_for_export + + return wrap_raw_data_for_export(self.data, 
allow_encoding=allow_encoding, **format_opts) diff --git a/src/extended_data/containers/sequences.py b/src/extended_data/containers/sequences.py index 88b7157..54cadfd 100644 --- a/src/extended_data/containers/sequences.py +++ b/src/extended_data/containers/sequences.py @@ -123,6 +123,18 @@ def reconstruct_special_types(self, *, fail_silently: bool = False) -> ExtendedL return extend_data(reconstruct_special_types(to_builtin(self.data), fail_silently=fail_silently)) + def to_export_safe(self, *, export_to_yaml: bool = False) -> Any: + """Return this list converted to export-safe primitive data.""" + from extended_data.io.exporters import make_raw_data_export_safe + + return make_raw_data_export_safe(self.data, export_to_yaml=export_to_yaml) + + def wrap_for_export(self, allow_encoding: bool | str = True, **format_opts: Any) -> str: + """Return this list wrapped as an encoded export string.""" + from extended_data.io.exporters import wrap_raw_data_for_export + + return wrap_raw_data_for_export(self.data, allow_encoding=allow_encoding, **format_opts) + def unique(self) -> ExtendedList[T]: """Return a copy with duplicate values removed while preserving order.""" seen: set[Any] = set() @@ -253,6 +265,18 @@ def reconstruct_special_types(self, *, fail_silently: bool = False) -> ExtendedT return extend_data(reconstruct_special_types(to_builtin(tuple(self)), fail_silently=fail_silently)) + def to_export_safe(self, *, export_to_yaml: bool = False) -> Any: + """Return this tuple converted to export-safe primitive data.""" + from extended_data.io.exporters import make_raw_data_export_safe + + return make_raw_data_export_safe(tuple(self), export_to_yaml=export_to_yaml) + + def wrap_for_export(self, allow_encoding: bool | str = True, **format_opts: Any) -> str: + """Return this tuple wrapped as an encoded export string.""" + from extended_data.io.exporters import wrap_raw_data_for_export + + return wrap_raw_data_for_export(tuple(self), allow_encoding=allow_encoding, 
**format_opts) + def to_tuple(self) -> tuple[T, ...]: """Return a plain tuple copy.""" return tuple(self) @@ -318,6 +342,18 @@ def reconstruct_special_types(self, *, fail_silently: bool = False) -> ExtendedS return extend_data(reconstruct_special_types(to_builtin(self._data), fail_silently=fail_silently)) + def to_export_safe(self, *, export_to_yaml: bool = False) -> Any: + """Return this set converted to export-safe primitive data.""" + from extended_data.io.exporters import make_raw_data_export_safe + + return make_raw_data_export_safe(self._data, export_to_yaml=export_to_yaml) + + def wrap_for_export(self, allow_encoding: bool | str = True, **format_opts: Any) -> str: + """Return this set wrapped as an encoded export string.""" + from extended_data.io.exporters import wrap_raw_data_for_export + + return wrap_raw_data_for_export(self._data, allow_encoding=allow_encoding, **format_opts) + def union(self, *others: Iterable[T]) -> ExtendedSet[T]: """Return a union with other iterables.""" result = set(self._data) diff --git a/src/extended_data/containers/strings.py b/src/extended_data/containers/strings.py index 7d0059f..8071d12 100644 --- a/src/extended_data/containers/strings.py +++ b/src/extended_data/containers/strings.py @@ -7,7 +7,7 @@ from collections import UserString from collections.abc import Iterable, Mapping from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import extended_data.primitives.matching as primitive_matching @@ -207,3 +207,15 @@ def reconstruct_special_type(self, *, fail_silently: bool = False) -> object: from extended_data.containers.factory import extend_data return extend_data(reconstruct_special_type(self.data, fail_silently=fail_silently)) + + def to_export_safe(self, *, export_to_yaml: bool = False) -> Any: + """Return this value converted to export-safe primitive data.""" + from extended_data.io.exporters import make_raw_data_export_safe + + return make_raw_data_export_safe(self.data, 
export_to_yaml=export_to_yaml) + + def wrap_for_export(self, allow_encoding: bool | str = True, **format_opts: Any) -> str: + """Return this value wrapped as an encoded export string.""" + from extended_data.io.exporters import wrap_raw_data_for_export + + return wrap_raw_data_for_export(self.data, allow_encoding=allow_encoding, **format_opts) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 582eacb..8d266b1 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -3,6 +3,7 @@ from __future__ import annotations import datetime +import json from pathlib import Path from typing import Any @@ -46,6 +47,8 @@ def test_extended_string_chains_primitive_transforms() -> None: assert ExtendedString("/tmp/service.yaml").to_path() == Path("/tmp/service.yaml") assert ExtendedString("2026-06-10").to_date() == datetime.date(2026, 6, 10) assert ExtendedString("2026-06-10").reconstruct_special_type() == datetime.date(2026, 6, 10) + assert ExtendedString("echo one\necho two").to_export_safe(export_to_yaml=True) == "echo one\necho two" + assert json.loads(ExtendedString("api").wrap_for_export(allow_encoding="json")) == "api" reconstructed_json = ExtendedString('{"service": "api"}').reconstruct_special_type() assert isinstance(reconstructed_json, ExtendedDict) assert reconstructed_json["service"].upper_first() == "Api" @@ -91,6 +94,10 @@ def test_extended_dict_composes_mapping_primitives() -> None: reconstructed = ExtendedDict( {"enabled": "true", "retries": "5", "service": {"launched": "2026-06-10"}, "ports": ["80"]} ).reconstruct_special_types() + export_safe = ExtendedDict( + {"launched": datetime.date(2026, 6, 10), "path": Path("/tmp/service.yaml")} + ).to_export_safe() + wrapped_json = ExtendedDict({"service": "api", "retries": 2}).wrap_for_export(allow_encoding="json") merged = value.deep_merge({"outer": {"other": 2}}) filtered = merged.filter(allowlist=["outer"]) @@ -117,6 +124,8 @@ def 
test_extended_dict_composes_mapping_primitives() -> None: assert isinstance(reconstructed, ExtendedDict) assert isinstance(reconstructed["service"], ExtendedDict) assert isinstance(reconstructed["ports"], ExtendedList) + assert export_safe == {"launched": "2026-06-10", "path": "/tmp/service.yaml"} + assert json.loads(wrapped_json) == {"service": "api", "retries": 2} assert merged["outer"] == {"inner": 1, "other": 2} assert value["outer"] == {"inner": 1} assert value.flatten() == {"outer.inner": 1, "items.0": 1, "items.1": 1, "items.2": 2, "empty": ""} @@ -188,6 +197,7 @@ def test_extended_list_composes_sequence_primitives() -> None: first_nested = ExtendedList([None, "", {"service": "api"}]).first_non_empty() mapped = ExtendedList(["service", "region", "ignored"]).zipmap(["api", "us-east-1"]) reconstructed = ExtendedList(["true", "5", {"launched": "2026-06-10"}]).reconstruct_special_types() + export_safe = ExtendedList([datetime.date(2026, 6, 10), Path("/tmp/service.yaml")]).to_export_safe() assert value.flatten() == [1, 2, 3, "", 2] assert value.compact() == [1, [2, [3]], 2] @@ -200,6 +210,7 @@ def test_extended_list_composes_sequence_primitives() -> None: assert isinstance(reconstructed, ExtendedList) assert isinstance(reconstructed[2], ExtendedDict) assert reconstructed == [True, 5, {"launched": datetime.date(2026, 6, 10)}] + assert export_safe == ["2026-06-10", "/tmp/service.yaml"] assert value.filter(lambda item: isinstance(item, int)) == [1, 2] assert ExtendedList([1, 2]).map(lambda item: item * 2) == [2, 4] assert ExtendedList(["api", "worker", "db"]).filter_values( @@ -248,12 +259,14 @@ def test_extended_set_composes_set_operations() -> None: """ExtendedSet provides chainable set operations.""" value = ExtendedSet({1, 2, 3, None}) reconstructed = ExtendedSet({"true", "2026-06-10"}).reconstruct_special_types() + export_safe = ExtendedSet({datetime.date(2026, 6, 10)}).to_export_safe() compact_repr = repr(value.compact()) assert 
compact_repr.startswith("ExtendedSet(") assert "object at" not in compact_repr assert isinstance(reconstructed, ExtendedSet) assert reconstructed.to_set() == {True, datetime.date(2026, 6, 10)} + assert export_safe == ["2026-06-10"] assert value.compact().to_set() == {1, 2, 3} assert value.union({4}).to_set() == {1, 2, 3, 4, None} assert value.intersection({2, 3, 5}).to_set() == {2, 3} @@ -295,6 +308,7 @@ def test_extended_tuple_preserves_immutable_sequence_shape() -> None: first_nested = ExtendedTuple((None, "", {"service": "api"})).first_non_empty() mapped = ExtendedTuple(("service", "region", "ignored")).zipmap(("api", "us-east-1")) reconstructed = ExtendedTuple(("true", "5", {"launched": "2026-06-10"})).reconstruct_special_types() + export_safe = ExtendedTuple((datetime.date(2026, 6, 10), Path("/tmp/service.yaml"))).to_export_safe() split = typed.split_by_type(primitive_only=True) assert value.flatten() == (1, 2, 3, "", 2) @@ -308,6 +322,7 @@ def test_extended_tuple_preserves_immutable_sequence_shape() -> None: assert isinstance(reconstructed, ExtendedTuple) assert isinstance(reconstructed[2], ExtendedDict) assert reconstructed == (True, 5, {"launched": datetime.date(2026, 6, 10)}) + assert export_safe == ["2026-06-10", "/tmp/service.yaml"] assert value.filter(lambda item: isinstance(item, int)) == (1, 2) assert value.map(lambda item: item * 2 if isinstance(item, int) else item) == (2, (2, [3]), "", 4) assert isinstance(split, ExtendedDict) diff --git a/tests/core/test_integration_workflows.py b/tests/core/test_integration_workflows.py index 27645e8..90da1c4 100644 --- a/tests/core/test_integration_workflows.py +++ b/tests/core/test_integration_workflows.py @@ -45,7 +45,7 @@ def test_integration_workflow_serialization_transformation_export(): assert transformed["item_list"] == ["Item one", "Item two"] # 4. Export: Make safe for export (e.g. 
GitHub Actions) - export_safe = edt.make_raw_data_export_safe(transformed) + export_safe = edt.ExtendedDict(transformed).to_export_safe() assert isinstance(export_safe, dict) # Verify it's still equivalent assert export_safe["name"] == "MyGreatProject" diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 04d88fc..1be76ea 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -163,6 +163,7 @@ def test_tier2_container_methods_expose_integrated_primitives() -> None: first_entry = ExtendedDict({"empty": "", "service": "api"}).first_non_empty_entry("empty", "service") selected = ExtendedList([None, "", {"service": "api"}]).first_non_empty() reconstructed = ExtendedDict({"enabled": "true", "retries": "5"}).reconstruct_special_types() + export_safe = ExtendedDict({"launched": "2026-06-10"}).reconstruct_special_types().to_export_safe() assert matched is True assert parsed_int == 42 @@ -176,6 +177,7 @@ def test_tier2_container_methods_expose_integrated_primitives() -> None: assert selected["service"].upper_first() == "Api" assert isinstance(reconstructed, ExtendedDict) assert reconstructed == {"enabled": True, "retries": 5} + assert export_safe == {"launched": "2026-06-10"} def test_connectors_root_exports_builtin_connector_classes() -> None: From 5ce7a786714f07966567b55c2299ae147e8f465a Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:29:19 -0500 Subject: [PATCH 166/287] fix: promote decoded input fallbacks --- README.md | 4 ++- docs/package-surface.md | 24 +++++++------ examples/inputs/README.md | 3 +- examples/inputs/encoding_decoding.py | 24 ++++++++----- src/extended_data/inputs/__main__.py | 23 ++++++------ tests/inputs/test_main.py | 53 ++++++++++++++++++++++++++++ 6 files changed, 100 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index c47a2ab..2c7619c 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,9 @@ methods. 
`snapshot_inputs()` returns detached active or frozen snapshots, and `replace_inputs()` installs a new active snapshot while clearing stale frozen state by default. `get_input()` remains the scalar coercion boundary for booleans, numbers, paths, datetimes, and credential strings; pass -`as_extended=True` when an injected raw input value should stay in Tier 2 form. +`as_extended=True` when an injected raw or fallback input value should stay in +Tier 2 form and keep using container methods such as `reconstruct_special_types()` +and `to_export_safe()`. `Logging` stores marked log message collections as `ExtendedDict` and `ExtendedSet` values while keeping Python logger and handler objects plain. diff --git a/docs/package-surface.md b/docs/package-surface.md index 9b87e05..21c2172 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -172,22 +172,26 @@ writes raise `ValueError` unless `allow_empty=True` is passed. `InputProvider` loads input data from explicit mappings, environment variables, and stdin, then decodes or coerces values through the primitive layer. Its `decode_input(..., as_extended=True)` path gives input-driven workflows the same -container bridge as file and Base64 decoding. Requested input coercions are -strict, and diagnostics identify the input key and failed operation without -echoing raw values from environment variables, stdin, JSON, YAML, or Base64 -payloads. Active, frozen, shifted, and merged input snapshots are `ExtendedDict` -values, and input decorator metadata/options are promoted the same way. The old -case-insensitive input mapping is intentionally not preserved; exact keys keep -configuration wiring explicit while still letting direct snapshots use Tier 2 -methods. Use `snapshot_inputs()` for a detached promoted copy of active or -frozen state, and `replace_inputs()` when a workflow should install a new -active snapshot instead of mutating `.inputs` directly. 
+container bridge as file and Base64 decoding; fallback values use that same +promotion rule, so defaults do not silently drop back to plain dictionaries. +Requested input coercions are strict, and diagnostics identify the input key and +failed operation without echoing raw values from environment variables, stdin, +JSON, YAML, or Base64 payloads. Active, frozen, shifted, and merged input +snapshots are `ExtendedDict` values, and input decorator metadata/options are +promoted the same way. The old case-insensitive input mapping is intentionally +not preserved; exact keys keep configuration wiring explicit while still +letting direct snapshots use Tier 2 methods. Use `snapshot_inputs()` for a +detached promoted copy of active or frozen state, and `replace_inputs()` when a +workflow should install a new active snapshot instead of mutating `.inputs` +directly. ```python inputs = InputProvider(inputs={"service": {"name": "api"}}, from_environment=False) assert inputs.inputs["service"]["name"].upper_first() == "Api" assert isinstance(inputs.merge_inputs({"service": {"region": "us-east-1"}}), ExtendedDict) assert inputs.snapshot_inputs()["service"]["region"].upper_first() == "Us-east-1" +fallback = inputs.decode_input("missing", default={"enabled": "true"}, as_extended=True) +assert fallback.reconstruct_special_types()["enabled"] is True ``` `get_input()` is the scalar coercion boundary for booleans, numbers, paths, diff --git a/examples/inputs/README.md b/examples/inputs/README.md index 5636b70..7089fcb 100644 --- a/examples/inputs/README.md +++ b/examples/inputs/README.md @@ -48,4 +48,5 @@ Demonstrates input decoding capabilities: - YAML decoding - Base64 decoding - Combined Base64 + JSON/YAML decoding -- Default values for missing inputs +- Tier 2 reconstruction/export methods on decoded inputs +- Promoted default values for missing inputs diff --git a/examples/inputs/encoding_decoding.py b/examples/inputs/encoding_decoding.py index c37e39a..f590dc2 100644 --- 
a/examples/inputs/encoding_decoding.py +++ b/examples/inputs/encoding_decoding.py @@ -21,7 +21,7 @@ def main() -> None: """Demonstrate encoding/decoding features.""" # Prepare encoded test data - json_data = '{"database": "postgres", "port": 5432}' + json_data = '{"database": "postgres", "port": "5432", "enabled": "true"}' yaml_data = "server:\n host: localhost\n port: 8080" base64_json = base64.b64encode(json_data.encode()).decode() base64_yaml = base64.b64encode(yaml_data.encode()).decode() @@ -38,34 +38,42 @@ def main() -> None: ) # JSON decoding - inputs.decode_input("json_config", decode_from_json=True, as_extended=True) + json_config = inputs.decode_input("json_config", decode_from_json=True, as_extended=True) + json_config.reconstruct_special_types().to_export_safe() # YAML decoding - inputs.decode_input("yaml_config", decode_from_yaml=True, as_extended=True) + yaml_config = inputs.decode_input("yaml_config", decode_from_yaml=True, as_extended=True) + yaml_config["server"]["host"].upper_first() # Base64 + JSON decoding - inputs.decode_input( + base64_decoded_json = inputs.decode_input( "base64_json_config", decode_from_base64=True, decode_from_json=True, + as_extended=True, ) + base64_decoded_json.wrap_for_export(allow_encoding="json") # Base64 + YAML decoding - inputs.decode_input( + base64_decoded_yaml = inputs.decode_input( "base64_yaml_config", decode_from_base64=True, decode_from_yaml=True, + as_extended=True, ) + base64_decoded_yaml.to_export_safe() # Plain text (no decoding) - inputs.get_input("plain_text") + inputs.get_input("plain_text", as_extended=True).upper_first() # Missing input with default - inputs.decode_input( + fallback = inputs.decode_input( "nonexistent", - default={"fallback": True}, + default={"fallback": "true"}, decode_from_json=True, + as_extended=True, ) + fallback.reconstruct_special_types() if __name__ == "__main__": diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index 2e062d9..4771514 
100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -159,6 +159,10 @@ def _format_available_keys(inputs: Mapping[str, Any]) -> str: keys = sorted(str(key) for key in inputs) return ", ".join(keys[:20]) + (f", ... ({len(keys)} total)" if len(keys) > 20 else "") + @staticmethod + def _return_value(value: Any, *, as_extended: bool) -> Any: + return extend_data(value) if as_extended else value + def get_input( self, k: str, @@ -274,20 +278,20 @@ def decode_input( if not source_present: if required: self.get_input(k, default=default, required=True) - return default + return self._return_value(default, as_extended=as_extended) conf = to_builtin(raw_input) if conf is None: - return default if not allow_none else None + return self._return_value(default, as_extended=as_extended) if not allow_none else None if is_nothing(conf): if required: self.get_input(k, default=default, required=True) - return default + return self._return_value(default, as_extended=as_extended) conf = self._coerce_text(conf) if not isinstance(conf, str): - return extend_data(conf) if as_extended else conf + return self._return_value(conf, as_extended=as_extended) if decode_from_base64: try: @@ -303,8 +307,8 @@ def decode_input( if not isinstance(conf, str): if conf is None and not allow_none: - return default - return extend_data(conf) if as_extended else conf + return self._return_value(default, as_extended=as_extended) + return self._return_value(conf, as_extended=as_extended) if decode_from_yaml: try: @@ -320,12 +324,9 @@ def decode_input( raise RuntimeError(message) from exc if conf is None and not allow_none: - return default - - if as_extended: - return extend_data(conf) + return self._return_value(default, as_extended=as_extended) - return conf + return self._return_value(conf, as_extended=as_extended) def freeze_inputs(self) -> ExtendedDict: """Freezes the current inputs, preventing further modifications until thawed. 
diff --git a/tests/inputs/test_main.py b/tests/inputs/test_main.py index 4f2de93..4b94c67 100644 --- a/tests/inputs/test_main.py +++ b/tests/inputs/test_main.py @@ -258,6 +258,26 @@ def test_decode_input_json_can_return_extended_containers(): assert decoded["name"].upper_first() == "Test" +def test_decode_input_extended_containers_can_use_tier2_export_methods(): + """Decoded input payloads can stay inside the integrated container surface.""" + dic = InputProvider(inputs={"json_key": '{"enabled": "true", "retries": "5", "service": {"name": "api"}}'}) + + decoded = dic.decode_input("json_key", decode_from_json=True, as_extended=True) + reconstructed = decoded.reconstruct_special_types() + + assert isinstance(decoded, ExtendedDict) + assert decoded.to_export_safe() == {"enabled": "true", "retries": "5", "service": {"name": "api"}} + assert json.loads(decoded.wrap_for_export(allow_encoding="json")) == { + "enabled": "true", + "retries": "5", + "service": {"name": "api"}, + } + assert isinstance(reconstructed, ExtendedDict) + assert reconstructed["enabled"] is True + assert reconstructed["retries"] == 5 + assert reconstructed["service"]["name"].upper_first() == "Api" + + def test_decode_input_decodes_present_value_that_equals_default(): """Defaults should not mask present input values that happen to be equal.""" raw_config = '{"name": "test"}' @@ -272,6 +292,23 @@ def test_decode_input_decodes_present_value_that_equals_default(): assert missing.decode_input("json_key", default=raw_config, decode_from_json=True) == raw_config +def test_decode_input_missing_default_can_return_extended_containers(): + """Missing decoded inputs promote fallback data when Tier 2 output is requested.""" + dic = InputProvider(from_environment=False) + + decoded = dic.decode_input( + "missing_key", + default={"enabled": "true", "service": {"name": "fallback"}}, + decode_from_json=True, + as_extended=True, + ) + + assert isinstance(decoded, ExtendedDict) + assert isinstance(decoded["service"], 
ExtendedDict) + assert decoded["service"]["name"].upper_first() == "Fallback" + assert decoded.reconstruct_special_types()["enabled"] is True + + def test_decode_input_honors_explicit_none_values(): """Present None inputs should obey allow_none instead of looking missing.""" dic = InputProvider(inputs={"json_key": None}, from_environment=False) @@ -282,6 +319,22 @@ def test_decode_input_honors_explicit_none_values(): assert missing.decode_input("json_key", default="fallback", decode_from_json=True, allow_none=True) == "fallback" +def test_decode_input_none_fallback_can_return_extended_containers(): + """Explicit None fallbacks use the same promotion rule when None is disallowed.""" + dic = InputProvider(inputs={"json_key": None}, from_environment=False) + + decoded = dic.decode_input( + "json_key", + default={"enabled": "false"}, + decode_from_json=True, + allow_none=False, + as_extended=True, + ) + + assert isinstance(decoded, ExtendedDict) + assert decoded.reconstruct_special_types()["enabled"] is False + + def test_decode_input_required_empty_value_raises(): """Required decode inputs still reject empty provided values.""" dic = InputProvider(inputs={"json_key": ""}, from_environment=False) From 67313b6a023f0f5a0ed7b224c37900df21ef58d6 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:34:03 -0500 Subject: [PATCH 167/287] feat: decode structured data from extended strings --- README.md | 4 ++ docs/package-surface.md | 5 ++- src/extended_data/containers/strings.py | 55 +++++++++++++++++++++++++ tests/core/test_containers.py | 21 ++++++++++ tests/core/test_package_surface.py | 3 ++ 5 files changed, 87 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2c7619c..6d609a8 100644 --- a/README.md +++ b/README.md @@ -192,6 +192,10 @@ without requiring callers to drop back to function-only utility code. 
`ExtendedString.to_bool()`, `to_int()`, `to_float()`, `to_path()`, `to_date()`, `to_datetime()`, and `to_time()` expose the Tier 1 scalar conversion family as direct string-container methods. +`ExtendedString.decode_json()`, `decode_yaml()`, `decode_toml()`, +`decode_hcl2()`, and `decode_base64()` expose structured text decoding from +the string container and promote decoded maps/lists into Tier 2 data by +default. `ExtendedString.reconstruct_special_type()` and the container `reconstruct_special_types()` methods restore booleans, numbers, dates, times, paths, and structured JSON/YAML values while staying in promoted Tier 2 data. diff --git a/docs/package-surface.md b/docs/package-surface.md index 21c2172..393fa19 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -99,7 +99,10 @@ values containing `ExtendedString` parts, while `partition()` and helpers through `ExtendedString`. Scalar conversion paths `to_bool()`, `to_int()`, `to_float()`, `to_path()`, `to_date()`, `to_datetime()`, and `to_time()` expose the Tier 1 `string_to_*()` family directly on -`ExtendedString`. `ExtendedString.reconstruct_special_type()` and container +`ExtendedString`. Structured text paths `decode_json()`, `decode_yaml()`, +`decode_toml()`, `decode_hcl2()`, and `decode_base64()` decode from the string +container and promote decoded maps/lists into Tier 2 data by default. +`ExtendedString.reconstruct_special_type()` and container `reconstruct_special_types()` methods restore booleans, numbers, paths, dates, times, and structured JSON/YAML values while keeping reconstructed collections inside Tier 2 containers. 
Container `to_export_safe()` and `wrap_for_export()` diff --git a/src/extended_data/containers/strings.py b/src/extended_data/containers/strings.py index 8071d12..2706a8f 100644 --- a/src/extended_data/containers/strings.py +++ b/src/extended_data/containers/strings.py @@ -208,6 +208,61 @@ def reconstruct_special_type(self, *, fail_silently: bool = False) -> object: return extend_data(reconstruct_special_type(self.data, fail_silently=fail_silently)) + def decode_json(self, *, as_extended: bool = True) -> Any: + """Decode this JSON string, promoting structured values by default.""" + from extended_data.containers.factory import extend_data + from extended_data.primitives.formats.json import decode_json + + decoded = decode_json(self.data) + return extend_data(decoded) if as_extended else decoded + + def decode_yaml(self, *, as_extended: bool = True) -> Any: + """Decode this YAML string, promoting structured values by default.""" + from extended_data.containers.factory import extend_data + from extended_data.primitives.formats.yaml import decode_yaml + + decoded = decode_yaml(self.data) + return extend_data(decoded) if as_extended else decoded + + def decode_toml(self, *, as_extended: bool = True) -> Any: + """Decode this TOML string, promoting structured values by default.""" + from extended_data.containers.factory import extend_data + from extended_data.primitives.formats.toml import decode_toml + + decoded = decode_toml(self.data) + return extend_data(decoded) if as_extended else decoded + + def decode_hcl2(self, *, as_extended: bool = True) -> Any: + """Decode this HCL2 string, promoting structured values by default.""" + from extended_data.containers.factory import extend_data + from extended_data.primitives.formats.hcl import decode_hcl2 + + decoded = decode_hcl2(self.data) + return extend_data(decoded) if as_extended else decoded + + def encode_base64(self, *, wrap_raw_data: bool = True) -> ExtendedString: + """Return this string encoded as Base64.""" + from 
extended_data.io.base64 import base64_encode + + return ExtendedString(base64_encode(self.data, wrap_raw_data=wrap_raw_data)) + + def decode_base64( + self, + unwrap_raw_data: bool = True, + encoding: str = "yaml", + *, + as_extended: bool = True, + ) -> Any: + """Decode this Base64 string, promoting structured values by default.""" + from extended_data.io.base64 import base64_decode + + return base64_decode( + self.data, + unwrap_raw_data=unwrap_raw_data, + encoding=encoding, + as_extended=as_extended, + ) + def to_export_safe(self, *, export_to_yaml: bool = False) -> Any: """Return this value converted to export-safe primitive data.""" from extended_data.io.exporters import make_raw_data_export_safe diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index 8d266b1..ec3f596 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -34,6 +34,13 @@ def test_extended_string_chains_primitive_transforms() -> None: formatted_map = ExtendedString("{service}.{component}").format_map( {"service": ExtendedString("api"), "component": "worker"} ) + decoded_json = ExtendedString('{"service": {"name": "api"}}').decode_json() + decoded_yaml = ExtendedString("service:\n name: api\n").decode_yaml() + decoded_toml = ExtendedString('service = { name = "api" }\n').decode_toml() + decoded_hcl = ExtendedString('locals { service = "api" }\n').decode_hcl2() + encoded_base64 = ExtendedString('{"service": {"name": "api"}}').encode_base64(wrap_raw_data=False) + decoded_base64 = encoded_base64.decode_base64(encoding="json") + plain_decoded_json = ExtendedString('{"service": "api"}').decode_json(as_extended=False) assert value.to_snake_case().remove_suffix("_value") == "api_response" assert value.to_snake_case().remove_prefix("api_") == "response_value" @@ -85,6 +92,20 @@ def test_extended_string_chains_primitive_transforms() -> None: assert formatted == "api.worker" assert isinstance(formatted_map, ExtendedString) assert formatted_map == 
"api.worker" + assert isinstance(decoded_json, ExtendedDict) + assert decoded_json["service"]["name"].upper_first() == "Api" + assert isinstance(decoded_yaml, ExtendedDict) + assert decoded_yaml["service"]["name"].upper_first() == "Api" + assert isinstance(decoded_toml, ExtendedDict) + assert decoded_toml["service"]["name"].upper_first() == "Api" + assert isinstance(decoded_hcl, ExtendedDict) + assert isinstance(decoded_hcl["locals"], ExtendedList) + assert decoded_hcl["locals"][0]["service"].upper_first() == "Api" + assert isinstance(encoded_base64, ExtendedString) + assert isinstance(decoded_base64, ExtendedDict) + assert decoded_base64["service"]["name"].upper_first() == "Api" + assert isinstance(plain_decoded_json, dict) + assert plain_decoded_json == {"service": "api"} def test_extended_dict_composes_mapping_primitives() -> None: diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 1be76ea..7ffc391 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -158,6 +158,7 @@ def test_tier2_container_methods_expose_integrated_primitives() -> None: """Tier 2 containers should expose common primitive operations directly.""" matched = ExtendedString("api-gateway").is_partial_match("gateway") parsed_int = ExtendedString("42").to_int() + decoded_string = ExtendedString('{"service": "api"}').decode_json() typed = ExtendedList(["api", 2]).split_by_type(primitive_only=True) mapped = ExtendedTuple(("service", "region")).zipmap(("api", "us-east-1")) first_entry = ExtendedDict({"empty": "", "service": "api"}).first_non_empty_entry("empty", "service") @@ -167,6 +168,8 @@ def test_tier2_container_methods_expose_integrated_primitives() -> None: assert matched is True assert parsed_int == 42 + assert isinstance(decoded_string, ExtendedDict) + assert decoded_string["service"].upper_first() == "Api" assert isinstance(typed, ExtendedDict) assert typed["str"] == ["api"] assert isinstance(mapped, ExtendedDict) 
From e9b2ffeec3a01e05f28c34dafa3d37fae07d0eb2 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:37:51 -0500 Subject: [PATCH 168/287] feat: snapshot stored logging messages --- README.md | 2 ++ docs/package-surface.md | 4 +++- examples/logging/markers_and_storage.py | 4 ++-- src/extended_data/logging/logging.py | 10 +++++++++- tests/core/test_package_surface.py | 19 ++++++++++++++++++- tests/logging/test_logging.py | 22 ++++++++++++++++++++++ 6 files changed, 56 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 6d609a8..7f58b26 100644 --- a/README.md +++ b/README.md @@ -221,6 +221,8 @@ Tier 2 form and keep using container methods such as `reconstruct_special_types( and `to_export_safe()`. `Logging` stores marked log message collections as `ExtendedDict` and `ExtendedSet` values while keeping Python logger and handler objects plain. +Use `get_stored_messages()` or `snapshot_stored_messages()` when downstream +data workflows need detached promoted copies of collected messages. More detail lives in [`docs/package-surface.md`](docs/package-surface.md). diff --git a/docs/package-surface.md b/docs/package-surface.md index 393fa19..30dbbdb 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -205,7 +205,9 @@ input value should remain in Tier 2 form. workflows without creating log files unless file output is explicitly enabled. Stored log message collections are exposed as `ExtendedDict` values keyed by storage marker, with each marker containing an `ExtendedSet` of promoted -messages. +messages. `get_stored_messages()` returns a detached promoted message set for +one marker, and `snapshot_stored_messages()` returns a detached `ExtendedDict` +copy of all stored collections for downstream export or workflow composition. `ConnectorFabric` caches and coordinates vendor connectors while sharing input loading, logging, data normalization, retry behavior, and serialization. 
diff --git a/examples/logging/markers_and_storage.py b/examples/logging/markers_and_storage.py index 6ab8a99..3509954 100644 --- a/examples/logging/markers_and_storage.py +++ b/examples/logging/markers_and_storage.py @@ -53,8 +53,8 @@ def main() -> None: log_level="info", ) - # Access stored messages - for messages in logger.stored_messages.values(): + # Access stored messages through a detached promoted snapshot + for messages in logger.snapshot_stored_messages().values(): for _msg in messages: print(_msg) diff --git a/src/extended_data/logging/logging.py b/src/extended_data/logging/logging.py index b628add..c4e8fc3 100644 --- a/src/extended_data/logging/logging.py +++ b/src/extended_data/logging/logging.py @@ -39,7 +39,7 @@ to_snake_case, wrap_raw_data_for_export, ) -from extended_data.containers import ExtendedDict, ExtendedSet +from extended_data.containers import ExtendedDict, ExtendedSet, to_builtin from extended_data.logging.const import VERBOSITY from extended_data.logging.handlers import add_console_handler, add_file_handler from extended_data.logging.log_types import LogLevel @@ -295,6 +295,14 @@ def _stored_messages_for(self, storage_marker: str) -> ExtendedSet[str]: self.stored_messages[storage_marker] = promoted_messages return promoted_messages + def get_stored_messages(self, storage_marker: str) -> ExtendedSet[str]: + """Return a detached promoted copy of messages for one storage marker.""" + return ExtendedSet[str](deepcopy(to_builtin(self.stored_messages.get(storage_marker, ExtendedSet())))) + + def snapshot_stored_messages(self) -> ExtendedDict: + """Return a detached Tier 2 snapshot of all stored message collections.""" + return ExtendedDict(deepcopy(to_builtin(self.stored_messages))) + def logged_statement( self, msg: str, diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 7ffc391..bbbb5ec 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -13,7 +13,7 @@ from 
extended_data import connectors, containers, inputs, io, primitives, secrets, workflows from extended_data.connectors.connectors import ConnectorFabric from extended_data.connectors.registry import BUILTIN_CONNECTORS -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet, ExtendedString, ExtendedTuple from extended_data.inputs import InputProvider from extended_data.logging import Logging @@ -154,6 +154,23 @@ def test_root_exports_first_class_integrated_primitives() -> None: assert "github" in connector_names +def test_logging_exposes_stored_messages_as_detached_tier2_data() -> None: + """Stored log message collections should be consumable through Tier 2 containers.""" + logger = Logging(enable_console=False, enable_file=False) + + logger.logged_statement("Stored message", storage_marker="events", log_level="info") + messages = logger.get_stored_messages("events") + snapshot = logger.snapshot_stored_messages() + + messages.add("Local mutation") + + assert isinstance(messages, ExtendedSet) + assert isinstance(snapshot, ExtendedDict) + assert isinstance(snapshot["events"], ExtendedSet) + assert "Local mutation" not in logger.stored_messages["events"] + assert sorted(snapshot.to_export_safe()["events"]) == ["Stored message"] + + def test_tier2_container_methods_expose_integrated_primitives() -> None: """Tier 2 containers should expose common primitive operations directly.""" matched = ExtendedString("api-gateway").is_partial_match("gateway") diff --git a/tests/logging/test_logging.py b/tests/logging/test_logging.py index 872a630..d562065 100644 --- a/tests/logging/test_logging.py +++ b/tests/logging/test_logging.py @@ -79,6 +79,28 @@ def test_storage_marker(logger: Logging) -> None: assert isinstance(stored_msg, ExtendedString) +def test_stored_message_snapshots_are_detached_extended_collections(logger: Logging) -> None: + """Stored logging data can 
be consumed through promoted detached snapshots.""" + logger.logged_statement("First message", storage_marker="events", log_level="info") # type: ignore[arg-type] + logger.logged_statement("Second message", storage_marker="events", log_level="info") # type: ignore[arg-type] + + messages = logger.get_stored_messages("events") + snapshot = logger.snapshot_stored_messages() + missing = logger.get_stored_messages("missing") + + messages.add("Local mutation") + snapshot["events"].add("Snapshot mutation") + + assert isinstance(messages, ExtendedSet) + assert all(isinstance(message, ExtendedString) for message in messages) + assert isinstance(snapshot, ExtendedDict) + assert isinstance(snapshot["events"], ExtendedSet) + assert missing == set() + assert "Local mutation" not in logger.stored_messages["events"] + assert "Snapshot mutation" not in logger.stored_messages["events"] + assert sorted(logger.snapshot_stored_messages().to_export_safe()["events"]) == ["First message", "Second message"] + + def test_context_marker(logger: Logging) -> None: """Test message prefixing with context markers. 
From b7935d293613e3d42093c208552e99f353a53f40 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:42:15 -0500 Subject: [PATCH 169/287] refactor: drop legacy logging transform flag --- examples/logging/exit_run_formatting.py | 11 ++++------- src/extended_data/logging/logging.py | 16 ++++++---------- tests/logging/test_exit_run.py | 9 ++++----- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/examples/logging/exit_run_formatting.py b/examples/logging/exit_run_formatting.py index 9ee05cc..46e7f87 100644 --- a/examples/logging/exit_run_formatting.py +++ b/examples/logging/exit_run_formatting.py @@ -22,14 +22,11 @@ def main() -> None: } logger.exit_run(results, key_transform="snake_case", exit_on_completion=False) - # Example 2: Using unhump_results (shorthand for snake_case) - logger.exit_run(results, unhump_results=True, exit_on_completion=False) - - # Example 3: Transform to camelCase + # Example 2: Transform to camelCase snake_results = {"user_name": "john_doe", "email_address": "john@example.com"} logger.exit_run(snake_results, key_transform="camel_case", exit_on_completion=False) - # Example 4: Nested key transformation + # Example 3: Nested key transformation nested_results = { "userData": { "firstName": "John", @@ -39,7 +36,7 @@ def main() -> None: } logger.exit_run(nested_results, key_transform="snake_case", exit_on_completion=False) - # Example 5: Adding prefix to keys + # Example 4: Adding prefix to keys field_results = {"item1": {"fieldName": "value1", "otherField": "value2"}} logger.exit_run( field_results, @@ -47,7 +44,7 @@ def main() -> None: exit_on_completion=False, ) - # Example 6: Custom transform function + # Example 5: Custom transform function logger.exit_run( {"myKey": "value", "anotherKey": "data"}, key_transform=lambda k: k.upper(), diff --git a/src/extended_data/logging/logging.py b/src/extended_data/logging/logging.py index c4e8fc3..51903fe 100644 --- a/src/extended_data/logging/logging.py +++ 
b/src/extended_data/logging/logging.py @@ -402,14 +402,12 @@ def log_results( def _resolve_key_transform( self, key_transform: KeyTransform | str | None, - unhump_results: bool, prefix: str | None, ) -> KeyTransform | None: """Resolve key_transform parameter to a callable. Args: key_transform: User-provided transform (callable, string name, or None). - unhump_results: Legacy flag for snake_case transformation. prefix: If set, implies transformation is needed. Returns: @@ -425,8 +423,7 @@ def _resolve_key_transform( raise ValueError(f"Unknown key_transform '{key_transform}'. Available: {available}") return self.KEY_TRANSFORMS[key_transform] - # Legacy unhump_results flag - if unhump_results or prefix: + if prefix: return to_snake_case return None @@ -462,7 +459,6 @@ def _transform_nested_keys( def exit_run( self, results: Mapping[str, Any] | None = None, - unhump_results: bool = False, key_transform: KeyTransform | str | None = None, prefix: str | None = None, prefix_allowlist: Sequence[str] | None = None, @@ -488,14 +484,11 @@ def exit_run( Args: results: The results to format and output. Defaults to empty dict. - unhump_results: Convert camelCase keys to snake_case (shorthand for - key_transform="snake_case"). key_transform: Transform function for result keys. Can be: - A callable that takes a string and returns a string - A string naming a built-in transform: "snake_case", "camel_case", "pascal_case", "kebab_case" - None to skip transformation - When unhump_results=True, defaults to "snake_case". prefix: Prefix to add to result keys (implies key transformation). prefix_allowlist: Keys to include when prefixing. prefix_denylist: Keys to exclude when prefixing. 
@@ -519,7 +512,7 @@ def exit_run( Examples: # Simple snake_case transformation (most common) - logging.exit_run(results, unhump_results=True) + logging.exit_run(results, key_transform="snake_case") # Explicit transform logging.exit_run(results, key_transform="kebab_case") @@ -527,8 +520,11 @@ def exit_run( # Custom transform function logging.exit_run(results, key_transform=lambda k: k.upper()) """ + if "unhump_results" in format_opts: + raise TypeError("exit_run() got an unexpected keyword argument 'unhump_results'") + # Resolve key_transform from various inputs - transform_fn = self._resolve_key_transform(key_transform, unhump_results, prefix) + transform_fn = self._resolve_key_transform(key_transform, prefix) try: self.log_results(results, "results") diff --git a/tests/logging/test_exit_run.py b/tests/logging/test_exit_run.py index d965f25..2f64204 100644 --- a/tests/logging/test_exit_run.py +++ b/tests/logging/test_exit_run.py @@ -85,13 +85,12 @@ def test_exit_run_none_results(self, logger: Logging, tmp_path: Path) -> None: output = logger.exit_run(None, exit_on_completion=False) assert output == {} - def test_exit_run_unhump_results(self, logger: Logging, tmp_path: Path) -> None: - """Test that exit_run converts camelCase to snake_case.""" + def test_exit_run_unhump_results_is_not_preserved(self, logger: Logging, tmp_path: Path) -> None: + """The clean major-version API should not keep the old shorthand flag.""" os.chdir(tmp_path) results = {"myKey": {"nestedKey": "value"}} - output = logger.exit_run(results, unhump_results=True, exit_on_completion=False) - assert "my_key" in output - assert "nested_key" in output["my_key"] + with pytest.raises(TypeError, match="unhump_results"): + logger.exit_run(results, unhump_results=True, exit_on_completion=False) def test_exit_run_key_transform_snake_case(self, logger: Logging, tmp_path: Path) -> None: """Test key_transform with snake_case string.""" From 3266f350003530d0d947a2d20dc1861b5ef0e4f6 Mon Sep 17 00:00:00 2001 
From: Jon Bogaty Date: Wed, 10 Jun 2026 13:47:46 -0500 Subject: [PATCH 170/287] feat: expose workflow result export boundaries --- README.md | 6 +++-- docs/package-surface.md | 7 +++++- examples/core/composed_workflows.py | 1 + src/extended_data/workflows/__init__.py | 14 ++++++++++-- tests/core/test_package_surface.py | 14 ++++++++++++ tests/core/test_workflows.py | 29 +++++++++++++++++++++++++ 6 files changed, 66 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7f58b26..585d4ef 100644 --- a/README.md +++ b/README.md @@ -208,8 +208,10 @@ the serialization boundary. workflows; it raises for missing files and promotes structured data into Tier 2 containers by default. `DataWorkflow` makes those compositions first-class: read or decode data, apply named transformations, write an output artifact, and -keep the step trail in a `WorkflowResult`. Missing workflow inputs and empty -writes fail loudly. +keep the step trail in a `WorkflowResult`. Completed workflow results expose +detached promoted views with `as_extended()` plus direct `to_export_safe()` and +`wrap_for_export()` helpers. Missing workflow inputs and empty writes fail +loudly. `InputProvider` stores its active, frozen, and merged input snapshots as `ExtendedDict` values, so direct input-data access can use Tier 2 container methods. `snapshot_inputs()` returns detached active or frozen snapshots, and diff --git a/docs/package-surface.md b/docs/package-surface.md index 30dbbdb..c3b90e1 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -154,7 +154,10 @@ plain strings before JSON handoff. processing. It reads or decodes structured data through the file and format processors, promotes values into Tier 2 containers by default, applies named transformation steps, writes output artifacts, and returns a `WorkflowResult` -with the completed value, output path, and step trail. +with the completed value, output path, and step trail. 
`WorkflowResult.as_extended()` +returns a detached promoted view of the completed value, and result-level +`to_export_safe()` / `wrap_for_export()` expose the same export boundary used by +Tier 2 containers. ```python from extended_data import DataWorkflow @@ -167,6 +170,8 @@ result = ( ) assert result.steps == ("read:config/base.yaml", "merge-env", "write:build/config.yaml") +assert result.as_extended()["service"]["name"].upper_first() == "Api" +assert result.to_export_safe()["service"]["name"] == "api" ``` Missing workflow input files raise `FileNotFoundError`, and empty workflow diff --git a/examples/core/composed_workflows.py b/examples/core/composed_workflows.py index efabe5f..260bcbc 100644 --- a/examples/core/composed_workflows.py +++ b/examples/core/composed_workflows.py @@ -51,6 +51,7 @@ def demonstrate_layered_config_workflow() -> None: .then(("merge-env", lambda data: data.deep_merge(env_data))) .write("build/config.yaml", tld=tld) ) + result.to_export_safe() merged_text = read_file("build/config.yaml", tld=tld) print(merged_text) diff --git a/src/extended_data/workflows/__init__.py b/src/extended_data/workflows/__init__.py index 7692a09..ac9c82c 100644 --- a/src/extended_data/workflows/__init__.py +++ b/src/extended_data/workflows/__init__.py @@ -3,11 +3,13 @@ from __future__ import annotations from collections.abc import Callable, Iterable +from copy import deepcopy from dataclasses import dataclass from pathlib import Path from typing import Any, TypeAlias from extended_data.containers import extend_data, to_builtin +from extended_data.io.exporters import make_raw_data_export_safe, wrap_raw_data_for_export from extended_data.io.files import FilePath, decode_file, read_data_file, write_file @@ -40,8 +42,16 @@ def as_builtin(self) -> Any: return to_builtin(self.value) def as_extended(self) -> Any: - """Return the workflow value promoted to Extended Data containers.""" - return extend_data(self.value) + """Return a detached workflow value promoted to 
Extended Data containers.""" + return extend_data(deepcopy(to_builtin(self.value))) + + def to_export_safe(self, *, export_to_yaml: bool = False) -> Any: + """Return the workflow value converted to export-safe primitive data.""" + return make_raw_data_export_safe(self.value, export_to_yaml=export_to_yaml) + + def wrap_for_export(self, allow_encoding: bool | str = True, **format_opts: Any) -> str: + """Return the workflow value wrapped as an encoded export string.""" + return wrap_raw_data_for_export(self.value, allow_encoding=allow_encoding, **format_opts) class DataWorkflow: diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index bbbb5ec..5d31661 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -171,6 +171,20 @@ def test_logging_exposes_stored_messages_as_detached_tier2_data() -> None: assert sorted(snapshot.to_export_safe()["events"]) == ["Stored message"] +def test_workflow_result_exposes_detached_export_boundaries() -> None: + """Workflow results should expose promoted and export-safe value boundaries.""" + result = extended_data.DataWorkflow.from_value({"service": {"name": "api"}}).result() + + promoted = result.as_extended() + promoted["service"]["name"] = "worker" + + assert isinstance(promoted, ExtendedDict) + assert result.value["service"]["name"] == "api" + assert result.as_extended()["service"]["name"].upper_first() == "Api" + assert result.to_export_safe() == {"service": {"name": "api"}} + assert '"service"' in result.wrap_for_export(allow_encoding="json") + + def test_tier2_container_methods_expose_integrated_primitives() -> None: """Tier 2 containers should expose common primitive operations directly.""" matched = ExtendedString("api-gateway").is_partial_match("gateway") diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index 144d7cd..5ee3126 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -2,6 +2,9 @@ from __future__ 
import annotations +import datetime +import json + from pathlib import Path import pytest @@ -122,6 +125,32 @@ def test_data_workflow_can_lower_and_promote_values() -> None: assert extended.value["service"]["name"].upper_first() == "Api" +def test_workflow_result_extended_view_is_detached() -> None: + """WorkflowResult accessors expose promoted data without sharing mutable state.""" + result = DataWorkflow.from_value({"service": {"name": "api"}}).result() + + promoted = result.as_extended() + promoted["service"]["name"] = "worker" + + assert isinstance(promoted, ExtendedDict) + assert isinstance(result.value, ExtendedDict) + assert result.value["service"]["name"] == "api" + assert result.as_extended()["service"]["name"].upper_first() == "Api" + + +def test_workflow_result_exports_from_completed_value() -> None: + """Completed workflow results can be exported without leaving the result boundary.""" + result = DataWorkflow.from_value( + {"launched": datetime.date(2026, 6, 10), "service": {"name": "api"}}, + ).result() + + export_safe = result.to_export_safe() + wrapped = result.wrap_for_export(allow_encoding="json") + + assert export_safe == {"launched": "2026-06-10", "service": {"name": "api"}} + assert json.loads(wrapped) == {"launched": "2026-06-10", "service": {"name": "api"}} + + def test_data_workflow_preserves_tuples_until_serialization(tmp_path: Path) -> None: """Workflow values keep tuple shape in memory and serialize to JSON arrays at the edge.""" workflow = DataWorkflow.from_value({"aliases": ("api", "gateway")}) From a55e553273e24a9004993f0d26c43c4e9a4ce5fc Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:51:07 -0500 Subject: [PATCH 171/287] test: guard removed runtime keyword guidance --- tests/core/test_release_hygiene.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index e812448..019dd8e 100644 --- a/tests/core/test_release_hygiene.py +++ 
b/tests/core/test_release_hygiene.py @@ -24,6 +24,7 @@ "lifecyclelogging", "vendor_connectors", ) +REMOVED_PUBLIC_KEYWORDS = ("unhump_results",) def test_workflow_actions_are_pinned_to_exact_shas() -> None: @@ -83,3 +84,20 @@ def test_old_package_namespace_shims_do_not_exist() -> None: offenders.append(str(module_path.relative_to(REPO_ROOT))) assert offenders == [] + + +def test_public_guidance_does_not_use_removed_runtime_keywords() -> None: + """Docs and examples should not keep teaching removed compatibility keywords.""" + offenders: list[str] = [] + paths = [REPO_ROOT / "README.md"] + paths.extend(path for root in (REPO_ROOT / "docs", REPO_ROOT / "examples") for path in root.rglob("*")) + + for path in sorted(path for path in paths if path.is_file()): + if path.suffix in {".pyc", ".png"}: + continue + text = path.read_text(encoding="utf-8") + for keyword in REMOVED_PUBLIC_KEYWORDS: + if keyword in text: + offenders.append(f"{path.relative_to(REPO_ROOT)}: {keyword}") + + assert offenders == [] From a88643d53945c780aac87060bed27fe6d46e26c7 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 13:56:52 -0500 Subject: [PATCH 172/287] docs: lead core examples with containers --- examples/core/README.md | 2 +- examples/core/basic_usage.py | 35 ++++++++++++---------------- tests/examples/test_safe_examples.py | 19 +++++++++++++++ 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/examples/core/README.md b/examples/core/README.md index bebd7b1..78c6d3a 100644 --- a/examples/core/README.md +++ b/examples/core/README.md @@ -9,7 +9,7 @@ surface docs, so treat them as part of the documented contract. 
### Basic Usage -- [`basic_usage.py`](basic_usage.py) - Common operations with strings, lists, and maps +- [`basic_usage.py`](basic_usage.py) - Common state helpers plus first-class `ExtendedString`, `ExtendedList`, and `ExtendedDict` operations - [`composed_workflows.py`](composed_workflows.py) - Layered config, Terraform-style HCL, YAML-native tags, and payload pipelines - [`serialization.py`](serialization.py) - YAML, JSON, TOML, HCL, and Base64 encoding/decoding - [`file_operations.py`](file_operations.py) - File path utilities and Git repository helpers diff --git a/examples/core/basic_usage.py b/examples/core/basic_usage.py index 18742b0..4a0bd82 100644 --- a/examples/core/basic_usage.py +++ b/examples/core/basic_usage.py @@ -4,18 +4,13 @@ from __future__ import annotations from extended_data import ( + ExtendedDict, + ExtendedList, ExtendedString, all_non_empty, any_non_empty, - deep_merge, - filter_list, - filter_map, first_non_empty, - flatten_list, - flatten_map, is_nothing, - sanitize_key, - truncate, ) @@ -32,26 +27,26 @@ def demonstrate_state_utilities() -> None: def demonstrate_list_utilities() -> None: """Demonstrate list flattening and allowlist/denylist filtering.""" print("\n=== List Utilities ===") - nested = ["api", ["worker", ["scheduler"]], "docs"] - print("Flattened:", flatten_list(nested)) + nested = ExtendedList(["api", ["worker", ["scheduler"]], "docs"]) + print("Flattened:", nested.flatten()) - items = ["apple", "banana", "apricot", "cherry"] - print("Allowlist:", filter_list(items, allowlist=["apple", "apricot"])) - print("Denylist:", filter_list(items, denylist=["banana"])) + items = ExtendedList(["apple", "banana", "apricot", "cherry"]) + print("Allowlist:", items.filter_values(allowlist=["apple", "apricot"])) + print("Denylist:", items.filter_values(denylist=["banana"])) def demonstrate_map_utilities() -> None: """Demonstrate map merge, flatten, and filtering helpers.""" print("\n=== Map Utilities ===") - base = {"service": {"debug": 
False, "host": "localhost"}} + base = ExtendedDict({"service": {"debug": False, "host": "localhost"}}) override = {"service": {"debug": True, "port": 8080}} - print("Deep merge:", deep_merge(base, override)) + print("Deep merge:", base.deep_merge(override)) - nested = {"service": {"http": {"port": 8080}}, "enabled": True} - print("Flattened:", flatten_map(nested)) + nested = ExtendedDict({"service": {"http": {"port": 8080}}, "enabled": True}) + print("Flattened:", nested.flatten()) - payload = {"name": "api", "age": 30, "city": "Chicago", "active": True} - kept, removed = filter_map(payload, allowlist=["name", "city"]) + payload = ExtendedDict({"name": "api", "age": 30, "city": "Chicago", "active": True}) + kept, removed = payload.filter(allowlist=["name", "city"]) print("Filtered map:", kept) print("Removed map:", removed) @@ -62,8 +57,8 @@ def demonstrate_string_utilities() -> None: text = ExtendedString("prefix_content_suffix") print("Remove prefix:", text.remove_prefix("prefix_")) print("Remove suffix:", text.remove_suffix("_suffix")) - print("Truncate:", truncate("This value is intentionally too long", 20)) - print("Sanitize key:", sanitize_key("User Name (Primary)")) + print("Truncate:", ExtendedString("This value is intentionally too long").truncate(20)) + print("Sanitize key:", ExtendedString("User Name (Primary)").sanitize()) if __name__ == "__main__": diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index 07246f5..269e74c 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -42,6 +42,15 @@ "python -m examples.decorator_api", "python -m examples.encoding_decoding", ) +FUNCTION_FIRST_BASIC_USAGE_HELPERS = ( + "deep_merge", + "filter_list", + "filter_map", + "flatten_list", + "flatten_map", + "sanitize_key", + "truncate", +) def _readme_usage_snippet() -> str: @@ -102,6 +111,16 @@ def test_examples_do_not_document_stale_command_paths() -> None: assert offenders == [] +def 
test_basic_core_example_uses_container_first_operations() -> None: + """The basic example should lead with Tier 2 methods for list/map/string workflows.""" + text = (REPO_ROOT / "examples/core/basic_usage.py").read_text(encoding="utf-8") + import_block = text.split("from extended_data import (", maxsplit=1)[1].split(")", maxsplit=1)[0] + + offenders = [name for name in FUNCTION_FIRST_BASIC_USAGE_HELPERS if name in import_block] + + assert offenders == [] + + @pytest.mark.parametrize("example_path", ALL_EXAMPLES) def test_example_compiles(example_path: str, tmp_path: Path) -> None: """Every example should at least remain syntactically valid.""" From 1ff0df193afb240fdac0e05915c4a6fe8aed36d3 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 14:07:17 -0500 Subject: [PATCH 173/287] feat: keep tier one utilities out of root exports --- README.md | 6 +- docs/package-surface.md | 19 +-- examples/core/basic_usage.py | 2 + examples/core/string_transformations.py | 2 +- src/extended_data/__init__.py | 143 ------------------ src/extended_data/connectors/aws/__init__.py | 2 +- .../connectors/aws/organizations.py | 2 +- src/extended_data/connectors/aws/s3.py | 2 +- src/extended_data/connectors/aws/sso.py | 2 +- src/extended_data/connectors/cloud_params.py | 2 +- src/extended_data/connectors/connectors.py | 3 +- .../connectors/github/__init__.py | 10 +- .../connectors/google/billing.py | 2 +- src/extended_data/connectors/google/cloud.py | 2 +- .../connectors/google/services.py | 2 +- .../connectors/google/workspace.py | 2 +- .../connectors/slack/__init__.py | 3 +- .../connectors/vault/__init__.py | 2 +- src/extended_data/logging/logging.py | 20 +-- src/extended_data/logging/utils.py | 2 +- tests/connectors/test_aws_s3.py | 2 +- tests/core/test_integration_workflows.py | 8 +- tests/core/test_package_surface.py | 33 +++- tests/examples/test_safe_examples.py | 34 +++++ 24 files changed, 113 insertions(+), 194 deletions(-) diff --git a/README.md b/README.md index 
585d4ef..661e5b6 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,8 @@ CrewAI releases pull vulnerable `chromadb` versions transitively. ## Usage ```python -from extended_data import ConnectorFabric, DataWorkflow, ExtendedDict, InputProvider, Logging, decode_file, decode_json, encode_yaml, number_to_words +from extended_data import ConnectorFabric, DataWorkflow, ExtendedDict, InputProvider, Logging, decode_file, decode_json, encode_yaml +from extended_data.primitives import number_to_words logger = Logging(logger_name="example") inputs = InputProvider(inputs={"GITHUB_OWNER": "jbcom"}, from_environment=False) @@ -101,7 +102,8 @@ extended_data/ workflows/ Tier 3 higher-order workflow composition ``` -Tier 1 primitive names are explicit in this major version. Use +Tier 1 primitive names are explicit in this major version and live under +`extended_data.primitives`, not the package root. Use `bytes_to_string()` for bytes-like coercion and `string_to_bool()`, `string_to_int()`, `string_to_float()`, `string_to_path()`, `string_to_date()`, `string_to_datetime()`, and `string_to_time()` for scalar diff --git a/docs/package-surface.md b/docs/package-surface.md index c3b90e1..15de969 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -1,8 +1,9 @@ # Package Surface `extended-data` is one Python distribution with a single `extended_data` -namespace. The root package exposes the primitives and adapters users need most -often. +namespace. The root package exposes first-class containers, Tier 3 processors, +and integrated adapters; pure Tier 1 utilities are imported from +`extended_data.primitives`. The old `extended_data_types`, `lifecyclelogging`, `directed_inputs_class`, and `vendor_connectors` import namespaces are not preserved in this major version. 
@@ -27,11 +28,9 @@ from extended_data import ( decode_json, extend_data, encode_yaml, - flatten_map, - normalize_data_encoding, - number_to_words, to_builtin, ) +from extended_data.primitives import normalize_data_encoding, number_to_words ``` ## Tiers @@ -47,12 +46,14 @@ from extended_data import ( - Tier 3 processors use the first two tiers to handle files, imports, exports, inputs, API data, vendor integrations, and workflows. -Clean major-version primitive names prefer explicit Python words over inherited -helper spellings: use `bytes_to_string()` and the `string_to_*()` conversion -family (`string_to_bool()`, `string_to_int()`, `string_to_float()`, +Clean major-version primitive names live under `extended_data.primitives` and +prefer explicit Python words over inherited helper spellings: use +`bytes_to_string()` and the `string_to_*()` conversion family +(`string_to_bool()`, `string_to_int()`, `string_to_float()`, `string_to_path()`, `string_to_date()`, `string_to_datetime()`, and `string_to_time()`). The old `bytestostr` and `strto*` helper names are -intentionally not preserved. +intentionally not preserved, and pure utility functions are not re-exported +from the package root. Tier 1 public exports stay function-oriented; use `get_default_dict()` when a workflow needs nested or sorted default mappings rather than importing the internal sorted-default mapping helper class. 
diff --git a/examples/core/basic_usage.py b/examples/core/basic_usage.py index 4a0bd82..0772017 100644 --- a/examples/core/basic_usage.py +++ b/examples/core/basic_usage.py @@ -7,6 +7,8 @@ ExtendedDict, ExtendedList, ExtendedString, +) +from extended_data.primitives import ( all_non_empty, any_non_empty, first_non_empty, diff --git a/examples/core/string_transformations.py b/examples/core/string_transformations.py index 294e8d9..82c54aa 100755 --- a/examples/core/string_transformations.py +++ b/examples/core/string_transformations.py @@ -7,7 +7,7 @@ from __future__ import annotations -from extended_data import ( +from extended_data.primitives import ( humanize, ordinalize, pluralize, diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 8b58037..68d0252 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -49,83 +49,6 @@ from extended_data.primitives.formats.json import decode_json, encode_json from extended_data.primitives.formats.toml import decode_toml, encode_toml from extended_data.primitives.formats.yaml import decode_yaml, encode_yaml, is_yaml_data -from extended_data.primitives.introspection import ( - filter_methods, - get_available_methods, - get_caller, - get_inputs_from_docstring, - get_unique_signature, - update_docstring, -) -from extended_data.primitives.mappings import ( - all_values_from_map, - create_merger, - deduplicate_map, - deep_merge, - filter_map, - first_non_empty_value_from_map, - flatten_map, - get_default_dict, - unhump_map, - zipmap, -) -from extended_data.primitives.matching import is_non_empty_match, is_partial_match -from extended_data.primitives.numbers import ( - from_roman, - number_to_currency, - number_to_ordinal, - number_to_words, - to_roman, -) -from extended_data.primitives.sequences import filter_list, flatten_list -from extended_data.primitives.serialization import normalize_data_encoding -from extended_data.primitives.splitting import split_dict_by_type, 
split_list_by_type -from extended_data.primitives.state import ( - all_non_empty, - all_non_empty_in_dict, - all_non_empty_in_list, - any_non_empty, - are_nothing, - first_non_empty, - is_nothing, - yield_non_empty, -) -from extended_data.primitives.string_transforms import ( - humanize, - ordinalize, - pluralize, - singularize, - titleize, - to_camel_case, - to_kebab_case, - to_pascal_case, - to_snake_case, -) -from extended_data.primitives.strings import ( - bytes_to_string, - lower_first_char, - sanitize_key, - titleize_name, - truncate, - upper_first_char, -) -from extended_data.primitives.types import ( - convert_special_type, - convert_special_types, - get_default_value_for_type, - get_primitive_type_for_instance_type, - make_hashable, - reconstruct_special_type, - reconstruct_special_types, - string_to_bool, - string_to_date, - string_to_datetime, - string_to_float, - string_to_int, - string_to_path, - string_to_time, - typeof, -) from extended_data.workflows import DataWorkflow, StepLike, WorkflowAction, WorkflowResult, WorkflowStep @@ -235,26 +158,14 @@ def __getattr__(name: str) -> Any: "WorkflowStep", "ZoomConnector", "__version__", - "all_non_empty", - "all_non_empty_in_dict", - "all_non_empty_in_list", - "all_values_from_map", - "any_non_empty", - "are_nothing", "base64_decode", "base64_encode", - "bytes_to_string", "clone_repository_to_temp", - "convert_special_type", - "convert_special_types", - "create_merger", "decode_file", "decode_hcl2", "decode_json", "decode_toml", "decode_yaml", - "deduplicate_map", - "deep_merge", "delete_file", "directed_inputs", "encode_hcl2", @@ -264,79 +175,25 @@ def __getattr__(name: str) -> Any: "extend_data", "file_path_depth", "file_path_rel_to_root", - "filter_list", - "filter_map", - "filter_methods", - "first_non_empty", - "first_non_empty_value_from_map", - "flatten_list", - "flatten_map", - "from_roman", - "get_available_methods", - "get_caller", "get_connector", "get_connector_class", "get_connector_info", - 
"get_default_dict", - "get_default_value_for_type", "get_encoding_for_file_path", - "get_inputs_from_docstring", "get_parent_repository", - "get_primitive_type_for_instance_type", "get_repository_name", "get_tld", - "get_unique_signature", - "humanize", "input_config", - "is_non_empty_match", - "is_nothing", - "is_partial_match", "is_url", "is_yaml_data", "list_connector_info", "list_connectors", - "lower_first_char", - "make_hashable", "make_raw_data_export_safe", "match_file_extensions", - "normalize_data_encoding", - "number_to_currency", - "number_to_ordinal", - "number_to_words", - "ordinalize", - "pluralize", "read_data_file", "read_file", - "reconstruct_special_type", - "reconstruct_special_types", "resolve_local_path", - "sanitize_key", - "singularize", - "split_dict_by_type", - "split_list_by_type", - "string_to_bool", - "string_to_date", - "string_to_datetime", - "string_to_float", - "string_to_int", - "string_to_path", - "string_to_time", - "titleize", - "titleize_name", "to_builtin", - "to_camel_case", - "to_kebab_case", - "to_pascal_case", - "to_roman", - "to_snake_case", - "truncate", - "typeof", - "unhump_map", "unwrap_raw_data_from_import", - "update_docstring", - "upper_first_char", "wrap_raw_data_for_export", "write_file", - "yield_non_empty", - "zipmap", ] diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index 94339b2..1ca1dbd 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -19,7 +19,6 @@ from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any -from extended_data import is_nothing from extended_data.connectors._optional import require_extra from extended_data.connectors.aws.organizations import AWSOrganizationsMixin from extended_data.connectors.aws.s3 import AWSS3Mixin @@ -27,6 +26,7 @@ from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, 
ExtendedList, ExtendedString, to_builtin from extended_data.logging import Logging +from extended_data.primitives import is_nothing AWSSecretValue = str | ExtendedString | Mapping[str, Any] | None diff --git a/src/extended_data/connectors/aws/organizations.py b/src/extended_data/connectors/aws/organizations.py index 1f66868..315ab22 100644 --- a/src/extended_data/connectors/aws/organizations.py +++ b/src/extended_data/connectors/aws/organizations.py @@ -15,8 +15,8 @@ from deepmerge import always_merger -from extended_data import is_nothing, unhump_map from extended_data.containers import ExtendedDict, to_builtin +from extended_data.primitives import is_nothing, unhump_map if TYPE_CHECKING: diff --git a/src/extended_data/connectors/aws/s3.py b/src/extended_data/connectors/aws/s3.py index 0b1cc1c..7e02038 100644 --- a/src/extended_data/connectors/aws/s3.py +++ b/src/extended_data/connectors/aws/s3.py @@ -10,8 +10,8 @@ from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any -from extended_data import unhump_map from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin +from extended_data.primitives import unhump_map if TYPE_CHECKING: diff --git a/src/extended_data/connectors/aws/sso.py b/src/extended_data/connectors/aws/sso.py index 252e835..bcab456 100644 --- a/src/extended_data/connectors/aws/sso.py +++ b/src/extended_data/connectors/aws/sso.py @@ -12,8 +12,8 @@ from deepmerge import always_merger -from extended_data import is_nothing, unhump_map from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin +from extended_data.primitives import is_nothing, unhump_map if TYPE_CHECKING: diff --git a/src/extended_data/connectors/cloud_params.py b/src/extended_data/connectors/cloud_params.py index f1078c0..63668d8 100644 --- a/src/extended_data/connectors/cloud_params.py +++ b/src/extended_data/connectors/cloud_params.py @@ -16,8 +16,8 @@ from typing import Any -from 
extended_data import is_nothing, lower_first_char, upper_first_char from extended_data.containers import ExtendedDict, extend_data +from extended_data.primitives import is_nothing, lower_first_char, upper_first_char def get_cloud_call_params( diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index ff046cf..17507be 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -6,8 +6,6 @@ from typing import TYPE_CHECKING, Any -from extended_data import get_default_dict, get_unique_signature, make_hashable - # Import zoom directly (no extra deps) from extended_data.connectors.registry import ( get_connector_class, @@ -25,6 +23,7 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.inputs import InputProvider from extended_data.logging import Logging +from extended_data.primitives import get_default_dict, get_unique_signature, make_hashable # Optional connectors - imported lazily when methods are called diff --git a/src/extended_data/connectors/github/__init__.py b/src/extended_data/connectors/github/__init__.py index 723eadb..5b0a40e 100644 --- a/src/extended_data/connectors/github/__init__.py +++ b/src/extended_data/connectors/github/__init__.py @@ -11,16 +11,16 @@ from ruamel.yaml import YAML -from extended_data import ( +from extended_data.connectors._optional import require_extra +from extended_data.connectors.base import VendorConnectorBase +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple +from extended_data.io import ( decode_file, get_encoding_for_file_path, - is_nothing, wrap_raw_data_for_export, ) -from extended_data.connectors._optional import require_extra -from extended_data.connectors.base import VendorConnectorBase -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple from extended_data.logging import Logging +from 
extended_data.primitives import is_nothing Auth: Any = None diff --git a/src/extended_data/connectors/google/billing.py b/src/extended_data/connectors/google/billing.py index ddab375..220df07 100644 --- a/src/extended_data/connectors/google/billing.py +++ b/src/extended_data/connectors/google/billing.py @@ -8,8 +8,8 @@ from typing import TYPE_CHECKING, Any, cast -from extended_data import unhump_map from extended_data.containers import ExtendedDict, ExtendedList, to_builtin +from extended_data.primitives import unhump_map class GoogleBillingMixin: diff --git a/src/extended_data/connectors/google/cloud.py b/src/extended_data/connectors/google/cloud.py index 350cdf6..91422e4 100644 --- a/src/extended_data/connectors/google/cloud.py +++ b/src/extended_data/connectors/google/cloud.py @@ -9,8 +9,8 @@ from collections.abc import Mapping from typing import TYPE_CHECKING, Any -from extended_data import unhump_map from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin +from extended_data.primitives import unhump_map class GoogleCloudMixin: diff --git a/src/extended_data/connectors/google/services.py b/src/extended_data/connectors/google/services.py index 73cd067..e65c606 100644 --- a/src/extended_data/connectors/google/services.py +++ b/src/extended_data/connectors/google/services.py @@ -11,8 +11,8 @@ from collections.abc import Mapping, MutableMapping from typing import TYPE_CHECKING, Any -from extended_data import unhump_map from extended_data.containers import ExtendedDict, ExtendedList +from extended_data.primitives import unhump_map _PROJECT_ACTIVITY_TIME_FIELDS = ( diff --git a/src/extended_data/connectors/google/workspace.py b/src/extended_data/connectors/google/workspace.py index 136cb1c..5665c59 100644 --- a/src/extended_data/connectors/google/workspace.py +++ b/src/extended_data/connectors/google/workspace.py @@ -8,8 +8,8 @@ from typing import TYPE_CHECKING, Any, cast -from extended_data import unhump_map from 
extended_data.containers import ExtendedDict, ExtendedList, to_builtin +from extended_data.primitives import unhump_map class GoogleWorkspaceMixin: diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index f916d6a..aedec1d 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -23,11 +23,12 @@ def batched(iterable: Iterable[Any], n: int) -> Iterator[tuple[Any, ...]]: yield batch -from extended_data import is_nothing, wrap_raw_data_for_export from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data, to_builtin +from extended_data.io import wrap_raw_data_for_export from extended_data.logging import Logging +from extended_data.primitives import is_nothing class SlackFallbackError(Exception): diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index 39ee8f3..6f3bf05 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -6,11 +6,11 @@ from datetime import datetime, timezone from typing import TYPE_CHECKING, Any -from extended_data import is_nothing from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.logging import Logging +from extended_data.primitives import is_nothing if TYPE_CHECKING: diff --git a/src/extended_data/logging/logging.py b/src/extended_data/logging/logging.py index 51903fe..09837cd 100644 --- a/src/extended_data/logging/logging.py +++ b/src/extended_data/logging/logging.py @@ -29,17 +29,8 @@ import orjson -from extended_data import ( - get_unique_signature, - is_nothing, - string_to_bool, - 
to_camel_case, - to_kebab_case, - to_pascal_case, - to_snake_case, - wrap_raw_data_for_export, -) from extended_data.containers import ExtendedDict, ExtendedSet, to_builtin +from extended_data.io import wrap_raw_data_for_export from extended_data.logging.const import VERBOSITY from extended_data.logging.handlers import add_console_handler, add_file_handler from extended_data.logging.log_types import LogLevel @@ -49,6 +40,15 @@ find_logger, get_log_level, ) +from extended_data.primitives import ( + get_unique_signature, + is_nothing, + string_to_bool, + to_camel_case, + to_kebab_case, + to_pascal_case, + to_snake_case, +) # Type alias for key transformation functions diff --git a/src/extended_data/logging/utils.py b/src/extended_data/logging/utils.py index a044805..0da694e 100644 --- a/src/extended_data/logging/utils.py +++ b/src/extended_data/logging/utils.py @@ -8,7 +8,7 @@ from copy import copy, deepcopy from typing import Any -from extended_data import make_raw_data_export_safe, wrap_raw_data_for_export +from extended_data.io import make_raw_data_export_safe, wrap_raw_data_for_export from extended_data.logging.const import DEFAULT_LOG_LEVEL diff --git a/tests/connectors/test_aws_s3.py b/tests/connectors/test_aws_s3.py index 2ad0c2f..aa82a3c 100644 --- a/tests/connectors/test_aws_s3.py +++ b/tests/connectors/test_aws_s3.py @@ -63,7 +63,7 @@ def test_list_s3_buckets_with_unhump(self, aws_connector): assert "bucket1" in result # unhump_map transforms CamelCase keys to snake_case # If unhump was applied, we should have snake_case keys - # The actual transformation happens in extended_data.unhump_map + # The actual transformation happens in extended_data.primitives.unhump_map def test_get_bucket_location(self, aws_connector): """Test getting bucket location.""" diff --git a/tests/core/test_integration_workflows.py b/tests/core/test_integration_workflows.py index 90da1c4..4d5466c 100644 --- a/tests/core/test_integration_workflows.py +++ 
b/tests/core/test_integration_workflows.py @@ -75,15 +75,15 @@ def test_integration_workflow_data_transformation_pipeline(): dict1 = {"a": {"b": 1}, "c": 3} dict2 = {"a": {"d": 4}, "e": 5} - # 1. Merge maps - merged = edt.deep_merge(dict1, dict2) + # 1. Merge maps through the Tier 2 container surface + merged = edt.ExtendedDict(dict1).deep_merge(dict2) assert merged["a"] == {"b": 1, "d": 4} # 2. Flatten map - flattened = edt.flatten_map(merged) + flattened = merged.flatten() assert flattened["a.b"] == 1 assert flattened["a.d"] == 4 # 3. Transform keys - unhumped = edt.unhump_map(merged) + unhumped = merged.unhump() assert "a" in unhumped diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 5d31661..01671b1 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -70,11 +70,37 @@ def test_public_all_exports_are_import_star_visible() -> None: def test_root_exports_tiered_data_surfaces() -> None: - """The root package should expose the integrated primitive, container, IO, and workflow surfaces.""" - for module in (primitives, containers, io, workflows): + """The root package should expose integrated container, IO, and workflow surfaces.""" + for module in (containers, io, workflows): assert set(module.__all__) <= set(extended_data.__all__), module.__name__ +def test_tier1_utility_functions_are_not_root_exports() -> None: + """Pure utility functions should be imported from extended_data.primitives.""" + tier1_utility_names = ( + "all_non_empty", + "any_non_empty", + "deep_merge", + "filter_list", + "filter_map", + "flatten_list", + "flatten_map", + "is_nothing", + "normalize_data_encoding", + "number_to_words", + "sanitize_key", + "string_to_bool", + "to_roman", + "truncate", + "unhump_map", + ) + + for name in tier1_utility_names: + assert hasattr(primitives, name), name + assert not hasattr(extended_data, name), name + assert name not in extended_data.__all__ + + def 
test_clean_major_version_public_names() -> None: """The public surface uses integrated extended-data names.""" assert inputs.InputProvider.__name__ == "InputProvider" @@ -140,9 +166,6 @@ def test_root_exports_first_class_integrated_primitives() -> None: assert extended_data.SyncOperation is secrets.SyncOperation assert extended_data.OutputFormat is secrets.OutputFormat assert callable(extended_data.directed_inputs) - assert extended_data.number_to_words(42) == "forty-two" - assert extended_data.to_roman(42) == "XLII" - assert extended_data.normalize_data_encoding("YML") == "yaml" assert callable(extended_data.read_data_file) assert callable(extended_data.get_connector) assert callable(extended_data.list_connector_info) diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index 269e74c..64aa6d2 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -2,6 +2,7 @@ from __future__ import annotations +import ast import importlib.util import os import py_compile @@ -13,6 +14,8 @@ import pytest +from extended_data import primitives + REPO_ROOT = Path(__file__).resolve().parents[2] SAFE_EXAMPLES = [ @@ -51,6 +54,18 @@ "sanitize_key", "truncate", ) +ROOT_ALLOWED_PRIMITIVE_IMPORTS = ( + "decode_hcl2", + "decode_json", + "decode_toml", + "decode_yaml", + "encode_hcl2", + "encode_json", + "encode_toml", + "encode_yaml", + "is_yaml_data", +) +ROOT_DISALLOWED_TIER1_IMPORTS = tuple(sorted(set(primitives.__all__) - set(ROOT_ALLOWED_PRIMITIVE_IMPORTS))) def _readme_usage_snippet() -> str: @@ -121,6 +136,25 @@ def test_basic_core_example_uses_container_first_operations() -> None: assert offenders == [] +def test_examples_do_not_import_tier1_utilities_from_root() -> None: + """Examples should import pure Tier 1 utilities from extended_data.primitives.""" + offenders: list[str] = [] + + for example_path in ALL_EXAMPLES: + text = (REPO_ROOT / example_path).read_text(encoding="utf-8") + tree = ast.parse(text) + 
for node in ast.walk(tree): + if not isinstance(node, ast.ImportFrom) or node.module != "extended_data": + continue + + imported_names = {alias.name for alias in node.names} + disallowed = sorted(imported_names.intersection(ROOT_DISALLOWED_TIER1_IMPORTS)) + if disallowed: + offenders.append(f"{example_path}: {', '.join(disallowed)}") + + assert offenders == [] + + @pytest.mark.parametrize("example_path", ALL_EXAMPLES) def test_example_compiles(example_path: str, tmp_path: Path) -> None: """Every example should at least remain syntactically valid.""" From e39ddd11fd091f4caece1b25db988aca32ad9e61 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 14:11:58 -0500 Subject: [PATCH 174/287] feat: remove primitive codecs from root exports --- README.md | 4 ++-- docs/package-surface.md | 18 ++++++++---------- examples/core/composed_workflows.py | 3 +-- examples/core/serialization.py | 2 ++ src/extended_data/__init__.py | 13 ------------- tests/core/test_package_surface.py | 24 +++--------------------- tests/core/test_workflows.py | 3 +-- tests/examples/test_safe_examples.py | 13 +------------ 8 files changed, 18 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 661e5b6..e7ad528 100644 --- a/README.md +++ b/README.md @@ -30,8 +30,8 @@ CrewAI releases pull vulnerable `chromadb` versions transitively. 
## Usage ```python -from extended_data import ConnectorFabric, DataWorkflow, ExtendedDict, InputProvider, Logging, decode_file, decode_json, encode_yaml -from extended_data.primitives import number_to_words +from extended_data import ConnectorFabric, DataWorkflow, ExtendedDict, InputProvider, Logging, decode_file +from extended_data.primitives import decode_json, encode_yaml, number_to_words logger = Logging(logger_name="example") inputs = InputProvider(inputs={"GITHUB_OWNER": "jbcom"}, from_environment=False) diff --git a/docs/package-surface.md b/docs/package-surface.md index 15de969..78fbee9 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -25,12 +25,10 @@ from extended_data import ( SecretsConnector, SlackConnector, SyncOptions, - decode_json, extend_data, - encode_yaml, to_builtin, ) -from extended_data.primitives import normalize_data_encoding, number_to_words +from extended_data.primitives import decode_json, encode_yaml, normalize_data_encoding, number_to_words ``` ## Tiers @@ -46,10 +44,10 @@ from extended_data.primitives import normalize_data_encoding, number_to_words - Tier 3 processors use the first two tiers to handle files, imports, exports, inputs, API data, vendor integrations, and workflows. -Clean major-version primitive names live under `extended_data.primitives` and -prefer explicit Python words over inherited helper spellings: use -`bytes_to_string()` and the `string_to_*()` conversion family -(`string_to_bool()`, `string_to_int()`, `string_to_float()`, +Clean major-version primitive names, including JSON/YAML/TOML/HCL codecs, live +under `extended_data.primitives` and prefer explicit Python words over +inherited helper spellings: use `bytes_to_string()` and the `string_to_*()` +conversion family (`string_to_bool()`, `string_to_int()`, `string_to_float()`, `string_to_path()`, `string_to_date()`, `string_to_datetime()`, and `string_to_time()`). 
The old `bytestostr` and `strto*` helper names are intentionally not preserved, and pure utility functions are not re-exported @@ -58,9 +56,9 @@ Tier 1 public exports stay function-oriented; use `get_default_dict()` when a workflow needs nested or sorted default mappings rather than importing the internal sorted-default mapping helper class. -Direct JSON, YAML, TOML, and HCL decode failures raise `DataDecodeError` with -format and position context while preserving the parser exception as the cause; -the public error message does not echo the raw payload. +Direct JSON, YAML, TOML, and HCL primitive decode failures raise +`DataDecodeError` with format and position context while preserving the parser +exception as the cause; the public error message does not echo the raw payload. ```python name = ExtendedString("API Response Value").to_snake_case() diff --git a/examples/core/composed_workflows.py b/examples/core/composed_workflows.py index 260bcbc..a49e175 100644 --- a/examples/core/composed_workflows.py +++ b/examples/core/composed_workflows.py @@ -16,12 +16,11 @@ ExtendedList, base64_decode, base64_encode, - decode_hcl2, - encode_hcl2, read_data_file, read_file, write_file, ) +from extended_data.primitives import decode_hcl2, encode_hcl2 from extended_data.primitives.formats.yaml import YamlTagged diff --git a/examples/core/serialization.py b/examples/core/serialization.py index a8a03e2..44f338b 100755 --- a/examples/core/serialization.py +++ b/examples/core/serialization.py @@ -10,6 +10,8 @@ from extended_data import ( base64_decode, base64_encode, +) +from extended_data.primitives import ( decode_hcl2, decode_json, decode_toml, diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 68d0252..77e9188 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -45,10 +45,6 @@ ) from extended_data.io.importers import unwrap_raw_data_from_import from extended_data.primitives.formats.errors import DataDecodeError -from 
extended_data.primitives.formats.hcl import decode_hcl2, encode_hcl2 -from extended_data.primitives.formats.json import decode_json, encode_json -from extended_data.primitives.formats.toml import decode_toml, encode_toml -from extended_data.primitives.formats.yaml import decode_yaml, encode_yaml, is_yaml_data from extended_data.workflows import DataWorkflow, StepLike, WorkflowAction, WorkflowResult, WorkflowStep @@ -162,16 +158,8 @@ def __getattr__(name: str) -> Any: "base64_encode", "clone_repository_to_temp", "decode_file", - "decode_hcl2", - "decode_json", - "decode_toml", - "decode_yaml", "delete_file", "directed_inputs", - "encode_hcl2", - "encode_json", - "encode_toml", - "encode_yaml", "extend_data", "file_path_depth", "file_path_rel_to_root", @@ -184,7 +172,6 @@ def __getattr__(name: str) -> Any: "get_tld", "input_config", "is_url", - "is_yaml_data", "list_connector_info", "list_connectors", "make_raw_data_export_safe", diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 01671b1..61c211e 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -75,27 +75,9 @@ def test_root_exports_tiered_data_surfaces() -> None: assert set(module.__all__) <= set(extended_data.__all__), module.__name__ -def test_tier1_utility_functions_are_not_root_exports() -> None: - """Pure utility functions should be imported from extended_data.primitives.""" - tier1_utility_names = ( - "all_non_empty", - "any_non_empty", - "deep_merge", - "filter_list", - "filter_map", - "flatten_list", - "flatten_map", - "is_nothing", - "normalize_data_encoding", - "number_to_words", - "sanitize_key", - "string_to_bool", - "to_roman", - "truncate", - "unhump_map", - ) - - for name in tier1_utility_names: +def test_tier1_primitives_are_not_root_exports() -> None: + """Tier 1 functions and codecs should be imported from extended_data.primitives.""" + for name in primitives.__all__: assert hasattr(primitives, name), name assert not 
hasattr(extended_data, name), name assert name not in extended_data.__all__ diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index 5ee3126..dbc452c 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -18,11 +18,10 @@ WorkflowStep, base64_decode, base64_encode, - decode_hcl2, - encode_hcl2, read_data_file, write_file, ) +from extended_data.primitives import decode_hcl2, encode_hcl2 from extended_data.primitives.formats.yaml import YamlTagged diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index 64aa6d2..aa47abd 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -54,18 +54,7 @@ "sanitize_key", "truncate", ) -ROOT_ALLOWED_PRIMITIVE_IMPORTS = ( - "decode_hcl2", - "decode_json", - "decode_toml", - "decode_yaml", - "encode_hcl2", - "encode_json", - "encode_toml", - "encode_yaml", - "is_yaml_data", -) -ROOT_DISALLOWED_TIER1_IMPORTS = tuple(sorted(set(primitives.__all__) - set(ROOT_ALLOWED_PRIMITIVE_IMPORTS))) +ROOT_DISALLOWED_TIER1_IMPORTS = tuple(sorted(primitives.__all__)) def _readme_usage_snippet() -> str: From 92d465d9889d0691ff022e2a3964ef3ad3da442a Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 14:17:02 -0500 Subject: [PATCH 175/287] docs: name secrets sync integration roles precisely --- README.md | 8 +++--- docs/package-surface.md | 9 ++++--- .../connectors/secrets/__init__.py | 25 ++++++++++--------- tests/core/test_release_hygiene.py | 22 ++++++++++++++++ 4 files changed, 45 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index e7ad528..caa2767 100644 --- a/README.md +++ b/README.md @@ -143,9 +143,11 @@ secret leak by default. Raw SDK/client objects and raw transport responses remain available from the methods that explicitly return them. -The `secrets` connector integrates with the standalone `secretsync` CLI or -native bindings. 
CLI fallback expects `secretsync pipeline --output json` to -return the stable pipeline result envelope used by this package. +The `secrets` connector integrates with the standalone SecretSync project +(`jbcom/secrets-sync`) through either the optional native `secretssync` Python +module or the `secretsync` CLI. CLI fallback expects +`secretsync pipeline --output json` to return the stable pipeline result +envelope used by this package. ```python from extended_data import SecretsConnector, SyncOptions diff --git a/docs/package-surface.md b/docs/package-surface.md index 78fbee9..2866905 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -292,10 +292,11 @@ payload = github.get_repository_file("service.json") assert payload["service"]["name"].upper_first() == "Api" ``` -The `secrets` adapter is the Python-facing bridge to the standalone -`secretsync` project. It uses native bindings when present and otherwise falls -back to the CLI, which must emit the stable `secretsync pipeline --output json` -result envelope for both dry-run and apply runs. +The `secrets` adapter is the Python-facing bridge to the standalone SecretSync +project (`jbcom/secrets-sync`). It uses the optional native `secretssync` +Python module when present and otherwise falls back to the `secretsync` CLI, +which must emit the stable `secretsync pipeline --output json` result envelope +for both dry-run and apply runs. Secrets tool factories are exported from `extended_data.secrets`; the duplicate `extended_data.secrets.tools` module path is intentionally not preserved. diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 2449398..fc89b43 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -1,13 +1,14 @@ -"""Secrets Connector - Enterprise-grade secret synchronization. +"""Secrets Connector - enterprise-grade SecretSync integration. 
-This connector provides Python bindings for secretssync, enabling -enterprise-grade secret synchronization from HashiCorp Vault to -AWS Secrets Manager with two-phase architecture, inheritance, -versioning, and CI/CD integration. +This connector integrates with the standalone SecretSync project +(`jbcom/secrets-sync`), enabling enterprise-grade secret synchronization from +HashiCorp Vault to AWS Secrets Manager with two-phase architecture, +inheritance, versioning, and CI/CD integration. The connector can operate in two modes: -1. Native mode: Uses gopy-generated Python bindings for maximum performance -2. CLI mode: Falls back to subprocess calls if bindings aren't available +1. Native mode: Uses the optional gopy-generated `secretssync` Python module +2. CLI mode: Falls back to the `secretsync` subprocess CLI when bindings are + not available Example usage: from extended_data.connectors.secrets import SecretsConnector @@ -175,11 +176,11 @@ def to_dict(self) -> ExtendedDict: class SecretsConnector(VendorConnectorBase): - """Enterprise-grade secret synchronization connector. + """Enterprise-grade SecretSync connector. - This connector wraps the secretssync Go library, providing Python - bindings for enterprise-grade secret synchronization between - HashiCorp Vault and AWS Secrets Manager. + This connector wraps the standalone SecretSync project + (`jbcom/secrets-sync`) through either the optional native `secretssync` + Python bindings or the `secretsync` CLI. 
Features: - Two-phase pipeline architecture (merge → sync) @@ -217,7 +218,7 @@ def __init__( self.logger.info(f"SecretsConnector initialized in {mode} mode") def _find_cli(self) -> str | None: - """Find the secretsync CLI binary.""" + """Find the SecretSync `secretsync` CLI binary.""" # Check common locations candidates = [ "secretsync", diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 019dd8e..6a2bcf9 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -25,6 +25,10 @@ "vendor_connectors", ) REMOVED_PUBLIC_KEYWORDS = ("unhump_results",) +SECRETSSYNC_PROJECT_PATTERNS = ( + re.compile(r"\bsecretssync\s+(?:Go\s+)?(?:project|library|repo|repository|CLI|connector|bindings?)\b", re.IGNORECASE), + re.compile(r"\b(?:project|library|repo|repository|CLI|connector|bindings?)\s+secretssync\b", re.IGNORECASE), +) def test_workflow_actions_are_pinned_to_exact_shas() -> None: @@ -101,3 +105,21 @@ def test_public_guidance_does_not_use_removed_runtime_keywords() -> None: offenders.append(f"{path.relative_to(REPO_ROOT)}: {keyword}") assert offenders == [] + + +def test_public_guidance_names_secrets_sync_roles_precisely() -> None: + """Use SecretSync for the product and reserve exact names for CLI/native modules.""" + offenders: list[str] = [] + paths = [REPO_ROOT / "README.md"] + paths.extend(path for root in (REPO_ROOT / "docs", REPO_ROOT / "src") for path in root.rglob("*")) + + for path in sorted(path for path in paths if path.is_file()): + if path.suffix in {".pyc", ".png"}: + continue + text = path.read_text(encoding="utf-8") + for pattern in SECRETSSYNC_PROJECT_PATTERNS: + if pattern.search(text): + offenders.append(str(path.relative_to(REPO_ROOT))) + break + + assert offenders == [] From 8c0f30a38a8c5df5123862ceec127e8b0e8814e8 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 14:30:33 -0500 Subject: [PATCH 176/287] test: guard root lazy export terminology --- 
src/extended_data/__init__.py | 2 +- tests/core/test_package_surface.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 77e9188..6565934 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -108,7 +108,7 @@ def __getattr__(name: str) -> Any: - """Lazily expose integrated subpackage primitives at the package root.""" + """Lazily expose integrated adapters and processors at the package root.""" if name not in _LAZY_EXPORTS: raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 61c211e..b29de94 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -83,6 +83,14 @@ def test_tier1_primitives_are_not_root_exports() -> None: assert name not in extended_data.__all__ +def test_root_lazy_exports_do_not_describe_tier1_primitives() -> None: + """The package root lazy loader should not present Tier 1 primitives as root exports.""" + lazy_loader_docs = extended_data.__getattr__.__doc__ or "" + + assert "primitives" not in lazy_loader_docs + assert "adapters and processors" in lazy_loader_docs + + def test_clean_major_version_public_names() -> None: """The public surface uses integrated extended-data names.""" assert inputs.InputProvider.__name__ == "InputProvider" @@ -132,7 +140,7 @@ def test_old_monorepo_import_namespaces_are_not_preserved() -> None: assert util.find_spec(namespace) is None -def test_root_exports_first_class_integrated_primitives() -> None: +def test_root_exports_first_class_integrated_surfaces() -> None: """Inputs, logging, and connector fabric are available from the root package.""" assert extended_data.DataDecodeError.__name__ == "DataDecodeError" assert extended_data.DataWorkflow.__name__ == "DataWorkflow" From ebb51e6cf68f36e1d6749cd8fbe6ef0a3d4dbfe7 Mon Sep 17 00:00:00 2001 From: Jon 
Bogaty Date: Wed, 10 Jun 2026 14:38:40 -0500 Subject: [PATCH 177/287] test: guard typed package marker --- tests/core/test_release_hygiene.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 6a2bcf9..70d93f6 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -4,8 +4,11 @@ import re +from importlib import resources from pathlib import Path +import tomlkit + REPO_ROOT = Path(__file__).resolve().parents[2] WORKFLOW_ROOT = REPO_ROOT / ".github" / "workflows" @@ -31,6 +34,10 @@ ) +def _pyproject() -> tomlkit.TOMLDocument: + return tomlkit.parse((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) + + def test_workflow_actions_are_pinned_to_exact_shas() -> None: """Remote workflow actions should use immutable action commit SHAs.""" offenders: list[str] = [] @@ -90,6 +97,15 @@ def test_old_package_namespace_shims_do_not_exist() -> None: assert offenders == [] +def test_typed_classifier_has_pep561_marker() -> None: + """The typed package classifier should be backed by a PEP 561 marker.""" + classifiers = _pyproject()["project"]["classifiers"] + + assert "Typing :: Typed" in classifiers + assert (REPO_ROOT / "src" / "extended_data" / "py.typed").is_file() + assert resources.files("extended_data").joinpath("py.typed").is_file() + + def test_public_guidance_does_not_use_removed_runtime_keywords() -> None: """Docs and examples should not keep teaching removed compatibility keywords.""" offenders: list[str] = [] From 10dc968d5add3fe1243817e5c2baa22d91dca0b0 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 14:45:07 -0500 Subject: [PATCH 178/287] test: guard all runtime extra coverage --- pyproject.toml | 3 +++ tests/core/test_release_hygiene.py | 19 +++++++++++++++++++ uv.lock | 6 ++++++ 3 files changed, 28 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index a98978d..2f0a566 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -142,11 +142,14 @@ all = [ "PyGithub>=2.9.1", "pyngrok>=8.0.0", "python-graphql-client>=0.4.3", + "pyyaml>=6.0.3", + "rich>=13.7.0,<15.0.0", "sentence-transformers>=5.4.1", "slack-sdk>=3.41.0", "sqlite-vec>=0.1.9", "strands-agents>=1.36.0", "uvicorn>=0.45.0", + "validators>=0.35.0", ] [project.scripts] diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 70d93f6..1e6c4f1 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -32,6 +32,7 @@ re.compile(r"\bsecretssync\s+(?:Go\s+)?(?:project|library|repo|repository|CLI|connector|bindings?)\b", re.IGNORECASE), re.compile(r"\b(?:project|library|repo|repository|CLI|connector|bindings?)\s+secretssync\b", re.IGNORECASE), ) +NON_RUNTIME_EXTRAS = {"all", "dev", "tests", "typing"} def _pyproject() -> tomlkit.TOMLDocument: @@ -106,6 +107,24 @@ def test_typed_classifier_has_pep561_marker() -> None: assert resources.files("extended_data").joinpath("py.typed").is_file() +def test_all_extra_contains_every_runtime_extra_dependency() -> None: + """The broad install target should be the union of runtime feature extras.""" + extras = _pyproject()["project"]["optional-dependencies"] + all_dependencies = {str(dependency) for dependency in extras["all"]} + missing: list[str] = [] + + for extra_name, dependencies in extras.items(): + if extra_name in NON_RUNTIME_EXTRAS: + continue + + for dependency in dependencies: + dependency_text = str(dependency) + if dependency_text not in all_dependencies: + missing.append(f"{extra_name}: {dependency_text}") + + assert missing == [] + + def test_public_guidance_does_not_use_removed_runtime_keywords() -> None: """Docs and examples should not keep teaching removed compatibility keywords.""" offenders: list[str] = [] diff --git a/uv.lock b/uv.lock index 71fafe3..67bd16b 100644 --- a/uv.lock +++ b/uv.lock @@ -883,11 +883,14 @@ all = [ { name = "pygithub" }, { name = "pyngrok" }, { name = 
"python-graphql-client" }, + { name = "pyyaml" }, + { name = "rich" }, { name = "sentence-transformers" }, { name = "slack-sdk" }, { name = "sqlite-vec" }, { name = "strands-agents" }, { name = "uvicorn" }, + { name = "validators" }, ] anthropic = [ { name = "anthropic" }, @@ -1026,9 +1029,11 @@ requires-dist = [ { name = "python-graphql-client", marker = "extra == 'github'", specifier = ">=0.4.3" }, { name = "python-hcl2", specifier = ">=4.3.4" }, { name = "pyyaml", specifier = ">=6.0.1" }, + { name = "pyyaml", marker = "extra == 'all'", specifier = ">=6.0.3" }, { name = "pyyaml", marker = "extra == 'secrets'", specifier = ">=6.0.3" }, { name = "requests", specifier = ">=2.33.1" }, { name = "rich", specifier = ">=13.7.1" }, + { name = "rich", marker = "extra == 'all'", specifier = ">=13.7.0,<15.0.0" }, { name = "rich", marker = "extra == 'meshy'", specifier = ">=13.7.0,<15.0.0" }, { name = "ruamel-yaml", specifier = ">=0.18.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8.0" }, @@ -1051,6 +1056,7 @@ requires-dist = [ { name = "uvicorn", marker = "extra == 'all'", specifier = ">=0.45.0" }, { name = "uvicorn", marker = "extra == 'webhooks'", specifier = ">=0.45.0" }, { name = "validators", specifier = ">=0.22.0" }, + { name = "validators", marker = "extra == 'all'", specifier = ">=0.35.0" }, { name = "validators", marker = "extra == 'meshy'", specifier = ">=0.35.0" }, { name = "wrapt", specifier = ">=1.16.0" }, ] From 5615c0b57f028f17ed2ba4fc654752133a9d7bcf Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 14:47:02 -0500 Subject: [PATCH 179/287] test: guard documented install extras --- tests/core/test_release_hygiene.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 1e6c4f1..c1ad3e4 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -32,6 +32,7 @@ 
re.compile(r"\bsecretssync\s+(?:Go\s+)?(?:project|library|repo|repository|CLI|connector|bindings?)\b", re.IGNORECASE), re.compile(r"\b(?:project|library|repo|repository|CLI|connector|bindings?)\s+secretssync\b", re.IGNORECASE), ) +EXTRA_REFERENCE_RE = re.compile(r"extended-data\[([^\]\n]+)\]") NON_RUNTIME_EXTRAS = {"all", "dev", "tests", "typing"} @@ -125,6 +126,31 @@ def test_all_extra_contains_every_runtime_extra_dependency() -> None: assert missing == [] +def test_public_install_guidance_names_known_extras() -> None: + """Static install examples should not teach extras that pyproject does not publish.""" + known_extras = set(_pyproject()["project"]["optional-dependencies"]) + offenders: list[str] = [] + paths = [REPO_ROOT / "README.md"] + paths.extend(path for root in (REPO_ROOT / "docs", REPO_ROOT / "examples", REPO_ROOT / "src") for path in root.rglob("*")) + + for path in sorted(path for path in paths if path.is_file()): + if path.suffix in {".pyc", ".png"}: + continue + + relative_path = path.relative_to(REPO_ROOT) + for line_number, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1): + for match in EXTRA_REFERENCE_RE.finditer(line): + extra_group = match.group(1) + if "..." 
in extra_group or "{" in extra_group or "}" in extra_group: + continue + + for extra in (part.strip() for part in extra_group.split(",")): + if extra and extra not in known_extras: + offenders.append(f"{relative_path}:{line_number}: {extra} in extended-data[{extra_group}]") + + assert offenders == [] + + def test_public_guidance_does_not_use_removed_runtime_keywords() -> None: """Docs and examples should not keep teaching removed compatibility keywords.""" offenders: list[str] = [] From 29017f2b4bfed72511225e3b47f48ea5d07e5514 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 14:48:25 -0500 Subject: [PATCH 180/287] test: guard console script targets --- tests/core/test_release_hygiene.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index c1ad3e4..1a5ce22 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -4,7 +4,7 @@ import re -from importlib import resources +from importlib import import_module, resources from pathlib import Path import tomlkit @@ -151,6 +151,30 @@ def test_public_install_guidance_names_known_extras() -> None: assert offenders == [] +def test_project_scripts_point_to_callables() -> None: + """Console-script metadata should resolve to importable callables.""" + scripts = _pyproject()["project"]["scripts"] + offenders: list[str] = [] + + for script_name, target in scripts.items(): + module_name, separator, attribute_name = str(target).partition(":") + if not separator: + offenders.append(f"{script_name}: {target} has no attribute separator") + continue + + try: + module = import_module(module_name) + except Exception as exc: + offenders.append(f"{script_name}: cannot import {module_name}: {exc}") + continue + + entry_point = getattr(module, attribute_name, None) + if not callable(entry_point): + offenders.append(f"{script_name}: {target} is not callable") + + assert offenders 
== [] + + def test_public_guidance_does_not_use_removed_runtime_keywords() -> None: """Docs and examples should not keep teaching removed compatibility keywords.""" offenders: list[str] = [] From 97895abf3841fff167f8f59c64c27f30008aff6b Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 14:58:17 -0500 Subject: [PATCH 181/287] test: guard package docs and examples --- tests/core/test_release_hygiene.py | 22 ++++++++++++++++++++++ tests/examples/test_safe_examples.py | 11 +++++++++++ 2 files changed, 33 insertions(+) diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 1a5ce22..2402dd6 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -34,6 +34,7 @@ ) EXTRA_REFERENCE_RE = re.compile(r"extended-data\[([^\]\n]+)\]") NON_RUNTIME_EXTRAS = {"all", "dev", "tests", "typing"} +PACKAGE_SHAPE_RE = re.compile(r"^ ([a-z_]+)/\s+") def _pyproject() -> tomlkit.TOMLDocument: @@ -175,6 +176,27 @@ def test_project_scripts_point_to_callables() -> None: assert offenders == [] +def test_readme_package_shape_matches_public_subpackages() -> None: + """The documented tier layout should match the actual top-level package directories.""" + source_root = REPO_ROOT / "src" / "extended_data" + actual_subpackages = { + path.name + for path in source_root.iterdir() + if path.is_dir() and not path.name.startswith("__") and (path / "__init__.py").is_file() + } + readme = (REPO_ROOT / "README.md").read_text(encoding="utf-8") + try: + package_shape = readme.split("## Package Shape", 1)[1].split("```", 2)[1] + except IndexError as exc: + raise AssertionError("README.md must document the package shape in a fenced block") from exc + + documented_subpackages = { + match.group(1) for line in package_shape.splitlines() if (match := PACKAGE_SHAPE_RE.match(line)) + } + + assert documented_subpackages == actual_subpackages + + def test_public_guidance_does_not_use_removed_runtime_keywords() -> None: """Docs and 
examples should not keep teaching removed compatibility keywords.""" offenders: list[str] = [] diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index aa47abd..60a4a4d 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -65,6 +65,17 @@ def _readme_usage_snippet() -> str: return match.group("code") +def test_example_inventory_is_complete() -> None: + """Every Python example should be explicitly classified for test coverage.""" + discovered = sorted( + str(path.relative_to(REPO_ROOT)) + for path in (REPO_ROOT / "examples").rglob("*.py") + if path.name != "__init__.py" + ) + + assert sorted(ALL_EXAMPLES) == discovered + + @pytest.mark.parametrize("example_path", SAFE_EXAMPLES) def test_safe_example_runs(example_path: str, tmp_path: Path) -> None: """Keep runnable examples aligned with the installed package surface.""" From 28e826da9e7f4a204539f9b833a77f96160fa462 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 15:05:46 -0500 Subject: [PATCH 182/287] docs: add publishing checklist guard --- docs/PUBLISHING_CHECKLIST.md | 74 ++++++++++++++++++++++++++++++ tests/core/test_release_hygiene.py | 49 ++++++++++++++++++-- 2 files changed, 118 insertions(+), 5 deletions(-) create mode 100644 docs/PUBLISHING_CHECKLIST.md diff --git a/docs/PUBLISHING_CHECKLIST.md b/docs/PUBLISHING_CHECKLIST.md new file mode 100644 index 0000000..fdd5203 --- /dev/null +++ b/docs/PUBLISHING_CHECKLIST.md @@ -0,0 +1,74 @@ +# Publishing Checklist + +`extended-data` releases are automated from `main` with release-please and +PyPI trusted publishing. Do not hand-edit versions, changelog entries, release +tags, or GitHub releases during the normal release path. + +## Release Model + +- `release-please` owns version detection, changelog updates, release PRs, and + Git tags. +- The package name is `extended-data`; PyPI publication uses the tighter + `extended-data` distribution name. 
+- The release workflow publishes only after release-please reports that a + release was created. +- The PyPI job uses OIDC trusted publishing through `uv publish`; no PyPI token + should be stored in repository secrets for the normal path. + +## Maintainer Preflight + +Run these before merging a release PR or manually dispatching release workflow +diagnostics: + +```bash +uv sync --extra tests --extra typing +uv run --with pip-audit==2.10.0 pip-audit --skip-editable +uv run ruff check . +uv run mypy src/extended_data +uv run pytest +uv build +``` + +## Workflow Hygiene + +- Keep `.github/workflows/*.yml` actions pinned to exact commit SHAs. +- Update adjacent version comments when refreshing action SHAs. +- Use `gh` to verify latest stable action releases before changing pins. +- Keep top-level `permissions: {}` and grant only job-scoped permissions. + +Current workflow action pins: + +| Action | Stable version | Commit SHA | +| --- | --- | --- | +| `actions/checkout` | `v6.0.3` | `df4cb1c069e1874edd31b4311f1884172cec0e10` | +| `actions/setup-python` | `v6.2.0` | `a309ff8b426b58ec0e2a45f0f869d46889d02405` | +| `astral-sh/setup-uv` | `v8.2.0` | `fac544c07dec837d0ccb6301d7b5580bf5edae39` | +| `googleapis/release-please-action` | `v5.0.0` | `45996ed1f6d02564a971a2fa1b5860e934307cf7` | + +## Publishing Flow + +1. Land normal feature, fix, docs, and maintenance commits using Conventional + Commit prefixes. +2. Let the release workflow open or update the release-please PR. +3. Review the release PR for the expected changelog and manifest updates. +4. Merge the release PR. +5. Confirm the release workflow created the GitHub release and published to + PyPI through trusted publishing. +6. Verify the package can be installed from PyPI: + +```bash +python -m pip install extended-data +python -c "import extended_data; print(extended_data.__version__)" +``` + +## Manual Repairs + +Manual tags or PyPI uploads are repair paths, not the release process. 
If a +release workflow fails after release-please creates a tag: + +1. Keep the failed tag intact while diagnosing unless the release is proven + unrecoverable. +2. Prefer rerunning the failed workflow job. +3. If a bad GitHub release was published, delete only the bad artifacts needed + for repair. +4. Document the repair in the PR or release notes. diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 2402dd6..fbc3864 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -12,8 +12,10 @@ REPO_ROOT = Path(__file__).resolve().parents[2] WORKFLOW_ROOT = REPO_ROOT / ".github" / "workflows" -ACTION_REF_RE = re.compile(r"^\s*(?:-\s*)?uses:\s*([^#\s]+)") +ACTION_REF_WITH_COMMENT_RE = re.compile(r"^\s*(?:-\s*)?uses:\s*([^#\s]+)(?:\s+#\s*(\S+))?") PINNED_SHA_RE = re.compile(r"^[0-9a-f]{40}$") +ACTION_VERSION_COMMENT_RE = re.compile(r"^v\d+\.\d+\.\d+$") +PIN_TABLE_RE = re.compile(r"^\|\s*`([^`]+)`\s*\|\s*`([^`]+)`\s*\|\s*`([0-9a-f]{40})`\s*\|$") PUBLIC_TEXT_ROOTS = ( REPO_ROOT / "src", REPO_ROOT / "docs", @@ -41,13 +43,13 @@ def _pyproject() -> tomlkit.TOMLDocument: return tomlkit.parse((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) -def test_workflow_actions_are_pinned_to_exact_shas() -> None: - """Remote workflow actions should use immutable action commit SHAs.""" +def _workflow_action_pins() -> dict[str, tuple[str, str]]: + pins: dict[str, tuple[str, str]] = {} offenders: list[str] = [] for path in sorted(WORKFLOW_ROOT.glob("*.yml")) + sorted(WORKFLOW_ROOT.glob("*.yaml")): for line_number, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1): - match = ACTION_REF_RE.match(line) + match = ACTION_REF_WITH_COMMENT_RE.match(line) if match is None: continue @@ -55,12 +57,49 @@ def test_workflow_actions_are_pinned_to_exact_shas() -> None: if uses.startswith(("./", "docker://")): continue - _, separator, ref = uses.rpartition("@") + action, separator, ref = 
uses.rpartition("@") + version = match.group(2) if not separator or PINNED_SHA_RE.fullmatch(ref) is None: relative_path = path.relative_to(REPO_ROOT) offenders.append(f"{relative_path}:{line_number}: {uses}") + continue + if version is None or ACTION_VERSION_COMMENT_RE.fullmatch(version) is None: + relative_path = path.relative_to(REPO_ROOT) + offenders.append(f"{relative_path}:{line_number}: missing stable version comment for {uses}") + continue + + existing = pins.setdefault(action, (version, ref)) + if existing != (version, ref): + relative_path = path.relative_to(REPO_ROOT) + offenders.append(f"{relative_path}:{line_number}: conflicting pin for {action}") assert offenders == [] + return pins + + +def _publishing_checklist_pins() -> dict[str, tuple[str, str]]: + pins: dict[str, tuple[str, str]] = {} + checklist = (REPO_ROOT / "docs" / "PUBLISHING_CHECKLIST.md").read_text(encoding="utf-8") + + for line in checklist.splitlines(): + match = PIN_TABLE_RE.match(line.strip()) + if match is None: + continue + action, version, ref = match.groups() + pins[action] = (version, ref) + + assert pins, "docs/PUBLISHING_CHECKLIST.md must list current workflow action pins" + return pins + + +def test_workflow_actions_are_pinned_to_exact_shas() -> None: + """Remote workflow actions should use immutable action commit SHAs.""" + assert _workflow_action_pins() + + +def test_publishing_checklist_matches_workflow_action_pins() -> None: + """The release checklist should document the exact workflow action pins.""" + assert _publishing_checklist_pins() == _workflow_action_pins() def test_public_text_does_not_reference_old_project_origins() -> None: From 37b16e39c38026e58e567f5f9017f2f35c05a34f Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 15:10:02 -0500 Subject: [PATCH 183/287] test: guard unpatched runtime vulnerabilities --- tests/core/test_release_hygiene.py | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git 
a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index fbc3864..f8d86a4 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -37,12 +37,29 @@ EXTRA_REFERENCE_RE = re.compile(r"extended-data\[([^\]\n]+)\]") NON_RUNTIME_EXTRAS = {"all", "dev", "tests", "typing"} PACKAGE_SHAPE_RE = re.compile(r"^ ([a-z_]+)/\s+") +UNPATCHED_RUNTIME_VULNERABILITIES = { + "chromadb": "GHSA-f4j7-r4q5-qw2c", +} def _pyproject() -> tomlkit.TOMLDocument: return tomlkit.parse((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) +def _uv_lock() -> tomlkit.TOMLDocument: + return tomlkit.parse((REPO_ROOT / "uv.lock").read_text(encoding="utf-8")) + + +def _requirement_name(requirement: str) -> str: + name_chars: list[str] = [] + for char in requirement: + if char.isalnum() or char in {"-", "_", "."}: + name_chars.append(char) + continue + break + return "".join(name_chars).lower().replace("_", "-") + + def _workflow_action_pins() -> dict[str, tuple[str, str]]: pins: dict[str, tuple[str, str]] = {} offenders: list[str] = [] @@ -166,6 +183,31 @@ def test_all_extra_contains_every_runtime_extra_dependency() -> None: assert missing == [] +def test_dependency_manifests_do_not_lock_unpatched_runtime_vulnerabilities() -> None: + """Runtime dependency manifests should not carry known unpatched vulnerable packages.""" + vulnerable = set(UNPATCHED_RUNTIME_VULNERABILITIES) + offenders: list[str] = [] + project = _pyproject()["project"] + + for dependency in project["dependencies"]: + name = _requirement_name(str(dependency)) + if name in vulnerable: + offenders.append(f"pyproject.toml dependency {dependency}: {UNPATCHED_RUNTIME_VULNERABILITIES[name]}") + + for extra_name, dependencies in project["optional-dependencies"].items(): + for dependency in dependencies: + name = _requirement_name(str(dependency)) + if name in vulnerable: + offenders.append(f"pyproject.toml extra {extra_name} dependency {dependency}: 
{UNPATCHED_RUNTIME_VULNERABILITIES[name]}") + + for package in _uv_lock()["package"]: + name = str(package["name"]).lower().replace("_", "-") + if name in vulnerable: + offenders.append(f"uv.lock package {name}: {UNPATCHED_RUNTIME_VULNERABILITIES[name]}") + + assert offenders == [] + + def test_public_install_guidance_names_known_extras() -> None: """Static install examples should not teach extras that pyproject does not publish.""" known_extras = set(_pyproject()["project"]["optional-dependencies"]) From 0b0d1b981464da807f5137eba49bae32a38f2c09 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 15:20:19 -0500 Subject: [PATCH 184/287] test: guard trusted publishing release path --- tests/core/test_release_hygiene.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index f8d86a4..edd1943 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -119,6 +119,24 @@ def test_publishing_checklist_matches_workflow_action_pins() -> None: assert _publishing_checklist_pins() == _workflow_action_pins() +def test_release_workflow_uses_pypi_trusted_publishing() -> None: + """Publishing should use PyPI trusted publishing instead of repository tokens.""" + release_workflow = (WORKFLOW_ROOT / "release.yml").read_text(encoding="utf-8") + forbidden_token_markers = ( + "PYPI_API_TOKEN", + "PYPI_TOKEN", + "pypi-token", + "pypi_token", + "__token__", + "secrets.PYPI", + ) + + assert "id-token: write" in release_workflow + assert "uv publish --trusted-publishing always" in release_workflow + assert "uv publish" in release_workflow + assert all(marker not in release_workflow for marker in forbidden_token_markers) + + def test_public_text_does_not_reference_old_project_origins() -> None: """Public code/docs should describe current Extended Data surfaces, not origin packages.""" offenders: list[str] = [] From 55d889b47dbe87fc7311e91db9b896a06f45afca 
Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 15:23:34 -0500 Subject: [PATCH 185/287] test: compile documented python snippets --- tests/examples/test_safe_examples.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index 60a4a4d..e0f55bf 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -55,6 +55,7 @@ "truncate", ) ROOT_DISALLOWED_TIER1_IMPORTS = tuple(sorted(primitives.__all__)) +PYTHON_MARKDOWN_BLOCK_RE = re.compile(r"```python\n(?P<code>.*?)\n```", re.DOTALL) def _readme_usage_snippet() -> str: @@ -113,6 +114,23 @@ def test_readme_usage_snippet_runs(tmp_path: Path) -> None: assert result.returncode == 0, f"README usage snippet failed\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" +def test_markdown_python_snippets_compile() -> None: + """Documentation snippets may be conceptual, but they should remain valid Python.""" + markdown_paths = [REPO_ROOT / "README.md", *(REPO_ROOT / "docs").rglob("*.md")] + offenders: list[str] = [] + + for path in sorted(markdown_paths): + text = path.read_text(encoding="utf-8") + for index, match in enumerate(PYTHON_MARKDOWN_BLOCK_RE.finditer(text), start=1): + code = match.group("code") + try: + compile(code, f"{path.relative_to(REPO_ROOT)}#python-block-{index}", "exec") + except SyntaxError as exc: + offenders.append(f"{path.relative_to(REPO_ROOT)} block {index}: {exc}") + + assert offenders == [] + + def test_examples_do_not_document_stale_command_paths() -> None: """Example command snippets should point at the current directory layout.""" offenders: list[str] = [] From 31a630a300f3cb348d03e782f0a8b5e0304be79a Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 15:27:10 -0500 Subject: [PATCH 186/287] docs: document all runtime extras --- README.md | 7 +++++++ docs/package-surface.md | 24 ++++++++++++++++++++++++ tests/core/test_release_hygiene.py | 20 
++++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/README.md b/README.md index caa2767..8154d85 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,18 @@ Optional integrations are installed by feature: ```bash pip install "extended-data[aws,github,vault]" pip install "extended-data[google,slack,zoom]" +pip install "extended-data[anthropic,cursor]" pip install "extended-data[ai]" # LangChain, MCP, and Strands +pip install "extended-data[langchain,mcp,strands]" pip install "extended-data[meshy,mcp]" +pip install "extended-data[meshy,vector,webhooks]" pip install "extended-data[secrets]" ``` +Published runtime extras are `anthropic`, `aws`, `cursor`, `github`, `google`, +`langchain`, `mcp`, `meshy`, `secrets`, `slack`, `strands`, `vault`, `vector`, +`webhooks`, `zoom`, and aggregate `ai`. + CrewAI adapters remain available when `crewai` is installed independently, but `extended-data` intentionally does not publish a CrewAI extra while current CrewAI releases pull vulnerable `chromadb` versions transitively. 
diff --git a/docs/package-surface.md b/docs/package-surface.md index 2866905..afec02b 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -336,10 +336,34 @@ Install only the vendor or AI layers you need: ```bash pip install "extended-data[aws,github,vault]" pip install "extended-data[google,slack,zoom]" +pip install "extended-data[anthropic,cursor]" pip install "extended-data[ai]" # LangChain, MCP, and Strands +pip install "extended-data[langchain,mcp,strands]" pip install "extended-data[meshy,mcp]" +pip install "extended-data[meshy,vector,webhooks]" ``` +Published runtime extras: + +| Extra | Purpose | +| --- | --- | +| `extended-data[anthropic]` | Anthropic API connector and tools | +| `extended-data[aws]` | AWS connector operations | +| `extended-data[cursor]` | Cursor connector helpers | +| `extended-data[github]` | GitHub connector operations | +| `extended-data[google]` | Google Workspace, Cloud, Billing, and services | +| `extended-data[langchain]` | LangChain tool adapters | +| `extended-data[mcp]` | MCP server bridge | +| `extended-data[meshy]` | Meshy 3D asset connector | +| `extended-data[secrets]` | SecretSync Python bridge dependencies | +| `extended-data[slack]` | Slack connector operations | +| `extended-data[strands]` | Strands tool adapters | +| `extended-data[vault]` | Vault connector operations | +| `extended-data[vector]` | Local vector search for generated asset metadata | +| `extended-data[webhooks]` | Webhook listener support | +| `extended-data[zoom]` | Zoom connector operations | +| `extended-data[ai]` | Aggregate LangChain, MCP, and Strands install target | + CrewAI tool adapters are still importable when users install `crewai` directly, but `extended-data` does not expose a CrewAI extra while current CrewAI dependency trees pull vulnerable `chromadb` releases. 
diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index edd1943..04d05b0 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -251,6 +251,26 @@ def test_public_install_guidance_names_known_extras() -> None: assert offenders == [] +def test_public_install_guidance_documents_every_runtime_extra() -> None: + """Every runtime optional extra should be discoverable from public install guidance.""" + runtime_extras = set(_pyproject()["project"]["optional-dependencies"]) - NON_RUNTIME_EXTRAS + documented_extras: set[str] = set() + text = "\n".join( + [ + (REPO_ROOT / "README.md").read_text(encoding="utf-8"), + (REPO_ROOT / "docs" / "package-surface.md").read_text(encoding="utf-8"), + ], + ) + + for match in EXTRA_REFERENCE_RE.finditer(text): + extra_group = match.group(1) + if "..." in extra_group or "{" in extra_group or "}" in extra_group: + continue + documented_extras.update(extra.strip() for extra in extra_group.split(",") if extra.strip()) + + assert runtime_extras <= documented_extras + + def test_project_scripts_point_to_callables() -> None: """Console-script metadata should resolve to importable callables.""" scripts = _pyproject()["project"]["scripts"] From 10a577dd70a0df2d2e4201d95579954513a8126e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 15:57:53 -0500 Subject: [PATCH 187/287] fix: make secrets connector cli only --- README.md | 5 +- docs/package-surface.md | 9 +- .../connectors/secrets/__init__.py | 124 ++---------------- tests/connectors/test_secrets.py | 13 +- tests/core/test_release_hygiene.py | 4 +- 5 files changed, 29 insertions(+), 126 deletions(-) diff --git a/README.md b/README.md index 8154d85..f992389 100644 --- a/README.md +++ b/README.md @@ -151,15 +151,14 @@ Raw SDK/client objects and raw transport responses remain available from the methods that explicitly return them. 
The `secrets` connector integrates with the standalone SecretSync project -(`jbcom/secrets-sync`) through either the optional native `secretssync` Python -module or the `secretsync` CLI. CLI fallback expects +(`jbcom/secrets-sync`) through the `secretsync` CLI. It expects `secretsync pipeline --output json` to return the stable pipeline result envelope used by this package. ```python from extended_data import SecretsConnector, SyncOptions -result = SecretsConnector(prefer_native=False).run_pipeline( +result = SecretsConnector().run_pipeline( "pipeline.yaml", SyncOptions(dry_run=True), ) diff --git a/docs/package-surface.md b/docs/package-surface.md index afec02b..0971e79 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -293,17 +293,16 @@ assert payload["service"]["name"].upper_first() == "Api" ``` The `secrets` adapter is the Python-facing bridge to the standalone SecretSync -project (`jbcom/secrets-sync`). It uses the optional native `secretssync` -Python module when present and otherwise falls back to the `secretsync` CLI, -which must emit the stable `secretsync pipeline --output json` result envelope -for both dry-run and apply runs. +project (`jbcom/secrets-sync`). It uses the `secretsync` CLI, which must emit +the stable `secretsync pipeline --output json` result envelope for both dry-run +and apply runs. Secrets tool factories are exported from `extended_data.secrets`; the duplicate `extended_data.secrets.tools` module path is intentionally not preserved. 
```python from extended_data import SecretsConnector, SyncOptions -result = SecretsConnector(prefer_native=False).run_pipeline( +result = SecretsConnector().run_pipeline( "pipeline.yaml", SyncOptions(dry_run=True), ) diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index fc89b43..53011c3 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -5,10 +5,9 @@ HashiCorp Vault to AWS Secrets Manager with two-phase architecture, inheritance, versioning, and CI/CD integration. -The connector can operate in two modes: -1. Native mode: Uses the optional gopy-generated `secretssync` Python module -2. CLI mode: Falls back to the `secretsync` subprocess CLI when bindings are - not available +The connector executes the supported `secretsync` subprocess CLI contract. +Alternate runtime adapters should be added only after SecretSync publishes a +stable adapter contract. Example usage: from extended_data.connectors.secrets import SecretsConnector @@ -45,16 +44,6 @@ from extended_data.logging import Logging -# Try to import native bindings -_NATIVE_AVAILABLE = False -try: - import secretssync as _native - - _NATIVE_AVAILABLE = True -except ImportError: - _native = None - - class SyncOperation(str, Enum): """Pipeline operation types.""" @@ -102,23 +91,6 @@ class SyncResult: results_json: str = "" diff_output: str = "" - @classmethod - def from_native(cls, native_result: Any) -> SyncResult: - """Create from native gopy result.""" - return cls( - success=native_result.Success, - target_count=native_result.TargetCount, - secrets_processed=native_result.SecretsProcessed, - secrets_added=native_result.SecretsAdded, - secrets_modified=native_result.SecretsModified, - secrets_removed=native_result.SecretsRemoved, - secrets_unchanged=native_result.SecretsUnchanged, - duration_ms=native_result.DurationMs, - error_message=native_result.ErrorMessage, - 
results_json=native_result.ResultsJSON, - diff_output=native_result.DiffOutput, - ) - @classmethod def from_cli_output(cls, output: dict[str, Any]) -> SyncResult: """Create from CLI JSON output.""" @@ -155,21 +127,6 @@ class ConfigInfo: vault_address: str = "" aws_region: str = "" - @classmethod - def from_native(cls, native_info: Any) -> ConfigInfo: - """Create from native gopy result.""" - return cls( - valid=native_info.Valid, - error_message=native_info.ErrorMessage, - source_count=native_info.SourceCount, - target_count=native_info.TargetCount, - sources=list(native_info.Sources) if native_info.Sources else [], - targets=list(native_info.Targets) if native_info.Targets else [], - has_merge_store=native_info.HasMergeStore, - vault_address=native_info.VaultAddress, - aws_region=native_info.AWSRegion, - ) - def to_dict(self) -> ExtendedDict: """Return an extended config info payload.""" return extend_data(asdict(self)) @@ -179,8 +136,7 @@ class SecretsConnector(VendorConnectorBase): """Enterprise-grade SecretSync connector. This connector wraps the standalone SecretSync project - (`jbcom/secrets-sync`) through either the optional native `secretssync` - Python bindings or the `secretsync` CLI. + (`jbcom/secrets-sync`) through the supported `secretsync` CLI. Features: - Two-phase pipeline architecture (merge → sync) @@ -189,15 +145,13 @@ class SecretsConnector(VendorConnectorBase): - Dry-run with visual diff output - CI/CD integration with exit codes - The connector operates in two modes: - 1. Native mode: Uses gopy-generated bindings (faster) - 2. CLI mode: Falls back to subprocess if bindings unavailable + Alternate runtime adapters are intentionally not accepted here until + SecretSync publishes a stable adapter contract. 
""" def __init__( self, cli_path: str | None = None, - prefer_native: bool = True, logger: Logging | None = None, **kwargs: Any, ) -> None: @@ -205,17 +159,14 @@ def __init__( Args: cli_path: Path to secretsync CLI binary (for CLI mode) - prefer_native: Prefer native bindings over CLI logger: Logger instance **kwargs: Passed to VendorConnectorBase """ super().__init__(logger=logger, **kwargs) - self._prefer_native = prefer_native and _NATIVE_AVAILABLE self._cli_path = cli_path or self._find_cli() - mode = "native" if self._prefer_native else "CLI" - self.logger.info(f"SecretsConnector initialized in {mode} mode") + self.logger.info("SecretsConnector initialized in CLI mode") def _find_cli(self) -> str | None: """Find the SecretSync `secretsync` CLI binary.""" @@ -233,11 +184,6 @@ def _find_cli(self) -> str | None: return None - @property - def native_available(self) -> bool: - """Check if native bindings are available.""" - return _NATIVE_AVAILABLE - @property def cli_available(self) -> bool: """Check if CLI is available.""" @@ -252,10 +198,7 @@ def validate_config(self, config_path: str) -> ExtendedDict: Returns: Extended validation payload. """ - if self._prefer_native: - is_valid, message = _native.ValidateConfig(config_path) - else: - is_valid, message = self._cli_validate_config(config_path) + is_valid, message = self._cli_validate_config(config_path) return extend_data({ "valid": is_valid, @@ -293,10 +236,6 @@ def get_config_info(self, config_path: str) -> ExtendedDict: Returns: Extended configuration details payload. 
""" - if self._prefer_native: - native_info = _native.GetConfigInfo(config_path) - return ConfigInfo.from_native(native_info).to_dict() - return self._cli_get_config_info(config_path).to_dict() def _cli_get_config_info(self, config_path: str) -> ConfigInfo: @@ -345,29 +284,8 @@ def run_pipeline( """ options = options or SyncOptions() - if self._prefer_native: - return self._native_run_pipeline(config_path, options).to_dict() - return self._cli_run_pipeline(config_path, options).to_dict() - def _native_run_pipeline( - self, - config_path: str, - options: SyncOptions, - ) -> SyncResult: - """Run pipeline via native bindings.""" - native_opts = _native.DefaultSyncOptions() - native_opts.DryRun = options.dry_run - native_opts.Operation = options.operation.value - native_opts.Targets = ",".join(options.targets) - native_opts.ContinueOnError = options.continue_on_error - native_opts.Parallelism = options.parallelism - native_opts.ComputeDiff = options.compute_diff - native_opts.OutputFormat = options.output_format.value - - native_result = _native.RunPipeline(config_path, native_opts) - return SyncResult.from_native(native_result) - def _cli_run_pipeline( self, config_path: str, @@ -377,7 +295,7 @@ def _cli_run_pipeline( if not self._cli_path: return SyncResult( success=False, - error_message="CLI not available and native bindings not installed", + error_message="secretsync CLI not available", ) # CLI mode always requests JSON so this Python surface can reliably @@ -432,7 +350,7 @@ def _cli_run_pipeline( success=False, error_message=( "Unsupported secretsync JSON output: expected pipeline result envelope. " - "Upgrade secretsync or use native bindings." + "Upgrade secretsync to a version that emits the stable result envelope." ), ) parsed = SyncResult.from_cli_output(output) @@ -475,10 +393,6 @@ def dry_run(self, config_path: str) -> ExtendedDict: Returns: Extended dry-run result payload. 
""" - if self._prefer_native: - native_result = _native.DryRun(config_path) - return SyncResult.from_native(native_result).to_dict() - options = SyncOptions(dry_run=True, compute_diff=True) return self._cli_run_pipeline(config_path, options).to_dict() @@ -492,10 +406,6 @@ def merge(self, config_path: str, dry_run: bool = False) -> ExtendedDict: Returns: Extended merge result payload. """ - if self._prefer_native: - native_result = _native.Merge(config_path, dry_run) - return SyncResult.from_native(native_result).to_dict() - options = SyncOptions( operation=SyncOperation.MERGE, dry_run=dry_run, @@ -513,10 +423,6 @@ def sync(self, config_path: str, dry_run: bool = False) -> ExtendedDict: Returns: Extended sync result payload. """ - if self._prefer_native: - native_result = _native.Sync(config_path, dry_run) - return SyncResult.from_native(native_result).to_dict() - options = SyncOptions( operation=SyncOperation.SYNC, dry_run=dry_run, @@ -533,11 +439,6 @@ def get_targets(self, config_path: str) -> ExtendedDict: Returns: Extended targets payload. """ - if self._prefer_native: - targets, err = _native.GetTargets(config_path) - target_list = list(targets) if targets else [] - return extend_data({"targets": target_list, "count": len(target_list), "error_message": err}) - info = self.get_config_info(config_path) targets = info.get("targets", []) return extend_data({ @@ -555,11 +456,6 @@ def get_sources(self, config_path: str) -> ExtendedDict: Returns: Extended sources payload. 
""" - if self._prefer_native: - sources, err = _native.GetSources(config_path) - source_list = list(sources) if sources else [] - return extend_data({"sources": source_list, "count": len(source_list), "error_message": err}) - info = self.get_config_info(config_path) sources = info.get("sources", []) return extend_data({ diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index 98b9233..6b0b06d 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -1,5 +1,6 @@ import json +from inspect import signature from pathlib import Path from unittest.mock import MagicMock, patch @@ -33,8 +34,15 @@ def mock_logger() -> MagicMock: @pytest.fixture def connector(mock_logger: MagicMock) -> SecretsConnector: - # Force CLI mode by setting prefer_native=False - return SecretsConnector(cli_path="/usr/bin/secretsync", prefer_native=False, logger=mock_logger) + return SecretsConnector(cli_path="/usr/bin/secretsync", logger=mock_logger) + + +def test_secrets_connector_has_single_cli_runtime_contract() -> None: + """SecretSync integration should not expose unowned native binding switches.""" + parameters = signature(SecretsConnector).parameters + + assert "prefer_native" not in parameters + assert not hasattr(SecretsConnector(cli_path="/usr/bin/secretsync"), "native_available") def test_cli_get_config_info_valid(connector: SecretsConnector, tmp_path: Path) -> None: @@ -234,6 +242,7 @@ def test_cli_run_pipeline_rejects_legacy_raw_diff_json(mock_run: MagicMock, conn assert isinstance(result, ExtendedDict) assert result["success"] is False assert "expected pipeline result envelope" in result["error_message"] + assert "native bindings" not in result["error_message"] @patch("subprocess.run") diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 04d05b0..8b2ed22 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -29,7 +29,7 @@ "lifecyclelogging", 
"vendor_connectors", ) -REMOVED_PUBLIC_KEYWORDS = ("unhump_results",) +REMOVED_PUBLIC_KEYWORDS = ("prefer_native", "unhump_results") SECRETSSYNC_PROJECT_PATTERNS = ( re.compile(r"\bsecretssync\s+(?:Go\s+)?(?:project|library|repo|repository|CLI|connector|bindings?)\b", re.IGNORECASE), re.compile(r"\b(?:project|library|repo|repository|CLI|connector|bindings?)\s+secretssync\b", re.IGNORECASE), @@ -334,7 +334,7 @@ def test_public_guidance_does_not_use_removed_runtime_keywords() -> None: def test_public_guidance_names_secrets_sync_roles_precisely() -> None: - """Use SecretSync for the product and reserve exact names for CLI/native modules.""" + """Use SecretSync for the product and reserve exact names for CLI modules.""" offenders: list[str] = [] paths = [REPO_ROOT / "README.md"] paths.extend(path for root in (REPO_ROOT / "docs", REPO_ROOT / "src") for path in root.rglob("*")) From 8c73f258c9f1e0988c674888049910d3234e3ff8 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 16:04:37 -0500 Subject: [PATCH 188/287] docs: add secrets connector example --- examples/connectors/README.md | 11 +++++ examples/connectors/basic_secrets.py | 70 ++++++++++++++++++++++++++++ tests/examples/test_safe_examples.py | 19 ++++++++ 3 files changed, 100 insertions(+) create mode 100644 examples/connectors/basic_secrets.py diff --git a/examples/connectors/README.md b/examples/connectors/README.md index 091b1c3..ae3ac96 100644 --- a/examples/connectors/README.md +++ b/examples/connectors/README.md @@ -31,6 +31,9 @@ pip install "extended-data[langchain]" # For the Meshy MCP server pip install "extended-data[meshy,mcp]" + +# For SecretSync pipeline inspection and dry-run syncs +pip install "extended-data[secrets]" ``` ## Examples @@ -40,6 +43,7 @@ pip install "extended-data[meshy,mcp]" - [`basic_aws.py`](basic_aws.py) - AWS connector with Organizations and S3 - [`basic_google.py`](basic_google.py) - Google Cloud connector with Workspace and Billing - 
[`basic_meshy.py`](basic_meshy.py) - Meshy AI 3D generation +- [`basic_secrets.py`](basic_secrets.py) - SecretSync pipeline config inspection and dry-run execution ### AI Agent Integration @@ -61,6 +65,10 @@ export GOOGLE_SERVICE_ACCOUNT='{"type": "service_account", ...}' # Meshy AI export MESHY_API_KEY="msy_your_key" +# SecretSync +export VAULT_ADDR="https://vault.example.com" +export AWS_REGION="us-east-1" + # For LangChain examples export ANTHROPIC_API_KEY="sk-ant-..." ``` @@ -73,4 +81,7 @@ uv run python examples/connectors/basic_meshy.py # Run with debug logging LOGLEVEL=DEBUG uv run python examples/connectors/basic_meshy.py + +# Run the SecretSync bridge against a pipeline config +uv run python examples/connectors/basic_secrets.py pipeline.yaml ``` diff --git a/examples/connectors/basic_secrets.py b/examples/connectors/basic_secrets.py new file mode 100644 index 0000000..863987e --- /dev/null +++ b/examples/connectors/basic_secrets.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +"""Example: SecretSync connector usage. + +This example demonstrates the `extended-data[secrets]` bridge to the +standalone `jbcom/secrets-sync` CLI. + +Requirements: + pip install extended-data[secrets] + secretsync must be installed on PATH + +Run: + uv run python examples/connectors/basic_secrets.py pipeline.yaml +""" + +from __future__ import annotations + +import sys + +from pathlib import Path + + +def main() -> int: + """Inspect a SecretSync pipeline config and run a dry-run through the CLI contract.""" + try: + from extended_data import OutputFormat, SecretsConnector, SyncOptions + except ImportError: + print("Error: Could not import SecretSync support. 
Install with: pip install extended-data[secrets]") + return 1 + + config_path = Path(sys.argv[1] if len(sys.argv) > 1 else "pipeline.yaml") + connector = SecretsConnector() + + print(f"Inspecting SecretSync config: {config_path}") + config_info = connector.get_config_info(str(config_path)) + if not config_info["valid"]: + print(f"Error: {config_info['error_message']}") + return 1 + + print( + "Config summary: " + f"{config_info['source_count']} source(s), " + f"{config_info['target_count']} target(s), " + f"merge store={config_info['has_merge_store']}", + ) + + if not connector.cli_available: + print("Error: secretsync CLI not available on PATH.") + print("Install jbcom/secrets-sync and re-run this example to exercise the dry-run contract.") + return 1 + + result = connector.run_pipeline( + str(config_path), + SyncOptions(dry_run=True, compute_diff=True, output_format=OutputFormat.JSON), + ) + + if not result["success"]: + print("Error: secretsync dry run failed.") + print("Run secretsync directly in a secure terminal for full diagnostics.") + print("The CLI must emit the stable `secretsync pipeline --output json` result envelope.") + return 1 + + print("Dry run completed successfully.") + if result["diff_output"]: + print("Diff output was returned by secretsync and is not printed because it may contain secret values.") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index e0f55bf..b37470c 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -36,6 +36,7 @@ "examples/connectors/basic_aws.py", "examples/connectors/basic_google.py", "examples/connectors/basic_meshy.py", + "examples/connectors/basic_secrets.py", "examples/connectors/langchain_tools.py", "examples/connectors/mcp_server.py", ] @@ -173,6 +174,24 @@ def test_examples_do_not_import_tier1_utilities_from_root() -> None: assert offenders == [] +def 
test_secrets_example_does_not_print_raw_sync_results() -> None: + """SecretSync output can include secret values and should not be echoed.""" + text = (REPO_ROOT / "examples/connectors/basic_secrets.py").read_text(encoding="utf-8") + raw_result_fields = [ + "error_message", + "secrets_processed", + "secrets_added", + "secrets_modified", + "secrets_removed", + "secrets_unchanged", + "diff_output", + ] + + for field in raw_result_fields: + assert f'print(result["{field}"])' not in text + assert f"print(result['{field}'])" not in text + + @pytest.mark.parametrize("example_path", ALL_EXAMPLES) def test_example_compiles(example_path: str, tmp_path: Path) -> None: """Every example should at least remain syntactically valid.""" From 68dd0fdc36d55530e9e6524ff69d2ea6dc758437 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 16:14:16 -0500 Subject: [PATCH 189/287] test: guard secrets cli contract --- src/extended_data/connectors/secrets/__init__.py | 5 ++--- tests/connectors/test_secrets.py | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 53011c3..094ce2e 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -298,9 +298,8 @@ def _cli_run_pipeline( error_message="secretsync CLI not available", ) - # CLI mode always requests JSON so this Python surface can reliably - # return a structured SyncResult. Native mode can pass through other - # output formats because it returns a typed result directly. + # Always request JSON so this Python surface can reliably return a + # structured SyncResult from the supported CLI contract. 
cmd = [ self._cli_path, "pipeline", diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index 6b0b06d..a777960 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -1,6 +1,6 @@ import json -from inspect import signature +from inspect import getsource, signature from pathlib import Path from unittest.mock import MagicMock, patch @@ -43,6 +43,8 @@ def test_secrets_connector_has_single_cli_runtime_contract() -> None: assert "prefer_native" not in parameters assert not hasattr(SecretsConnector(cli_path="/usr/bin/secretsync"), "native_available") + assert "Native mode" not in getsource(SecretsConnector) + assert "native bindings" not in getsource(SecretsConnector) def test_cli_get_config_info_valid(connector: SecretsConnector, tmp_path: Path) -> None: From 81979d314284732164a37cb43233cc96d9717b80 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 16:17:09 -0500 Subject: [PATCH 190/287] docs: clarify secrets cli bridge --- README.md | 2 +- src/extended_data/secrets/__init__.py | 2 +- tests/core/test_release_hygiene.py | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f992389..7018e02 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ extended_data/ inputs/ InputProvider and decorator-based input injection logging/ structured lifecycle logging connectors/ Tier 3 ConnectorFabric and vendor adapters - secrets/ Python access to secret sync primitives + secrets/ SecretSync CLI bridge and typed result exports workflows/ Tier 3 higher-order workflow composition ``` diff --git a/src/extended_data/secrets/__init__.py b/src/extended_data/secrets/__init__.py index 0287f51..f4e4c5d 100644 --- a/src/extended_data/secrets/__init__.py +++ b/src/extended_data/secrets/__init__.py @@ -1,4 +1,4 @@ -"""Secret synchronization adapters for Extended Data.""" +"""SecretSync CLI bridge exports for Extended Data.""" from extended_data._version import __version__ 
from extended_data.connectors.secrets import ( diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 8b2ed22..4f00b47 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -34,6 +34,7 @@ re.compile(r"\bsecretssync\s+(?:Go\s+)?(?:project|library|repo|repository|CLI|connector|bindings?)\b", re.IGNORECASE), re.compile(r"\b(?:project|library|repo|repository|CLI|connector|bindings?)\s+secretssync\b", re.IGNORECASE), ) +IMPRECISE_SECRETSSYNC_TERMS = ("secret sync primitives",) EXTRA_REFERENCE_RE = re.compile(r"extended-data\[([^\]\n]+)\]") NON_RUNTIME_EXTRAS = {"all", "dev", "tests", "typing"} PACKAGE_SHAPE_RE = re.compile(r"^ ([a-z_]+)/\s+") @@ -347,5 +348,8 @@ def test_public_guidance_names_secrets_sync_roles_precisely() -> None: if pattern.search(text): offenders.append(str(path.relative_to(REPO_ROOT))) break + for term in IMPRECISE_SECRETSSYNC_TERMS: + if term in text.lower(): + offenders.append(f"{path.relative_to(REPO_ROOT)}: {term}") assert offenders == [] From 8c12fed99c571f87a89efc1ae757869a4bebaaed Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 16:32:54 -0500 Subject: [PATCH 191/287] test: prove tier two container bases --- tests/core/test_containers.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index ec3f596..d329208 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -5,6 +5,8 @@ import datetime import json +from collections import UserDict, UserList, UserString +from collections.abc import MutableSet from pathlib import Path from typing import Any @@ -21,6 +23,20 @@ ) +def test_tier2_containers_inherit_expected_python_bases() -> None: + """Tier 2 classes should be real extended primitives, not detached facades.""" + assert issubclass(ExtendedString, UserString) + assert issubclass(ExtendedDict, UserDict) + assert issubclass(ExtendedList, UserList) + 
assert issubclass(ExtendedTuple, tuple) + assert issubclass(ExtendedSet, MutableSet) + assert isinstance(ExtendedString("api"), UserString) + assert isinstance(ExtendedDict({"service": "api"}), UserDict) + assert isinstance(ExtendedList(["api"]), UserList) + assert isinstance(ExtendedTuple(("api",)), tuple) + assert isinstance(ExtendedSet({"api"}), MutableSet) + + def test_extended_string_chains_primitive_transforms() -> None: """ExtendedString composes Tier 1 string primitives.""" value = ExtendedString("API Response Value") From f9a3e31b7639985c08d4da8532c1a9ba213a6f13 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 16:41:23 -0500 Subject: [PATCH 192/287] fix: redact connector error boundaries --- README.md | 8 ++-- docs/package-surface.md | 11 ++--- .../connectors/anthropic/__init__.py | 2 + src/extended_data/connectors/base.py | 5 ++- .../connectors/cursor/__init__.py | 8 +--- src/extended_data/connectors/google/jules.py | 7 ++-- src/extended_data/connectors/meshy/base.py | 3 +- .../connectors/slack/__init__.py | 9 ++-- tests/connectors/meshy/test_meshy_base.py | 32 +++++++++++++++ tests/connectors/test_anthropic.py | 21 ++++++++++ tests/connectors/test_base.py | 41 ++++++++++++++++++- tests/connectors/test_cursor.py | 10 +++++ tests/connectors/test_google_jules.py | 23 +++++++++++ tests/connectors/test_slack_connector.py | 16 +++++++- 14 files changed, 169 insertions(+), 27 deletions(-) create mode 100644 tests/connectors/meshy/test_meshy_base.py diff --git a/README.md b/README.md index 7018e02..a748be5 100644 --- a/README.md +++ b/README.md @@ -143,10 +143,10 @@ payload contract; framework factory functions still return framework tool objects. The generic CLI `call` command and MCP bridge expose only methods that advertise Extended Data payload returns. 
-Both serialized boundaries redact common secret-bearing keys and token-shaped -strings before writing terminal or MCP output, so connector data methods can -return structured vendor payloads without making stdout or tool responses a -secret leak by default. +Serialized CLI/MCP boundaries and connector API error messages redact common +secret-bearing keys and token-shaped strings, so connector data methods can +return structured vendor payloads without making stdout, tool responses, or +raised transport errors a secret leak by default. Raw SDK/client objects and raw transport responses remain available from the methods that explicitly return them. diff --git a/docs/package-surface.md b/docs/package-surface.md index 0971e79..682c245 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -279,11 +279,12 @@ their payload returns as `ExtendedDict` or `ExtendedList[ExtendedDict]`. The generic CLI `call` command and MCP bridge expose only connector methods that advertise Extended Data payload returns, so raw SDK client factories and low-level HTTP helpers do not leak into serialized tool catalogs. -Those serialized boundaries apply redaction after Tier 2 containers are lowered -to JSON-compatible data. Common secret-bearing keys such as `password`, -`api_key`, `access_token`, `authorization`, and `client_secret`, plus token-like -strings in error text, are replaced with `[REDACTED]` before CLI stdout/stderr -or MCP tool responses are emitted. +Serialized CLI/MCP boundaries apply redaction after Tier 2 containers are +lowered to JSON-compatible data, and connector API error messages use the same +redaction policy before exceptions are raised. Common secret-bearing keys such +as `password`, `api_key`, `access_token`, `authorization`, and `client_secret`, +plus token-like strings in error text, are replaced with `[REDACTED]` before +CLI stdout/stderr, MCP tool responses, or raised transport errors expose them. 
LangChain, CrewAI, Strands, and auto-detection factory functions still return plain framework tool object lists. diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index a1e10fc..40bb620 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ b/src/extended_data/connectors/anthropic/__init__.py @@ -38,6 +38,7 @@ from pydantic import BaseModel, ConfigDict, Field from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, extend_data, to_builtin from extended_data.logging import Logging @@ -287,6 +288,7 @@ def _handle_error(self, response: httpx.Response) -> None: except Exception: error_type = "unknown" message = response.text + message = redact_sensitive_text(message) if status_code == 401: raise AnthropicAuthError(message, status_code=status_code, error_type=error_type) diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index 5918685..61363f4 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -44,6 +44,7 @@ def my_operation(self) -> ExtendedDict: wait_exponential, ) +from extended_data.connectors.redaction import redact_sensitive_text from extended_data.inputs import InputProvider from extended_data.logging import Logging @@ -268,12 +269,12 @@ def _request_once( # Retry on 5xx server errors if response.status_code >= 500: - msg = f"Server error {response.status_code}: {response.text}" + msg = f"Server error {response.status_code}: {redact_sensitive_text(response.text)}" raise RateLimitError(msg) # Raise on 4xx client errors (don't retry) if response.status_code >= 400: - msg = f"API error {response.status_code}: {response.text}" + msg = f"API error {response.status_code}: {redact_sensitive_text(response.text)}" raise ConnectorAPIError(msg, 
status_code=response.status_code) return response diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index 10cf8f2..729821a 100644 --- a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -34,6 +34,7 @@ from pydantic import BaseModel, ConfigDict, Field from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.logging import Logging @@ -303,12 +304,7 @@ def sanitize_error(error: Any) -> str: Sanitized error message string. """ message = str(error) if not isinstance(error, str) else error - # Remove potential API keys, tokens, or sensitive patterns - message = re.sub(r"Bearer\s+[a-zA-Z0-9._-]+", "Bearer [REDACTED]", message, flags=re.IGNORECASE) - message = re.sub( - r"api[_-]?key[=:]\s*[\"']?[a-zA-Z0-9._-]+[\"']?", "api_key=[REDACTED]", message, flags=re.IGNORECASE - ) - return re.sub(r"token[=:]\s*[\"']?[a-zA-Z0-9._-]+[\"']?", "token=[REDACTED]", message, flags=re.IGNORECASE) + return redact_sensitive_text(message) # ============================================================================= diff --git a/src/extended_data/connectors/google/jules.py b/src/extended_data/connectors/google/jules.py index 96213af..dee54ed 100644 --- a/src/extended_data/connectors/google/jules.py +++ b/src/extended_data/connectors/google/jules.py @@ -34,6 +34,7 @@ from pydantic import BaseModel, Field from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList @@ -165,12 +166,12 @@ def _handle_response(self, response: httpx.Response) -> dict[str, Any]: try: error = response.json().get("error", {}) raise JulesError( - 
error.get("message", response.text), + redact_sensitive_text(error.get("message", response.text)), error.get("code", response.status_code), - error.get("details"), + redact_sensitive_data(error.get("details")), ) except (ValueError, KeyError) as exc: - raise JulesError(response.text, response.status_code) from exc + raise JulesError(redact_sensitive_text(response.text), response.status_code) from exc return response.json() # ========================================================================= diff --git a/src/extended_data/connectors/meshy/base.py b/src/extended_data/connectors/meshy/base.py index fb8b104..14c9c61 100644 --- a/src/extended_data/connectors/meshy/base.py +++ b/src/extended_data/connectors/meshy/base.py @@ -20,6 +20,7 @@ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential +from extended_data.connectors.redaction import redact_sensitive_text from extended_data.inputs import InputProvider @@ -160,7 +161,7 @@ def request( # Raise on 4xx if response.status_code >= 400: - msg = f"API error: {response.text}" + msg = f"API error: {redact_sensitive_text(response.text)}" raise MeshyAPIError( msg, status_code=response.status_code, diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index aedec1d..9468863 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -25,6 +25,7 @@ def batched(iterable: Iterable[Any], n: int) -> Iterator[tuple[Any, ...]]: from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data, to_builtin from extended_data.io import wrap_raw_data_for_export from extended_data.logging import Logging @@ -66,17 +67,17 @@ class 
SlackAPIError(RuntimeError): def __init__(self, response: Any) -> None: self.response = response self.status_code = response.status_code if hasattr(response, "status_code") else None - super().__init__(f"Slack API error: {response}") + super().__init__(f"Slack API error: {redact_sensitive_text(response)}") def _slack_response_payload(response: Any) -> dict[str, Any]: """Normalize Slack SDK response objects into a serializable payload.""" if isinstance(response, Mapping): - return dict(response) + return redact_sensitive_data(dict(response)) data = getattr(response, "data", None) if isinstance(data, Mapping): - return dict(data) + return redact_sensitive_data(dict(data)) payload: dict[str, Any] = {} response_get = getattr(response, "get", None) @@ -90,7 +91,7 @@ def _slack_response_payload(response: Any) -> dict[str, Any]: if status_code is not None: payload["status_code"] = status_code - return payload or {"response": str(response)} + return redact_sensitive_data(payload or {"response": str(response)}) def get_divider() -> ExtendedDict: diff --git a/tests/connectors/meshy/test_meshy_base.py b/tests/connectors/meshy/test_meshy_base.py new file mode 100644 index 0000000..76ebd53 --- /dev/null +++ b/tests/connectors/meshy/test_meshy_base.py @@ -0,0 +1,32 @@ +"""Tests for Meshy connector HTTP base helpers.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import httpx +import pytest + +from extended_data.connectors.meshy import base + + +def test_meshy_request_redacts_sensitive_error_body(monkeypatch: pytest.MonkeyPatch) -> None: + """Meshy API errors should not expose raw response secrets.""" + mock_client = MagicMock() + mock_client.request.return_value = httpx.Response( + 400, + content=b'{"api_key":"key_123","message":"Authorization: Bearer raw_token"}', + ) + + monkeypatch.setattr(base, "_rate_limit", lambda: None) + monkeypatch.setattr(base, "_headers", lambda: {"Authorization": "Bearer test"}) + monkeypatch.setattr(base, 
"get_client", lambda: mock_client) + + with pytest.raises(base.MeshyAPIError) as exc_info: + base.request("GET", "text-to-3d") + + message = str(exc_info.value) + assert exc_info.value.status_code == 400 + assert "key_123" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message diff --git a/tests/connectors/test_anthropic.py b/tests/connectors/test_anthropic.py index 6ea1cea..eb813f6 100644 --- a/tests/connectors/test_anthropic.py +++ b/tests/connectors/test_anthropic.py @@ -10,6 +10,7 @@ from extended_data.connectors.anthropic import ( CLAUDE_MODELS, + AnthropicAuthError, AnthropicConnector, AnthropicError, ContentBlock, @@ -255,6 +256,26 @@ def test_get_model(self): assert isinstance(model["display_name"], ExtendedString) assert model["display_name"] == "Claude Sonnet 4" + def test_handle_error_redacts_sensitive_vendor_message(self): + """Anthropic errors should preserve status metadata without leaking secrets.""" + import httpx + + connector = AnthropicConnector(api_key="test-key") + response = httpx.Response( + 401, + json={"error": {"type": "auth_error", "message": "denied password=hunter2 Bearer raw_token"}}, + ) + + with pytest.raises(AnthropicAuthError) as exc_info: + connector._handle_error(response) + + message = str(exc_info.value) + assert exc_info.value.status_code == 401 + assert exc_info.value.error_type == "auth_error" + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + class TestClaudeModels: """Tests for Claude model constants. 
diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py index 743514b..7dd8fc0 100644 --- a/tests/connectors/test_base.py +++ b/tests/connectors/test_base.py @@ -11,7 +11,7 @@ from pydantic import BaseModel, Field -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorAPIError, RateLimitError, VendorConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.logging import Logging @@ -201,6 +201,45 @@ class InvalidRetryConnector(ExampleConnector): mock_client.request.assert_not_called() +def test_request_once_redacts_sensitive_client_error_body() -> None: + """Programmatic connector API errors should not expose raw secret-bearing bodies.""" + connector = _connector() + mock_client = MagicMock() + mock_client.request.return_value = httpx.Response( + 401, + content=b'{"password":"hunter2","message":"Authorization: Bearer raw_token"}', + ) + connector._client = mock_client + + with pytest.raises(ConnectorAPIError) as exc_info: + connector._request_once("GET", "/status") + + message = str(exc_info.value) + assert exc_info.value.status_code == 401 + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + + +def test_request_once_redacts_sensitive_server_error_body() -> None: + """Retry-triggering server errors should not carry raw response secrets.""" + connector = _connector() + mock_client = MagicMock() + mock_client.request.return_value = httpx.Response( + 500, + content=b'{"api_key":"key_123","message":"Bearer raw_token"}', + ) + connector._client = mock_client + + with pytest.raises(RateLimitError) as exc_info: + connector._request_once("GET", "/status") + + message = str(exc_info.value) + assert "key_123" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + + def test_get_tools_requires_langchain_extra(monkeypatch) -> None: """Base LangChain tool export 
should fail visibly when langchain-core is missing.""" connector = _connector() diff --git a/tests/connectors/test_cursor.py b/tests/connectors/test_cursor.py index a7a929b..6612051 100644 --- a/tests/connectors/test_cursor.py +++ b/tests/connectors/test_cursor.py @@ -17,6 +17,7 @@ CursorError, CursorValidationError, Repository, + sanitize_error, validate_agent_id, validate_prompt_text, validate_repository, @@ -114,6 +115,15 @@ def test_validate_webhook_url_ipv6_internal(self): with pytest.raises(CursorValidationError, match="internal"): validate_webhook_url("https://[fe80::1]/webhook") + def test_sanitize_error_uses_shared_secret_redaction(self): + """Cursor error sanitization should cover common connector secret patterns.""" + redacted = sanitize_error("failed password=hunter2 token=tok_123 Authorization: Bearer raw_token") + + assert "hunter2" not in redacted + assert "tok_123" not in redacted + assert "raw_token" not in redacted + assert "[REDACTED]" in redacted + class TestModels: """Tests for Pydantic models.""" diff --git a/tests/connectors/test_google_jules.py b/tests/connectors/test_google_jules.py index c0aa690..a1e4f00 100644 --- a/tests/connectors/test_google_jules.py +++ b/tests/connectors/test_google_jules.py @@ -158,3 +158,26 @@ def test_handle_response_raises_jules_error() -> None: assert exc_info.value.code == 403 assert exc_info.value.details == [{"reason": "forbidden"}] + + +def test_handle_response_redacts_sensitive_jules_error_details() -> None: + """Jules API errors should not expose raw secret-bearing fields.""" + connector = JulesConnector(api_key="test-key") + response = _response( + { + "error": { + "message": "denied password=hunter2 Bearer raw_token", + "code": 403, + "details": [{"api_key": "key_123"}], + } + }, + 403, + ) + + with pytest.raises(JulesError) as exc_info: + connector._handle_response(response) + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert 
exc_info.value.details == [{"api_key": "[REDACTED]"}] diff --git a/tests/connectors/test_slack_connector.py b/tests/connectors/test_slack_connector.py index 8289e40..6dab8be 100644 --- a/tests/connectors/test_slack_connector.py +++ b/tests/connectors/test_slack_connector.py @@ -9,6 +9,7 @@ import pytest from extended_data.connectors.slack import ( + SlackAPIError, SlackConnector, get_divider, get_field_context_message_blocks, @@ -49,6 +50,16 @@ def test_slack_block_helpers_return_extended_payloads(): assert isinstance(rich[0]["elements"], ExtendedList) +def test_slack_api_error_redacts_sensitive_response_text() -> None: + """Slack API errors should not expose raw secret-bearing response values.""" + error = SlackAPIError({"ok": False, "password": "hunter2", "authorization": "Bearer raw_token"}) + + message = str(error) + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + + class TestSlackConnector: """Test suite for SlackConnector.""" @@ -136,7 +147,9 @@ def __init__(self, response): mock_bot_client = MagicMock() mock_bot_client.users_conversations.return_value = {"channels": [{"name": "general", "id": "C12345"}]} - mock_bot_client.chat_postMessage.side_effect = FakeSlackApiError({"ok": False, "error": "channel_not_found"}) + mock_bot_client.chat_postMessage.side_effect = FakeSlackApiError( + {"ok": False, "error": "channel_not_found", "password": "hunter2"} + ) mock_user_client = MagicMock() mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] @@ -154,6 +167,7 @@ def __init__(self, response): assert isinstance(result, ExtendedDict) assert isinstance(result["error"], ExtendedString) assert result["error"] == "channel_not_found" + assert result["password"] == "[REDACTED]" @patch("extended_data.connectors.slack.SlackConnector._call_api") @patch("extended_data.connectors.slack.WebClient") From 3b763c01aedae3f78e0ebd913b680b0f33248f23 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 
16:50:54 -0500 Subject: [PATCH 193/287] fix: redact mcp connector errors --- src/extended_data/connectors/mcp.py | 9 +++++-- src/extended_data/connectors/meshy/mcp.py | 26 +++++++++++++++--- tests/connectors/meshy/test_meshy_mcp.py | 32 +++++++++++++++++++++++ tests/connectors/test_mcp.py | 13 ++++++++- 4 files changed, 74 insertions(+), 6 deletions(-) create mode 100644 tests/connectors/meshy/test_meshy_mcp.py diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index c56ddb7..8e42cbf 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -28,7 +28,7 @@ from collections.abc import Callable, Iterable, Mapping from typing import Any, cast -from extended_data.connectors.redaction import redact_sensitive_data +from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text from extended_data.connectors.registry import _list_connector_classes, get_connector from extended_data.connectors.surface import connector_data_methods from extended_data.containers import to_builtin @@ -110,6 +110,11 @@ def _jsonable_tool_result(result: Any) -> Any: return redact_sensitive_data(result) +def _tool_error_text(error: Exception) -> str: + """Return an MCP-safe error string without raw secret values.""" + return f"Error: {type(error).__name__}: {redact_sensitive_text(error)}" + + def create_server() -> Any: """Create the unified MCP server with all registered connectors.""" try: @@ -192,7 +197,7 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]: return [TextContent(type="text", text=json.dumps(_jsonable_tool_result(result), indent=2, default=str))] except Exception as e: - return [TextContent(type="text", text=f"Error: {type(e).__name__}: {e}")] + return [TextContent(type="text", text=_tool_error_text(e))] return server diff --git a/src/extended_data/connectors/meshy/mcp.py b/src/extended_data/connectors/meshy/mcp.py index 464b7c9..e23e785 100644 --- 
a/src/extended_data/connectors/meshy/mcp.py +++ b/src/extended_data/connectors/meshy/mcp.py @@ -31,9 +31,12 @@ import json -from collections.abc import Callable +from collections.abc import Callable, Iterable, Mapping from typing import Any, cast +from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text +from extended_data.containers import to_builtin + MCP_INSTALL_MESSAGE = "MCP SDK not installed. Install with: pip install extended-data[meshy,mcp]" @@ -254,6 +257,23 @@ def _create_mcp_tools() -> list[tuple[Any, Callable[..., Any]]]: return mcp_tools +def _jsonable_tool_result(result: Any) -> Any: + """Lower Meshy tool results to JSON-compatible redacted data.""" + if hasattr(result, "model_dump"): + result = result.model_dump() + elif isinstance(result, Iterable) and not isinstance(result, (str, bytes, bytearray, Mapping)): + result = [item.model_dump() if hasattr(item, "model_dump") else item for item in result] + result = to_builtin(result) + if isinstance(result, set | frozenset): + result = [to_builtin(item) for item in result] + return redact_sensitive_data(result) + + +def _tool_error_payload(error: object) -> dict[str, str]: + """Return an MCP-safe error payload without raw secret values.""" + return {"error": redact_sensitive_text(error)} + + def create_server() -> Any: """Create an MCP server with Meshy AI tools. 
@@ -299,12 +319,12 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[Any]: try: result = handler(**arguments) - return [TextContent(type="text", text=json.dumps(result, indent=2))] + return [TextContent(type="text", text=json.dumps(_jsonable_tool_result(result), indent=2))] except Exception as e: return [ TextContent( type="text", - text=json.dumps({"error": str(e)}, indent=2), + text=json.dumps(_tool_error_payload(e), indent=2), ) ] diff --git a/tests/connectors/meshy/test_meshy_mcp.py b/tests/connectors/meshy/test_meshy_mcp.py new file mode 100644 index 0000000..53b21d7 --- /dev/null +++ b/tests/connectors/meshy/test_meshy_mcp.py @@ -0,0 +1,32 @@ +"""Tests for Meshy MCP serialization helpers.""" + +from __future__ import annotations + +from extended_data.connectors.meshy.mcp import _jsonable_tool_result, _tool_error_payload +from extended_data.containers import ExtendedDict, ExtendedSet + + +def test_meshy_mcp_result_lowers_and_redacts_extended_payloads() -> None: + """Meshy MCP result serialization should handle Tier 2 payloads directly.""" + payload = ExtendedDict( + { + "service": {"name": "meshy"}, + "password": "hunter2", + "tags": ExtendedSet({"asset", "model"}), + } + ) + + result = _jsonable_tool_result(payload) + + assert result["service"] == {"name": "meshy"} + assert result["password"] == "[REDACTED]" + assert sorted(result["tags"]) == ["asset", "model"] + + +def test_meshy_mcp_error_payload_redacts_sensitive_values() -> None: + """Meshy MCP errors should not return raw secret-bearing exception text.""" + payload = _tool_error_payload(RuntimeError("failed api_key=key_123 Bearer raw_token")) + + assert "key_123" not in payload["error"] + assert "raw_token" not in payload["error"] + assert "[REDACTED]" in payload["error"] diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index e11c316..e95566f 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -4,7 +4,7 @@ import pytest -from 
extended_data.connectors.mcp import _get_public_methods, _jsonable_tool_result, create_server +from extended_data.connectors.mcp import _get_public_methods, _jsonable_tool_result, _tool_error_text, create_server from extended_data.connectors.meshy.connector import MeshyConnector from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet @@ -68,3 +68,14 @@ def test_jsonable_tool_result_lowers_extended_set_payloads() -> None: payload = ExtendedSet({"api", "worker"}) assert sorted(_jsonable_tool_result(payload)) == ["api", "worker"] + + +def test_tool_error_text_redacts_sensitive_exception_values() -> None: + """Generic MCP errors should not bypass connector redaction.""" + error = RuntimeError("failed password=hunter2 Authorization: Bearer raw_token") + + text = _tool_error_text(error) + + assert "hunter2" not in text + assert "raw_token" not in text + assert "[REDACTED]" in text From ec58ac506a81ab70f23f4ab7c6b0328ebdc42f2b Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 16:56:37 -0500 Subject: [PATCH 194/287] fix: redact unknown tool diagnostics --- src/extended_data/connectors/base.py | 2 +- src/extended_data/connectors/mcp.py | 7 ++++++- src/extended_data/connectors/meshy/mcp.py | 2 +- tests/connectors/meshy/test_meshy_mcp.py | 9 +++++++++ tests/connectors/test_base.py | 13 +++++++++++++ tests/connectors/test_mcp.py | 17 ++++++++++++++++- 6 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index 61363f4..bda7a9f 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -567,7 +567,7 @@ def handle_ai_tool_call(self, name: str, arguments: dict[str, Any]) -> Any: Tool result """ if name not in self._tool_functions: - msg = f"Unknown tool: {name}" + msg = f"Unknown tool: {redact_sensitive_text(name)}" raise ValueError(msg) func = self._tool_functions[name] diff --git a/src/extended_data/connectors/mcp.py 
b/src/extended_data/connectors/mcp.py index 8e42cbf..a18269d 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -115,6 +115,11 @@ def _tool_error_text(error: Exception) -> str: return f"Error: {type(error).__name__}: {redact_sensitive_text(error)}" +def _unknown_tool_text(name: str) -> str: + """Return an MCP-safe unknown-tool diagnostic.""" + return f"Unknown tool: {redact_sensitive_text(name)}" + + def create_server() -> Any: """Create the unified MCP server with all registered connectors.""" try: @@ -176,7 +181,7 @@ async def list_tools() -> list[Tool]: async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]: """Execute a tool and return results.""" if name not in tools: - return [TextContent(type="text", text=f"Unknown tool: {name}")] + return [TextContent(type="text", text=_unknown_tool_text(name))] tool = tools[name] connector_name = tool["connector"] diff --git a/src/extended_data/connectors/meshy/mcp.py b/src/extended_data/connectors/meshy/mcp.py index e23e785..2aac1c4 100644 --- a/src/extended_data/connectors/meshy/mcp.py +++ b/src/extended_data/connectors/meshy/mcp.py @@ -313,7 +313,7 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[Any]: return [ TextContent( type="text", - text=json.dumps({"error": f"Unknown tool: {name}"}), + text=json.dumps(_tool_error_payload(f"Unknown tool: {name}")), ) ] diff --git a/tests/connectors/meshy/test_meshy_mcp.py b/tests/connectors/meshy/test_meshy_mcp.py index 53b21d7..0d775d0 100644 --- a/tests/connectors/meshy/test_meshy_mcp.py +++ b/tests/connectors/meshy/test_meshy_mcp.py @@ -30,3 +30,12 @@ def test_meshy_mcp_error_payload_redacts_sensitive_values() -> None: assert "key_123" not in payload["error"] assert "raw_token" not in payload["error"] assert "[REDACTED]" in payload["error"] + + +def test_meshy_mcp_error_payload_redacts_unknown_tool_names() -> None: + """Meshy MCP unknown-tool diagnostics should redact user-controlled 
names.""" + payload = _tool_error_payload("Unknown tool: password=hunter2 Authorization: Bearer raw_token") + + assert "hunter2" not in payload["error"] + assert "raw_token" not in payload["error"] + assert "[REDACTED]" in payload["error"] diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py index 7dd8fc0..b754f89 100644 --- a/tests/connectors/test_base.py +++ b/tests/connectors/test_base.py @@ -142,6 +142,19 @@ def test_handle_ai_tool_call_promotes_result_payloads() -> None: assert result["status"].upper_first() == "Ok" +def test_handle_ai_tool_call_redacts_unknown_tool_names() -> None: + """Unknown AI tool diagnostics should not echo secret-bearing names.""" + connector = _connector() + + with pytest.raises(ValueError) as exc_info: + connector.handle_ai_tool_call("password=hunter2 Authorization: Bearer raw_token", {}) + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + + def test_get_ai_tool_definitions_promotes_definition_payloads() -> None: """AI tool definition export should expose extended containers.""" diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index e95566f..9d9881e 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -4,7 +4,13 @@ import pytest -from extended_data.connectors.mcp import _get_public_methods, _jsonable_tool_result, _tool_error_text, create_server +from extended_data.connectors.mcp import ( + _get_public_methods, + _jsonable_tool_result, + _tool_error_text, + _unknown_tool_text, + create_server, +) from extended_data.connectors.meshy.connector import MeshyConnector from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet @@ -79,3 +85,12 @@ def test_tool_error_text_redacts_sensitive_exception_values() -> None: assert "hunter2" not in text assert "raw_token" not in text assert "[REDACTED]" in text + + +def test_unknown_tool_text_redacts_sensitive_tool_names() -> 
None: + """Generic MCP unknown-tool diagnostics should redact user-controlled names.""" + text = _unknown_tool_text("password=hunter2 Authorization: Bearer raw_token") + + assert "hunter2" not in text + assert "raw_token" not in text + assert "[REDACTED]" in text From 67ba1b0da1e720347a6051db5f5e39da66a0279c Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:01:18 -0500 Subject: [PATCH 195/287] fix: redact connector error payloads --- .../connectors/anthropic/__init__.py | 2 +- .../connectors/meshy/webhooks/handler.py | 6 +- .../connectors/secrets/__init__.py | 42 ++++++----- tests/connectors/meshy/test_webhooks.py | 49 +++++++++++++ tests/connectors/test_anthropic.py | 20 +++++ tests/connectors/test_secrets.py | 73 +++++++++++++++++++ 6 files changed, 170 insertions(+), 22 deletions(-) diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index 40bb620..321e206 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ b/src/extended_data/connectors/anthropic/__init__.py @@ -537,7 +537,7 @@ def execute_agent_task( return AgentExecutionResult( success=False, output="", - error=str(e), + error=redact_sensitive_text(e), duration_seconds=duration, ) diff --git a/src/extended_data/connectors/meshy/webhooks/handler.py b/src/extended_data/connectors/meshy/webhooks/handler.py index 5c86be6..942f197 100644 --- a/src/extended_data/connectors/meshy/webhooks/handler.py +++ b/src/extended_data/connectors/meshy/webhooks/handler.py @@ -10,6 +10,7 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.webhooks.schemas import MeshyWebhookPayload +from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, extend_data, to_builtin from ..persistence.repository import TaskRepository @@ -65,7 +66,7 @@ def handle_signed_webhook( return extend_data({ "status": "error", "message": "Invalid webhook payload", - 
"error": str(exc), + "error": redact_sensitive_text(exc), }) return self.handle_webhook(parsed_payload, project=project, spec_hash=spec_hash) @@ -111,7 +112,8 @@ def handle_webhook( error_message = None if payload.status == "FAILED": - error_message = payload.get_error_message() + raw_error_message = payload.get_error_message() + error_message = redact_sensitive_text(raw_error_message) if raw_error_message else None result_paths = to_builtin(payload.get_all_urls()) diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 094ce2e..0940820 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -40,6 +40,7 @@ from typing import Any from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text from extended_data.containers import ExtendedDict, extend_data from extended_data.logging import Logging @@ -94,18 +95,19 @@ class SyncResult: @classmethod def from_cli_output(cls, output: dict[str, Any]) -> SyncResult: """Create from CLI JSON output.""" + safe_output = redact_sensitive_data(output) return cls( - success=output.get("success", False), - target_count=output.get("target_count", 0), - secrets_processed=output.get("secrets_processed", 0), - secrets_added=output.get("secrets_added", 0), - secrets_modified=output.get("secrets_modified", 0), - secrets_removed=output.get("secrets_removed", 0), - secrets_unchanged=output.get("secrets_unchanged", 0), - duration_ms=output.get("duration_ms", 0), - error_message=output.get("error_message", ""), - results_json=json.dumps(output.get("results", [])), - diff_output=output.get("diff_output", ""), + success=safe_output.get("success", False), + target_count=safe_output.get("target_count", 0), + secrets_processed=safe_output.get("secrets_processed", 0), + secrets_added=safe_output.get("secrets_added", 0), + 
secrets_modified=safe_output.get("secrets_modified", 0), + secrets_removed=safe_output.get("secrets_removed", 0), + secrets_unchanged=safe_output.get("secrets_unchanged", 0), + duration_ms=safe_output.get("duration_ms", 0), + error_message=safe_output.get("error_message", ""), + results_json=json.dumps(safe_output.get("results", [])), + diff_output=safe_output.get("diff_output", ""), ) def to_dict(self) -> ExtendedDict: @@ -221,11 +223,11 @@ def _cli_validate_config(self, config_path: str) -> tuple[bool, str]: ) if result.returncode == 0: return True, "Configuration is valid" - return False, result.stderr or result.stdout + return False, redact_sensitive_text(result.stderr or result.stdout) except subprocess.TimeoutExpired: return False, "Validation timed out" except Exception as e: - return False, str(e) + return False, redact_sensitive_text(e) def get_config_info(self, config_path: str) -> ExtendedDict: """Get detailed information about a configuration. @@ -264,9 +266,9 @@ def _cli_get_config_info(self, config_path: str) -> ConfigInfo: aws_region=cfg.get("aws", {}).get("region", ""), ) except FileNotFoundError: - return ConfigInfo(error_message=f"Configuration file not found: {config_path}") + return ConfigInfo(error_message=f"Configuration file not found: {redact_sensitive_text(config_path)}") except yaml.YAMLError as e: - return ConfigInfo(error_message=f"Error parsing YAML file: {e}") + return ConfigInfo(error_message=f"Error parsing YAML file: {redact_sensitive_text(e)}") def run_pipeline( self, @@ -354,7 +356,9 @@ def _cli_run_pipeline( ) parsed = SyncResult.from_cli_output(output) if result.returncode != 0 and not parsed.error_message: - parsed.error_message = result.stderr or f"secretsync exited with status {result.returncode}" + parsed.error_message = redact_sensitive_text( + result.stderr or f"secretsync exited with status {result.returncode}" + ) return parsed if result.returncode == 0: @@ -365,7 +369,7 @@ def _cli_run_pipeline( return SyncResult( 
success=False, - error_message=result.stderr or result.stdout, + error_message=redact_sensitive_text(result.stderr or result.stdout), ) except subprocess.TimeoutExpired: return SyncResult( @@ -375,12 +379,12 @@ def _cli_run_pipeline( except json.JSONDecodeError as e: return SyncResult( success=False, - error_message=f"Failed to parse output: {e}", + error_message=f"Failed to parse output: {redact_sensitive_text(e)}", ) except Exception as e: return SyncResult( success=False, - error_message=str(e), + error_message=redact_sensitive_text(e), ) def dry_run(self, config_path: str) -> ExtendedDict: diff --git a/tests/connectors/meshy/test_webhooks.py b/tests/connectors/meshy/test_webhooks.py index 01e3daf..fde430f 100644 --- a/tests/connectors/meshy/test_webhooks.py +++ b/tests/connectors/meshy/test_webhooks.py @@ -218,6 +218,39 @@ def test_handle_webhook_failed_task(self, webhook_handler, mock_repository, webh call_args = mock_repository.record_task_update.call_args assert call_args[1]["error"] == "Generation failed due to invalid prompt" + def test_handle_webhook_redacts_failed_task_error(self, webhook_handler, mock_repository): + """Failed task errors recorded from webhook payloads should be redacted.""" + asset_manifest = AssetManifest( + asset_spec_hash="hash-xyz", + spec_fingerprint="hash-xyz", + project="project1", + asset_intent="creature", + task_graph=[ + TaskGraphEntry( + task_id="task-failed-secret", + service="text3d", + status="IN_PROGRESS", + created_at=datetime.now(timezone.utc), + updated_at=datetime.now(timezone.utc), + ) + ], + ) + mock_repository.find_task_by_id.return_value = _task_lookup_payload("project1", "hash-xyz", asset_manifest) + + payload = MeshyWebhookPayload( + id="task-failed-secret", + status="FAILED", + created_at=1700000000, + task_error={"message": "denied password=hunter2 Authorization: Bearer raw_token"}, + ) + + webhook_handler.handle_webhook(payload) + + error = mock_repository.record_task_update.call_args.kwargs["error"] + 
assert "hunter2" not in error + assert "raw_token" not in error + assert "[REDACTED]" in error + def test_handle_webhook_downloads_artifact(self, temp_dir, webhook_payload_succeeded): """Test that handler downloads artifacts on success.""" from pathlib import Path @@ -328,6 +361,22 @@ def test_handle_signed_webhook_rejects_invalid_signature( mock_repository.find_task_by_id.assert_not_called() mock_repository.record_task_update.assert_not_called() + def test_handle_signed_webhook_redacts_invalid_payload_error(self, mock_repository): + """Signed payload parse failures should not echo secret-bearing input.""" + payload = b'{"id":"task-12345-abcde","password":"hunter2","authorization":"Bearer raw_token"}' + signature = hmac.new(b"secret", payload, hashlib.sha256).hexdigest() + handler = WebhookHandler(repository=mock_repository, webhook_secret="secret") + + result = handler.handle_signed_webhook(payload, signature) + + assert result["status"] == "error" + assert result["message"] == "Invalid webhook payload" + assert "hunter2" not in result["error"] + assert "raw_token" not in result["error"] + assert "[REDACTED]" in result["error"] + mock_repository.find_task_by_id.assert_not_called() + mock_repository.record_task_update.assert_not_called() + def test_handle_signed_webhook_processes_valid_signature( self, mock_repository, webhook_payload_succeeded ): diff --git a/tests/connectors/test_anthropic.py b/tests/connectors/test_anthropic.py index eb813f6..5edc97f 100644 --- a/tests/connectors/test_anthropic.py +++ b/tests/connectors/test_anthropic.py @@ -276,6 +276,26 @@ def test_handle_error_redacts_sensitive_vendor_message(self): assert "raw_token" not in message assert "[REDACTED]" in message + def test_execute_agent_task_redacts_error_result(self): + """Agent task failures should not expose secrets in public result errors.""" + import httpx + + with patch.object(httpx, "Client"): + connector = AnthropicConnector(api_key="test-key") + + with patch.object( + connector, + 
"create_message", + side_effect=AnthropicError("failed password=hunter2 Authorization: Bearer raw_token"), + ): + result = connector.execute_agent_task("summarize") + + assert result.success is False + assert result.error is not None + assert "hunter2" not in result.error + assert "raw_token" not in result.error + assert "[REDACTED]" in result.error + class TestClaudeModels: """Tests for Claude model constants. diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index a777960..ece8a67 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -273,6 +273,44 @@ def test_cli_run_pipeline_parses_failure_result_envelope(mock_run: MagicMock, co assert json.loads(str(result["results_json"]))[0]["error"] == "denied" +@patch("subprocess.run") +def test_cli_run_pipeline_redacts_failure_result_envelope( + mock_run: MagicMock, + connector: SecretsConnector, +) -> None: + mock_run.return_value = MagicMock( + returncode=1, + stdout=json.dumps( + { + "success": False, + "error_message": "pipeline failed password=hunter2 Authorization: Bearer raw_token", + "results": [ + { + "target": "prod", + "success": False, + "error": "target denied api_key=key_123", + "password": "hunter2", + } + ], + "diff_output": "changed token=tok_123", + } + ), + stderr="", + ) + + result = connector.run_pipeline("config.yaml") + + assert result["success"] is False + assert "hunter2" not in result["error_message"] + assert "raw_token" not in result["error_message"] + assert "[REDACTED]" in result["error_message"] + assert "hunter2" not in result["results_json"] + assert "key_123" not in result["results_json"] + assert '"password": "[REDACTED]"' in result["results_json"] + assert "tok_123" not in result["diff_output"] + assert "[REDACTED]" in result["diff_output"] + + @patch("subprocess.run") def test_cli_run_pipeline_failure_envelope_uses_stderr_when_error_message_missing( mock_run: MagicMock, @@ -321,6 +359,25 @@ def 
test_cli_run_pipeline_non_json_failure_uses_cli_output(mock_run: MagicMock, assert result["error_message"] == "not json" +@patch("subprocess.run") +def test_cli_run_pipeline_non_json_failure_redacts_cli_output( + mock_run: MagicMock, + connector: SecretsConnector, +) -> None: + mock_run.return_value = MagicMock( + returncode=1, + stdout="", + stderr="failed password=hunter2 Authorization: Bearer raw_token", + ) + + result = connector.run_pipeline("config.yaml") + + assert result["success"] is False + assert "hunter2" not in result["error_message"] + assert "raw_token" not in result["error_message"] + assert "[REDACTED]" in result["error_message"] + + @patch("subprocess.run") def test_cli_run_pipeline_only_emits_supported_cli_flags(mock_run: MagicMock, connector: SecretsConnector) -> None: mock_run.return_value = MagicMock( @@ -361,6 +418,22 @@ def test_cli_validate_config(mock_run: MagicMock, connector: SecretsConnector) - assert "validate" in args +@patch("subprocess.run") +def test_cli_validate_config_redacts_cli_output(mock_run: MagicMock, connector: SecretsConnector) -> None: + mock_run.return_value = MagicMock( + returncode=1, + stdout="", + stderr="invalid password=hunter2 Authorization: Bearer raw_token", + ) + + validation = connector.validate_config("config.yaml") + + assert validation["valid"] is False + assert "hunter2" not in validation["message"] + assert "raw_token" not in validation["message"] + assert "[REDACTED]" in validation["message"] + + @patch("extended_data.connectors.secrets.SecretsConnector") def test_run_pipeline_tool_default_continue_on_error_matches_cli(mock_connector_class: MagicMock) -> None: mock_connector = mock_connector_class.return_value From f68931166414b9f8b1fc4c23b5c499d89b95c2c9 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:08:17 -0500 Subject: [PATCH 196/287] fix: redact mesh task failures --- src/extended_data/connectors/meshy/animate.py | 6 +-- src/extended_data/connectors/meshy/base.py | 15 +++++++ 
src/extended_data/connectors/meshy/image3d.py | 8 ++-- .../connectors/meshy/retexture.py | 6 +-- src/extended_data/connectors/meshy/rigging.py | 6 +-- src/extended_data/connectors/meshy/text3d.py | 6 +-- src/extended_data/connectors/registry.py | 19 ++++++--- .../connectors/secrets/__init__.py | 2 +- tests/connectors/meshy/test_meshy_base.py | 10 +++++ tests/connectors/meshy/test_task_ids.py | 40 +++++++++++++++++++ tests/connectors/test_connectors.py | 27 +++++++++++-- tests/connectors/test_secrets.py | 26 ++++++++++++ 12 files changed, 141 insertions(+), 30 deletions(-) diff --git a/src/extended_data/connectors/meshy/animate.py b/src/extended_data/connectors/meshy/animate.py index 0605830..fe6ee56 100644 --- a/src/extended_data/connectors/meshy/animate.py +++ b/src/extended_data/connectors/meshy/animate.py @@ -47,10 +47,8 @@ def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> Extende if status == TaskStatus.SUCCEEDED: return result if status == TaskStatus.FAILED: - error = result.get("task_error", {}) - msg = error.get("message", "Unknown error") if isinstance(error, dict) else str(error) - msg = f"Task failed: {msg}" - raise RuntimeError(msg) + error = result.get("task_error") or result.get("error") + raise RuntimeError(base.task_failure_message(error)) if status == TaskStatus.EXPIRED: msg = "Task expired" raise RuntimeError(msg) diff --git a/src/extended_data/connectors/meshy/base.py b/src/extended_data/connectors/meshy/base.py index 14c9c61..60a71dc 100644 --- a/src/extended_data/connectors/meshy/base.py +++ b/src/extended_data/connectors/meshy/base.py @@ -14,6 +14,7 @@ import threading import time +from collections.abc import Mapping from typing import Any import httpx @@ -112,6 +113,20 @@ def _headers() -> dict[str, str]: } +def task_failure_message(error: Any) -> str: + """Return a public, redacted Meshy task failure message.""" + if isinstance(error, Mapping): + message = error.get("message") or error.get("error") or "Unknown error" + 
else: + message = error or "Unknown error" + return f"Task failed: {redact_sensitive_text(message)}" + + +def unexpected_response_message(data: Any) -> str: + """Return a public, redacted unexpected-response diagnostic.""" + return f"Unexpected API response: missing 'result' key. Response: {redact_sensitive_text(data)}" + + @retry( retry=retry_if_exception_type((RateLimitError, httpx.TimeoutException)), stop=stop_after_attempt(5), diff --git a/src/extended_data/connectors/meshy/image3d.py b/src/extended_data/connectors/meshy/image3d.py index 8260ae9..e639b5c 100644 --- a/src/extended_data/connectors/meshy/image3d.py +++ b/src/extended_data/connectors/meshy/image3d.py @@ -26,7 +26,7 @@ def create(request: Image3DRequest) -> ExtendedString: ) data = response.json() if "result" not in data: - raise RuntimeError(f"Unexpected API response: missing 'result' key. Response: {data}") + raise RuntimeError(base.unexpected_response_message(data)) return extend_data(data["result"]) @@ -47,7 +47,7 @@ def refine(task_id: str) -> ExtendedString: ) data = response.json() if "result" not in data: - raise RuntimeError(f"Unexpected API response: missing 'result' key. 
Response: {data}") + raise RuntimeError(base.unexpected_response_message(data)) return extend_data(data["result"]) @@ -73,8 +73,8 @@ def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> Extende if status == TaskStatus.SUCCEEDED: return result if status == TaskStatus.FAILED: - msg = f"Task failed: {result.get('error') or 'Unknown error'}" - raise RuntimeError(msg) + error = result.get("task_error") or result.get("error") + raise RuntimeError(base.task_failure_message(error)) if status == TaskStatus.EXPIRED: msg = "Task expired" raise RuntimeError(msg) diff --git a/src/extended_data/connectors/meshy/retexture.py b/src/extended_data/connectors/meshy/retexture.py index afa0694..b972b01 100644 --- a/src/extended_data/connectors/meshy/retexture.py +++ b/src/extended_data/connectors/meshy/retexture.py @@ -42,10 +42,8 @@ def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> Extende if status == TaskStatus.SUCCEEDED: return result if status == TaskStatus.FAILED: - error = result.get("task_error", {}) - msg = error.get("message", "Unknown error") if isinstance(error, dict) else str(error) - msg = f"Task failed: {msg}" - raise RuntimeError(msg) + error = result.get("task_error") or result.get("error") + raise RuntimeError(base.task_failure_message(error)) if status == TaskStatus.EXPIRED: msg = "Task expired" raise RuntimeError(msg) diff --git a/src/extended_data/connectors/meshy/rigging.py b/src/extended_data/connectors/meshy/rigging.py index bcdbdb7..8a6bdc9 100644 --- a/src/extended_data/connectors/meshy/rigging.py +++ b/src/extended_data/connectors/meshy/rigging.py @@ -42,10 +42,8 @@ def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> Extende if status == TaskStatus.SUCCEEDED: return result if status == TaskStatus.FAILED: - error = result.get("task_error", {}) - msg = error.get("message", "Unknown error") if isinstance(error, dict) else str(error) - msg = f"Task failed: {msg}" - raise RuntimeError(msg) + error = 
result.get("task_error") or result.get("error") + raise RuntimeError(base.task_failure_message(error)) if status == TaskStatus.EXPIRED: msg = "Task expired" raise RuntimeError(msg) diff --git a/src/extended_data/connectors/meshy/text3d.py b/src/extended_data/connectors/meshy/text3d.py index 26c1829..e142045 100644 --- a/src/extended_data/connectors/meshy/text3d.py +++ b/src/extended_data/connectors/meshy/text3d.py @@ -54,10 +54,8 @@ def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> Extende if status == TaskStatus.SUCCEEDED: return result if status == TaskStatus.FAILED: - error = result.get("task_error", {}) - msg = error.get("message", "Unknown error") if isinstance(error, dict) else str(error) - msg = f"Task failed: {msg}" - raise RuntimeError(msg) + error = result.get("task_error") or result.get("error") + raise RuntimeError(base.task_failure_message(error)) if status == TaskStatus.EXPIRED: msg = "Task expired" raise RuntimeError(msg) diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 0d07a97..9f40874 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -41,6 +41,7 @@ get_extra_for_connector, get_missing_connector_requirements, ) +from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data @@ -144,12 +145,18 @@ def _discover_connectors() -> dict[str, builtins.type[VendorConnectorBase]]: continue import warnings - warnings.warn(f"Failed to load connector '{ep.name}': {e}", stacklevel=2) + warnings.warn( + f"Failed to load connector '{redact_sensitive_text(ep.name)}': {redact_sensitive_text(e)}", + stacklevel=2, + ) except Exception as e: # Log but don't fail - allow partial loading import warnings - warnings.warn(f"Failed to load connector '{ep.name}': {e}", stacklevel=2) + warnings.warn( + f"Failed to load connector 
'{redact_sensitive_text(ep.name)}': {redact_sensitive_text(e)}", + stacklevel=2, + ) _connector_cache = connectors return connectors @@ -166,7 +173,7 @@ def _raise_missing_builtin_connector(name: str, error: ImportError) -> NoReturn: if missing: msg = f"{msg}\nMissing packages: {', '.join(str(package) for package in missing)}" if str(error): - msg = f"{msg}\nOriginal import error: {error}" + msg = f"{msg}\nOriginal import error: {redact_sensitive_text(error)}" raise ImportError(msg) from error @@ -215,7 +222,7 @@ def get_connector_class(name: str) -> builtins.type[VendorConnectorBase]: if name_lower in BUILTIN_CONNECTORS: _raise_unregistered_builtin_connector(name_lower) available = ", ".join(sorted(connectors.keys())) - raise ValueError(f"Unknown connector: {name}. Available: {available}") + raise ValueError(f"Unknown connector: {redact_sensitive_text(name)}. Available: {available}") if name_lower in BUILTIN_CONNECTORS: missing = get_missing_connector_requirements(name_lower) @@ -295,7 +302,7 @@ def _missing_builtin_connector_info(name: str, error: ImportError | None) -> Con """Build metadata for a known built-in connector that cannot be loaded.""" spec = BUILTIN_CONNECTORS[name] error_message = ( - str(error) + redact_sensitive_text(error) if error else "Built-in connector is declared but is not registered in the extended_data.connectors entry point group." ) @@ -338,7 +345,7 @@ def get_connector_info(name: str, *, include_unavailable: bool = True) -> Extend return _missing_builtin_connector_info(connector_name, None).as_dict() available = ", ".join(sorted(connectors.keys())) - raise ValueError(f"Unknown connector: {name}. Available: {available}") + raise ValueError(f"Unknown connector: {redact_sensitive_text(name)}. 
Available: {available}") def list_connector_info(*, include_unavailable: bool = True) -> ExtendedList[ExtendedDict]: diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 0940820..e3d39f7 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -343,7 +343,7 @@ def _cli_run_pipeline( if result.returncode == 0: return SyncResult( success=False, - error_message=f"Failed to parse output: {e}", + error_message=f"Failed to parse output: {redact_sensitive_text(e)}", ) else: if not isinstance(output, dict) or "success" not in output: diff --git a/tests/connectors/meshy/test_meshy_base.py b/tests/connectors/meshy/test_meshy_base.py index 76ebd53..4604cf7 100644 --- a/tests/connectors/meshy/test_meshy_base.py +++ b/tests/connectors/meshy/test_meshy_base.py @@ -30,3 +30,13 @@ def test_meshy_request_redacts_sensitive_error_body(monkeypatch: pytest.MonkeyPa assert "key_123" not in message assert "raw_token" not in message assert "[REDACTED]" in message + + +def test_task_failure_message_redacts_sensitive_values() -> None: + """Meshy task failure messages should share the connector redaction boundary.""" + message = base.task_failure_message({"message": "failed password=hunter2 Authorization: Bearer raw_token"}) + + assert message.startswith("Task failed:") + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message diff --git a/tests/connectors/meshy/test_task_ids.py b/tests/connectors/meshy/test_task_ids.py index 7d78169..aa0b0c6 100644 --- a/tests/connectors/meshy/test_task_ids.py +++ b/tests/connectors/meshy/test_task_ids.py @@ -4,6 +4,8 @@ from unittest.mock import MagicMock, patch +import pytest + from extended_data.connectors.meshy import animate, image3d, retexture, rigging, text3d from extended_data.connectors.meshy.models import ( AnimationRequest, @@ -158,3 +160,41 @@ def 
test_retexture_get_returns_extended_payload() -> None: assert isinstance(result, ExtendedDict) assert isinstance(result["model_urls"], ExtendedDict) assert result["model_urls"]["glb"] == "https://example.com/retexture.glb" + + +@pytest.mark.parametrize("module", [text3d, image3d, retexture, rigging, animate]) +def test_meshy_poll_redacts_failed_task_errors(monkeypatch: pytest.MonkeyPatch, module: object) -> None: + """All Meshy polling helpers should redact vendor task failure messages.""" + monkeypatch.setattr( + module, + "get", + lambda task_id: { + "id": task_id, + "status": "FAILED", + "task_error": {"message": "denied password=hunter2 Authorization: Bearer raw_token"}, + "error": "denied api_key=key_123", + }, + ) + + with pytest.raises(RuntimeError) as exc_info: + module.poll("task-secret", interval=0, timeout=1) + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "key_123" not in message + assert "[REDACTED]" in message + + +def test_image3d_create_redacts_unexpected_response() -> None: + """Image3D create diagnostics should not echo secret-bearing response payloads.""" + response = _json_response({"password": "hunter2", "authorization": "Bearer raw_token"}) + + with patch("extended_data.connectors.meshy.image3d.base.request", return_value=response): + with pytest.raises(RuntimeError) as exc_info: + image3d.create(Image3DRequest(image_url="https://example.com/source.png")) + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index bfcff82..cf3c0d2 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -343,19 +343,24 @@ def test_get_connector_class_known_missing_builtin_has_install_hint(self, monkey monkeypatch.setitem( registry._missing_builtin_connectors, "github", - ImportError("No module 
named 'github'"), + ImportError("No module named 'github' password=hunter2 Authorization: Bearer raw_token"), ) - with pytest.raises(ImportError, match=r"extended-data\[github\]"): + with pytest.raises(ImportError, match=r"extended-data\[github\]") as exc_info: registry.get_connector_class(" github ") + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + def test_get_connector_info_includes_known_missing_builtin(self, monkeypatch): """Registry metadata includes unavailable known connectors.""" monkeypatch.setattr(registry, "_connector_cache", {}) monkeypatch.setitem( registry._missing_builtin_connectors, "github", - ImportError("No module named 'github'"), + ImportError("No module named 'github' password=hunter2 Authorization: Bearer raw_token"), ) info = registry.get_connector_info(" github ") @@ -367,6 +372,22 @@ def test_get_connector_info_includes_known_missing_builtin(self, monkeypatch): assert info["extra"] == "github" assert info["install"] == "pip install extended-data[github]" assert info["class"] == "GitHubConnector" + assert "hunter2" not in info["error"] + assert "raw_token" not in info["error"] + assert "[REDACTED]" in info["error"] + + def test_get_connector_class_redacts_unknown_connector_name(self, monkeypatch): + """Unknown connector diagnostics should not echo secret-bearing names.""" + monkeypatch.setattr(registry, "_connector_cache", {}) + monkeypatch.setattr(registry, "_missing_builtin_connectors", {}) + + with pytest.raises(ValueError) as exc_info: + registry.get_connector_class("password=hunter2 Authorization: Bearer raw_token") + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message def test_get_connector_class_rejects_unregistered_builtin_entry_point(self, monkeypatch): """Declared built-ins must be registered through entry points.""" diff --git a/tests/connectors/test_secrets.py 
b/tests/connectors/test_secrets.py index ece8a67..cfa80c1 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -344,6 +344,32 @@ def test_cli_run_pipeline_success_without_json_is_error(mock_run: MagicMock, con assert "produced no JSON output" in result["error_message"] +@patch("json.loads") +@patch("subprocess.run") +def test_cli_run_pipeline_success_parse_error_is_redacted( + mock_run: MagicMock, + mock_json_loads: MagicMock, + connector: SecretsConnector, +) -> None: + mock_run.return_value = MagicMock( + returncode=0, + stdout="not json", + stderr="", + ) + mock_json_loads.side_effect = json.JSONDecodeError( + "invalid password=hunter2 Authorization: Bearer raw_token", + "", + 0, + ) + + result = connector.run_pipeline("config.yaml") + + assert result["success"] is False + assert "hunter2" not in result["error_message"] + assert "raw_token" not in result["error_message"] + assert "[REDACTED]" in result["error_message"] + + @patch("subprocess.run") def test_cli_run_pipeline_non_json_failure_uses_cli_output(mock_run: MagicMock, connector: SecretsConnector) -> None: mock_run.return_value = MagicMock( From 2c3a0426ce4f2c1be1951380505dacf1cf961b8b Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:11:36 -0500 Subject: [PATCH 197/287] fix: redact cursor agent errors --- .../connectors/cursor/__init__.py | 5 +++- src/extended_data/connectors/cursor/tools.py | 12 ++++++++- tests/connectors/test_cursor.py | 14 +++++++++++ tests/connectors/test_cursor_tools.py | 25 +++++++++++++++++++ 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index 729821a..f325b67 100644 --- a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -408,7 +408,10 @@ def _request_api( @staticmethod def _model_payload(model: BaseModel) -> dict[str, Any]: """Serialize a Cursor model into 
JSON-compatible API field names.""" - return model.model_dump(mode="json") + payload = model.model_dump(mode="json") + if isinstance(model, Agent) and payload.get("error"): + payload["error"] = sanitize_error(payload["error"]) + return payload # ========================================================================= # Agent Operations diff --git a/src/extended_data/connectors/cursor/tools.py b/src/extended_data/connectors/cursor/tools.py index 232477c..b6e32e7 100644 --- a/src/extended_data/connectors/cursor/tools.py +++ b/src/extended_data/connectors/cursor/tools.py @@ -13,6 +13,16 @@ from extended_data.containers import ExtendedDict, extend_data +def _error_value(error: Any) -> Any: + """Return a sanitized error value while preserving empty values.""" + if not error: + return error + + from extended_data.connectors.cursor import sanitize_error + + return sanitize_error(error) + + def _state_value(state: Any) -> Any: """Return enum values for tool payloads while preserving plain strings.""" return getattr(state, "value", state) @@ -87,7 +97,7 @@ def cursor_get_agent_status(agent_id: str) -> ExtendedDict: { "agent_id": agent.get("id", ""), "state": _state_value(agent.get("state")), - "error": agent.get("error"), + "error": _error_value(agent.get("error")), "pr_url": agent.get("pr_url"), } ) diff --git a/tests/connectors/test_cursor.py b/tests/connectors/test_cursor.py index 6612051..f8bd307 100644 --- a/tests/connectors/test_cursor.py +++ b/tests/connectors/test_cursor.py @@ -124,6 +124,20 @@ def test_sanitize_error_uses_shared_secret_redaction(self): assert "raw_token" not in redacted assert "[REDACTED]" in redacted + def test_agent_model_payload_redacts_error(self): + """Cursor agent payload serialization should redact agent error text.""" + agent = Agent( + id="test-agent-123", + state=AgentState.ERRORED, + error="failed password=hunter2 Authorization: Bearer raw_token", + ) + + payload = CursorConnector._model_payload(agent) + + assert "hunter2" not in 
payload["error"] + assert "raw_token" not in payload["error"] + assert "[REDACTED]" in payload["error"] + class TestModels: """Tests for Pydantic models.""" diff --git a/tests/connectors/test_cursor_tools.py b/tests/connectors/test_cursor_tools.py index 03d2287..72022a2 100644 --- a/tests/connectors/test_cursor_tools.py +++ b/tests/connectors/test_cursor_tools.py @@ -55,3 +55,28 @@ def test_cursor_get_agent_status(): assert result["agent_id"] == "agent_123" assert result["state"] == "finished" assert result["pr_url"] == "https://github.com/org/repo/pull/1" + + +def test_cursor_get_agent_status_redacts_error(): + """Cursor status tool should not expose secret-bearing agent errors.""" + from extended_data.connectors.cursor.tools import cursor_get_agent_status + + with patch("extended_data.connectors.cursor.CursorConnector") as mock_connector_class: + mock_connector = MagicMock() + mock_agent = extend_data( + { + "id": "agent_123", + "state": AgentState.ERRORED, + "error": "failed password=hunter2 Authorization: Bearer raw_token", + "pr_url": None, + } + ) + mock_connector.get_agent_status.return_value = mock_agent + mock_connector_class.return_value = mock_connector + + result = cursor_get_agent_status(agent_id="agent_123") + + assert isinstance(result, ExtendedDict) + assert "hunter2" not in result["error"] + assert "raw_token" not in result["error"] + assert "[REDACTED]" in result["error"] From 3bcdffef0d8a5c04770d1559a3baa233e0e06615 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:14:31 -0500 Subject: [PATCH 198/287] docs: remove future connector promise --- .../connectors/anthropic/__init__.py | 4 ++-- tests/core/test_release_hygiene.py | 20 +++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index 321e206..cc83294 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ 
b/src/extended_data/connectors/anthropic/__init__.py @@ -498,8 +498,8 @@ def execute_agent_task( Note: This is a simplified synchronous implementation. For production agent workflows with tools and multi-turn conversations, consider - using LangChain/LangGraph which will be available in the - extended_data.connectors.ai sub-package. + using an external workflow runner such as LangChain or LangGraph + with the tool builders in extended_data.connectors.ai_tools. """ import time diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 4f00b47..b8fb6c9 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -30,6 +30,7 @@ "vendor_connectors", ) REMOVED_PUBLIC_KEYWORDS = ("prefer_native", "unhump_results") +FUTURE_API_PROMISES = ("will be available", "coming soon") SECRETSSYNC_PROJECT_PATTERNS = ( re.compile(r"\bsecretssync\s+(?:Go\s+)?(?:project|library|repo|repository|CLI|connector|bindings?)\b", re.IGNORECASE), re.compile(r"\b(?:project|library|repo|repository|CLI|connector|bindings?)\s+secretssync\b", re.IGNORECASE), @@ -334,6 +335,25 @@ def test_public_guidance_does_not_use_removed_runtime_keywords() -> None: assert offenders == [] +def test_public_text_does_not_promise_future_api_surfaces() -> None: + """Clean-break docs should describe current surfaces instead of placeholders.""" + offenders: list[str] = [] + paths = [REPO_ROOT / "README.md"] + paths.extend(path for root in (REPO_ROOT / "docs", REPO_ROOT / "examples", REPO_ROOT / "src") for path in root.rglob("*")) + + for path in sorted(path for path in paths if path.is_file()): + if path.suffix in {".pyc", ".png"}: + continue + relative_path = path.relative_to(REPO_ROOT) + for line_number, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1): + normalized = line.lower() + for phrase in FUTURE_API_PROMISES: + if phrase in normalized: + offenders.append(f"{relative_path}:{line_number}: {phrase}") + + assert offenders == [] 
+ + def test_public_guidance_names_secrets_sync_roles_precisely() -> None: """Use SecretSync for the product and reserve exact names for CLI modules.""" offenders: list[str] = [] From 9c323b002cb9bf33f4bde667d7d3dd0954541a2a Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:18:00 -0500 Subject: [PATCH 199/287] fix: redact persisted mesh errors --- .../connectors/meshy/persistence/repository.py | 9 ++++++--- tests/connectors/meshy/test_repository.py | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/extended_data/connectors/meshy/persistence/repository.py b/src/extended_data/connectors/meshy/persistence/repository.py index fbaf752..d3cae7c 100644 --- a/src/extended_data/connectors/meshy/persistence/repository.py +++ b/src/extended_data/connectors/meshy/persistence/repository.py @@ -19,6 +19,7 @@ TaskSubmission, ) from extended_data.connectors.meshy.persistence.utils import compute_spec_hash as util_compute_spec_hash +from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, extend_data @@ -156,6 +157,8 @@ def record_task_update( msg = f"Asset {spec_hash} not found for project {project}" raise ValueError(msg) + safe_error = redact_sensitive_text(error) if error else None + # Find existing task entry or create new task_entry = None for entry in asset_record.task_graph: @@ -172,8 +175,8 @@ def record_task_update( if result_paths: task_entry.result_paths.update(result_paths) - if error: - task_entry.error = error + if safe_error: + task_entry.error = safe_error # Record status transition asset_record.history.append( @@ -196,7 +199,7 @@ def record_task_update( updated_at=_utc_now(), payload=payload or {}, result_paths=result_paths or {}, - error=error, + error=safe_error, ) asset_record.task_graph.append(task_entry) diff --git a/tests/connectors/meshy/test_repository.py b/tests/connectors/meshy/test_repository.py index ceb3905..ed08e1b 
100644 --- a/tests/connectors/meshy/test_repository.py +++ b/tests/connectors/meshy/test_repository.py @@ -241,6 +241,23 @@ def test_record_task_update_with_error(self, repo_with_task): assert task["status"] == "FAILED" assert task["error"] == "Generation failed" + def test_record_task_update_redacts_error(self, repo_with_task): + """Persisted task errors should be redacted at the repository boundary.""" + repo_with_task.record_task_update( + project="project1", + spec_hash="hash-abc", + task_id="task-12345", + status="FAILED", + error="Generation failed password=hunter2 Authorization: Bearer raw_token", + ) + + asset = repo_with_task.get_asset_record("project1", "hash-abc") + assert asset is not None + task = asset["task_graph"][0] + assert "hunter2" not in task["error"] + assert "raw_token" not in task["error"] + assert "[REDACTED]" in task["error"] + def test_record_task_update_adds_history(self, repo_with_task): """Test that updates add history entries.""" repo_with_task.record_task_update( From dbdc468161923d616db5ff72d7828e4b3a8c142f Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:25:27 -0500 Subject: [PATCH 200/287] fix: redact secrets tool payloads --- src/extended_data/connectors/secrets/tools.py | 33 ++++++++--- tests/connectors/test_secrets.py | 55 +++++++++++++++++++ 2 files changed, 79 insertions(+), 9 deletions(-) diff --git a/src/extended_data/connectors/secrets/tools.py b/src/extended_data/connectors/secrets/tools.py index fc6cdc5..ab0c39e 100644 --- a/src/extended_data/connectors/secrets/tools.py +++ b/src/extended_data/connectors/secrets/tools.py @@ -6,10 +6,12 @@ from __future__ import annotations -from typing import Any +from collections.abc import Mapping +from typing import Any, cast from pydantic import BaseModel, Field +from extended_data.connectors.redaction import redact_sensitive_data from extended_data.containers import ExtendedDict, extend_data @@ -54,6 +56,19 @@ class GetConfigInfoSchema(BaseModel): # 
============================================================================= +def _redacted_extended_payload(value: Any) -> ExtendedDict: + """Promote a connector payload after redacting terminal-sensitive fields.""" + return cast(ExtendedDict, extend_data(redact_sensitive_data(value))) + + +def _redacted_mapping(value: Any) -> Mapping[str, Any]: + """Return a redacted mapping view for tool payload summaries.""" + redacted = redact_sensitive_data(value) + if isinstance(redacted, Mapping): + return redacted + return {} + + def validate_config(config_path: str) -> ExtendedDict: """Validate a secrets sync pipeline configuration file. @@ -66,7 +81,7 @@ def validate_config(config_path: str) -> ExtendedDict: from extended_data.connectors.secrets import SecretsConnector connector = SecretsConnector() - return extend_data(connector.validate_config(config_path)) + return _redacted_extended_payload(connector.validate_config(config_path)) def run_pipeline( @@ -120,9 +135,9 @@ def run_pipeline( compute_diff=dry_run, ) - result = connector.run_pipeline(config_path, options) + result = _redacted_mapping(connector.run_pipeline(config_path, options)) - return extend_data({ + return _redacted_extended_payload({ "success": result.get("success", False), "target_count": result.get("target_count", 0), "secrets_processed": result.get("secrets_processed", 0), @@ -148,9 +163,9 @@ def dry_run(config_path: str) -> ExtendedDict: from extended_data.connectors.secrets import SecretsConnector connector = SecretsConnector() - result = connector.dry_run(config_path) + result = _redacted_mapping(connector.dry_run(config_path)) - return extend_data({ + return _redacted_extended_payload({ "success": result.get("success", False), "target_count": result.get("target_count", 0), "secrets_would_add": result.get("secrets_added", 0), @@ -174,7 +189,7 @@ def get_config_info(config_path: str) -> ExtendedDict: from extended_data.connectors.secrets import SecretsConnector connector = SecretsConnector() - 
return extend_data(connector.get_config_info(config_path)) + return _redacted_extended_payload(connector.get_config_info(config_path)) def get_targets(config_path: str) -> ExtendedDict: @@ -189,7 +204,7 @@ def get_targets(config_path: str) -> ExtendedDict: from extended_data.connectors.secrets import SecretsConnector connector = SecretsConnector() - return extend_data(connector.get_targets(config_path)) + return _redacted_extended_payload(connector.get_targets(config_path)) def get_sources(config_path: str) -> ExtendedDict: @@ -204,7 +219,7 @@ def get_sources(config_path: str) -> ExtendedDict: from extended_data.connectors.secrets import SecretsConnector connector = SecretsConnector() - return extend_data(connector.get_sources(config_path)) + return _redacted_extended_payload(connector.get_sources(config_path)) # ============================================================================= diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index cfa80c1..ffe8e24 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -511,6 +511,23 @@ def test_validate_config_tool_returns_extended_payload(mock_connector_class: Mag assert result["config_path"] == "config.yaml" +@patch("extended_data.connectors.secrets.SecretsConnector") +def test_validate_config_tool_redacts_connector_payload(mock_connector_class: MagicMock) -> None: + mock_connector = mock_connector_class.return_value + mock_connector.validate_config.return_value = { + "valid": False, + "message": "invalid password=hunter2 Authorization: Bearer raw_token", + "config_path": "config.yaml", + } + + result = validate_config("config.yaml") + + assert result["valid"] is False + assert "hunter2" not in result["message"] + assert "raw_token" not in result["message"] + assert "[REDACTED]" in result["message"] + + @patch("extended_data.connectors.secrets.SecretsConnector") def test_dry_run_tool_returns_extended_payload(mock_connector_class: MagicMock) -> None: 
mock_connector = mock_connector_class.return_value @@ -531,6 +548,44 @@ def test_dry_run_tool_returns_extended_payload(mock_connector_class: MagicMock) assert result["secrets_would_add"] == 1 +@patch("extended_data.connectors.secrets.SecretsConnector") +def test_run_pipeline_tool_redacts_connector_payload_summary(mock_connector_class: MagicMock) -> None: + mock_connector = mock_connector_class.return_value + mock_connector.run_pipeline.return_value = { + "success": False, + "error_message": "pipeline failed password=hunter2 Authorization: Bearer raw_token", + "diff_output": "changed token=tok_123", + } + + result = run_pipeline("config.yaml", dry_run=True) + + assert result["success"] is False + assert "hunter2" not in result["error_message"] + assert "raw_token" not in result["error_message"] + assert "tok_123" not in result["diff_output"] + assert "[REDACTED]" in result["error_message"] + assert "[REDACTED]" in result["diff_output"] + + +@patch("extended_data.connectors.secrets.SecretsConnector") +def test_dry_run_tool_redacts_connector_payload_summary(mock_connector_class: MagicMock) -> None: + mock_connector = mock_connector_class.return_value + mock_connector.dry_run.return_value = { + "success": False, + "error_message": "dry run failed password=hunter2 Authorization: Bearer raw_token", + "diff_output": "changed token=tok_123", + } + + result = dry_run("config.yaml") + + assert result["success"] is False + assert "hunter2" not in result["error_message"] + assert "raw_token" not in result["error_message"] + assert "tok_123" not in result["diff_output"] + assert "[REDACTED]" in result["error_message"] + assert "[REDACTED]" in result["diff_output"] + + @patch("extended_data.connectors.secrets.SecretsConnector") def test_get_config_info_tool_returns_extended_payload(mock_connector_class: MagicMock) -> None: mock_connector = mock_connector_class.return_value From 02602385ab25c082943f468e851ce7636f8dee50 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 
2026 17:29:00 -0500 Subject: [PATCH 201/287] fix: redact unknown tool frameworks --- src/extended_data/connectors/ai_tools.py | 10 +++++++++- src/extended_data/connectors/anthropic/tools.py | 3 ++- src/extended_data/connectors/aws/tools.py | 3 ++- src/extended_data/connectors/cursor/tools.py | 3 ++- src/extended_data/connectors/github/tools.py | 3 ++- src/extended_data/connectors/google/tools.py | 3 ++- src/extended_data/connectors/meshy/tools.py | 3 ++- src/extended_data/connectors/secrets/tools.py | 3 ++- src/extended_data/connectors/slack/tools.py | 3 ++- src/extended_data/connectors/vault/tools.py | 3 ++- src/extended_data/connectors/zoom/tools.py | 3 ++- tests/connectors/test_tool_frameworks.py | 15 +++++++++++++++ 12 files changed, 44 insertions(+), 11 deletions(-) diff --git a/src/extended_data/connectors/ai_tools.py b/src/extended_data/connectors/ai_tools.py index 85afde0..53de546 100644 --- a/src/extended_data/connectors/ai_tools.py +++ b/src/extended_data/connectors/ai_tools.py @@ -9,10 +9,11 @@ import builtins from collections.abc import Callable, Iterable, Mapping -from typing import Any, cast +from typing import Any, NoReturn, cast from pydantic import BaseModel +from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, extend_data @@ -39,6 +40,13 @@ def get_pydantic_schema(model: builtins.type[BaseModel]) -> ExtendedDict: return cast(ExtendedDict, extend_data(schema)) +def raise_unknown_tool_framework(framework: str) -> NoReturn: + """Raise a redacted unknown-framework diagnostic for AI tool factories.""" + safe_framework = redact_sensitive_text(framework) + msg = f"Unknown framework: {safe_framework}. 
Options: auto, langchain, crewai, strands" + raise ValueError(msg) + + def build_langchain_tools(tool_definitions: Iterable[Mapping[str, Any]]) -> list[Any]: """Build LangChain StructuredTools from connector tool definition mappings.""" try: diff --git a/src/extended_data/connectors/anthropic/tools.py b/src/extended_data/connectors/anthropic/tools.py index 436969e..5240e12 100644 --- a/src/extended_data/connectors/anthropic/tools.py +++ b/src/extended_data/connectors/anthropic/tools.py @@ -11,6 +11,7 @@ from pydantic import BaseModel, Field +from extended_data.connectors.ai_tools import raise_unknown_tool_framework from extended_data.containers import ExtendedDict, ExtendedList, extend_data @@ -154,7 +155,7 @@ def get_tools(framework: str = "auto") -> list[Any]: if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}") + return raise_unknown_tool_framework(framework) __all__ = [ diff --git a/src/extended_data/connectors/aws/tools.py b/src/extended_data/connectors/aws/tools.py index 5c7e9ee..157a339 100644 --- a/src/extended_data/connectors/aws/tools.py +++ b/src/extended_data/connectors/aws/tools.py @@ -32,6 +32,7 @@ from pydantic import BaseModel, Field +from extended_data.connectors.ai_tools import raise_unknown_tool_framework from extended_data.containers import ExtendedDict, ExtendedList, extend_data @@ -386,7 +387,7 @@ def get_tools(framework: str = "auto") -> list[Any]: if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}") + return raise_unknown_tool_framework(framework) # ============================================================================= diff --git a/src/extended_data/connectors/cursor/tools.py b/src/extended_data/connectors/cursor/tools.py index b6e32e7..2a3e94f 100644 --- a/src/extended_data/connectors/cursor/tools.py +++ b/src/extended_data/connectors/cursor/tools.py @@ -10,6 +10,7 @@ from pydantic import BaseModel, Field +from 
extended_data.connectors.ai_tools import raise_unknown_tool_framework from extended_data.containers import ExtendedDict, extend_data @@ -167,7 +168,7 @@ def get_tools(framework: str = "auto") -> list[Any]: if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}") + return raise_unknown_tool_framework(framework) __all__ = [ diff --git a/src/extended_data/connectors/github/tools.py b/src/extended_data/connectors/github/tools.py index 01f8c15..f4efd19 100644 --- a/src/extended_data/connectors/github/tools.py +++ b/src/extended_data/connectors/github/tools.py @@ -10,6 +10,7 @@ from pydantic import BaseModel, Field +from extended_data.connectors.ai_tools import raise_unknown_tool_framework from extended_data.containers import ExtendedDict, ExtendedList, extend_data @@ -354,7 +355,7 @@ def get_tools(framework: str = "auto") -> list[Any]: if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}") + return raise_unknown_tool_framework(framework) # ============================================================================= diff --git a/src/extended_data/connectors/google/tools.py b/src/extended_data/connectors/google/tools.py index 5319444..1583958 100644 --- a/src/extended_data/connectors/google/tools.py +++ b/src/extended_data/connectors/google/tools.py @@ -30,6 +30,7 @@ from pydantic import BaseModel, Field +from extended_data.connectors.ai_tools import raise_unknown_tool_framework from extended_data.containers import ExtendedDict, ExtendedList, extend_data @@ -427,7 +428,7 @@ def get_tools(framework: str = "auto") -> list[Any]: if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}. 
Options: auto, langchain, crewai, strands") + return raise_unknown_tool_framework(framework) # ============================================================================= diff --git a/src/extended_data/connectors/meshy/tools.py b/src/extended_data/connectors/meshy/tools.py index 2a92f5f..59022c3 100644 --- a/src/extended_data/connectors/meshy/tools.py +++ b/src/extended_data/connectors/meshy/tools.py @@ -12,6 +12,7 @@ from pydantic import BaseModel, Field +from extended_data.connectors.ai_tools import raise_unknown_tool_framework from extended_data.containers import ExtendedDict, extend_data @@ -636,7 +637,7 @@ def get_tools(framework: str = "auto") -> list[Any]: if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}. Options: auto, langchain, crewai, strands") + return raise_unknown_tool_framework(framework) # ============================================================================= diff --git a/src/extended_data/connectors/secrets/tools.py b/src/extended_data/connectors/secrets/tools.py index ab0c39e..e9c8c8f 100644 --- a/src/extended_data/connectors/secrets/tools.py +++ b/src/extended_data/connectors/secrets/tools.py @@ -11,6 +11,7 @@ from pydantic import BaseModel, Field +from extended_data.connectors.ai_tools import raise_unknown_tool_framework from extended_data.connectors.redaction import redact_sensitive_data from extended_data.containers import ExtendedDict, extend_data @@ -326,7 +327,7 @@ def get_tools(framework: str = "auto") -> list[Any]: if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}") + return raise_unknown_tool_framework(framework) # ============================================================================= diff --git a/src/extended_data/connectors/slack/tools.py b/src/extended_data/connectors/slack/tools.py index 2c2cc0b..a3bbadd 100644 --- a/src/extended_data/connectors/slack/tools.py +++ 
b/src/extended_data/connectors/slack/tools.py @@ -29,6 +29,7 @@ from pydantic import BaseModel, Field +from extended_data.connectors.ai_tools import raise_unknown_tool_framework from extended_data.containers import ExtendedDict, ExtendedList, extend_data @@ -377,7 +378,7 @@ def get_tools(framework: str = "auto") -> list[Any]: if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}. Options: auto, langchain, crewai, strands") + return raise_unknown_tool_framework(framework) # ============================================================================= diff --git a/src/extended_data/connectors/vault/tools.py b/src/extended_data/connectors/vault/tools.py index 6f662b6..d6ec738 100644 --- a/src/extended_data/connectors/vault/tools.py +++ b/src/extended_data/connectors/vault/tools.py @@ -11,6 +11,7 @@ from pydantic import BaseModel, Field +from extended_data.connectors.ai_tools import raise_unknown_tool_framework from extended_data.containers import ExtendedDict, ExtendedList, extend_data @@ -173,7 +174,7 @@ def get_tools(framework: str = "auto") -> list[Any]: if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}") + return raise_unknown_tool_framework(framework) # ============================================================================= diff --git a/src/extended_data/connectors/zoom/tools.py b/src/extended_data/connectors/zoom/tools.py index 4653f27..a790c47 100644 --- a/src/extended_data/connectors/zoom/tools.py +++ b/src/extended_data/connectors/zoom/tools.py @@ -10,6 +10,7 @@ from pydantic import BaseModel, Field +from extended_data.connectors.ai_tools import raise_unknown_tool_framework from extended_data.containers import ExtendedDict, ExtendedList, extend_data @@ -206,7 +207,7 @@ def get_tools(framework: str = "auto") -> list[Any]: if framework == "strands": return get_strands_tools() - raise ValueError(f"Unknown framework: {framework}") + return 
raise_unknown_tool_framework(framework) # ============================================================================= diff --git a/tests/connectors/test_tool_frameworks.py b/tests/connectors/test_tool_frameworks.py index b35e3a0..1c8b036 100644 --- a/tests/connectors/test_tool_frameworks.py +++ b/tests/connectors/test_tool_frameworks.py @@ -28,3 +28,18 @@ def test_get_tools_rejects_functions_alias(module_path: str) -> None: with pytest.raises(ValueError, match="Unknown framework"): module.get_tools("functions") + + +@pytest.mark.parametrize("module_path", TOOL_MODULES) +def test_get_tools_redacts_unknown_framework_diagnostics(module_path: str) -> None: + """Unknown framework diagnostics should not echo secret-bearing input.""" + module = importlib.import_module(module_path) + + with pytest.raises(ValueError) as exc_info: + module.get_tools("password=hunter2 Authorization: Bearer raw_token") + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + assert "auto, langchain, crewai, strands" in message From 5e06fbe6b57aefa8054526e7ab8b072c2685a926 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:31:55 -0500 Subject: [PATCH 202/287] fix: redact vault diagnostics --- .../connectors/vault/__init__.py | 76 ++++++++++-------- tests/connectors/test_vault_connector.py | 78 ++++++++++++++++++- 2 files changed, 122 insertions(+), 32 deletions(-) diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index 6f3bf05..665480e 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -8,6 +8,7 @@ from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from 
extended_data.logging import Logging from extended_data.primitives import is_nothing @@ -42,6 +43,11 @@ def _load_hvac() -> Any: VAULT_APPROLE_PATH_ENV_VAR = "VAULT_APPROLE_PATH" +def _safe_log_text(value: Any) -> str: + """Return a redacted string for Vault diagnostic output.""" + return redact_sensitive_text(value) + + class VaultConnector(VendorConnectorBase): """Vault connector with token and AppRole authentication.""" @@ -91,7 +97,7 @@ def vault_client(self) -> hvac.Client: return self._vault_client except VaultError as e: - self.logger.exception(f"Error initializing Vault client with token: {e}") + self.logger.exception(f"Error initializing Vault client with token: {_safe_log_text(e)}") # Fallback to AppRole authentication self.logger.info("Attempting AppRole authentication") @@ -120,7 +126,7 @@ def vault_client(self) -> hvac.Client: return self._vault_client except VaultError as e: - self.logger.exception(f"Error during AppRole authentication: {e}") + self.logger.exception(f"Error during AppRole authentication: {_safe_log_text(e)}") raise msg = "Vault authentication failed: no valid token or AppRole credentials provided" @@ -142,7 +148,7 @@ def _set_token_expiration(self) -> None: # No need to manually set tzinfo if running on Python 3.7 or newer. # If supporting Python <3.7, manual tzinfo assignment is required. 
except VaultError as e: - self.logger.exception(f"Failed to lookup Vault token expiration: {e}") + self.logger.exception(f"Failed to lookup Vault token expiration: {_safe_log_text(e)}") def _is_token_valid(self) -> bool: """Check if the current Vault token is still valid.""" @@ -194,7 +200,7 @@ def list_secrets( raise ValueError(msg) display_root = root_path if root_path not in (None, "", "/") else "/" - self.logger.info(f"Listing Vault secrets from {mount_point}{display_root}") + self.logger.info(f"Listing Vault secrets from {_safe_log_text(mount_point)}{_safe_log_text(display_root)}") secrets: dict[str, dict[str, Any]] = {} client = self.vault_client @@ -214,7 +220,7 @@ def list_secrets( for key in root_result.get("data", {}).get("keys", []) ] except VaultError as e: - self.logger.warning(f"Invalid root path {display_root}: {e}") + self.logger.warning(f"Invalid root path {_safe_log_text(display_root)}: {_safe_log_text(e)}") return self.extend_result(secrets) stack: deque[tuple[str, int]] = deque(initial_paths) @@ -230,9 +236,9 @@ def list_secrets( mount_point=mount_point, )["data"]["data"] secrets[current_path] = secret_data - self.logger.debug(f"Retrieved secret: {current_path}") + self.logger.debug(f"Retrieved secret: {_safe_log_text(current_path)}") except VaultError as e: - self.logger.warning(f"Failed to read secret {current_path}: {e}") + self.logger.warning(f"Failed to read secret {_safe_log_text(current_path)}: {_safe_log_text(e)}") # It's a directory, list its contents if within max_depth elif max_depth is None or depth < max_depth: try: @@ -245,7 +251,7 @@ def list_secrets( new_path = f"{current_path}{key}" # current_path already ends with / stack.append((new_path, depth + 1)) except VaultError as e: - self.logger.warning(f"Failed to list path {current_path}: {e}") + self.logger.warning(f"Failed to list path {_safe_log_text(current_path)}: {_safe_log_text(e)}") self.logger.info(f"Listed {len(secrets)} Vault secrets") return self.extend_result(secrets) 
@@ -274,7 +280,7 @@ def read_secret( return None return self.extend_result(data) except VaultError as e: - self.logger.warning(f"Failed to read secret {path}: {e}") + self.logger.warning(f"Failed to read secret {_safe_log_text(path)}: {_safe_log_text(e)}") return None def get_secret( @@ -300,7 +306,9 @@ def get_secret( Returns: Secret data dict, or None if not found. """ - self.logger.debug(f"Getting Vault secret: path={path}, secret_name={secret_name}") + self.logger.debug( + f"Getting Vault secret: path={_safe_log_text(path)}, secret_name={_safe_log_text(secret_name)}" + ) client = self.vault_client secret_data = None @@ -309,29 +317,29 @@ def get_secret( if not is_nothing(secret_name): # Build the full path: path/secret_name or just secret_name if path is "/" secret_path = f"{path}/{secret_name}" if path and path != "/" else secret_name - self.logger.debug(f"Resolved secret path: {secret_path}") + self.logger.debug(f"Resolved secret path: {_safe_log_text(secret_path)}") try: secret_data = client.secrets.kv.v2.read_secret_version(path=secret_path, mount_point=mount_point)[ "data" ]["data"] - self.logger.debug(f"Retrieved secret data for {secret_path}") + self.logger.debug(f"Retrieved secret data for {_safe_log_text(secret_path)}") except VaultError as e: self.logger.warning( - f"Failed to find secret at {path}" - + (f"/{secret_name}" if not is_nothing(secret_name) else "") - + f": {e}" + f"Failed to find secret at {_safe_log_text(path)}" + + (f"/{_safe_log_text(secret_name)}" if not is_nothing(secret_name) else "") + + f": {_safe_log_text(e)}" ) return self.extend_result(secret_data) if secret_data is not None else None # No secret_name provided - search under path - self.logger.info(f"Finding secrets under {path}") + self.logger.info(f"Finding secrets under {_safe_log_text(path)}") matching_secret_paths = self.list_secrets(root_path=path, mount_point=mount_point) self.logger.debug(f"Found {len(matching_secret_paths)} potential secrets") if 
is_nothing(matching_secret_paths): - self.logger.warning(f"No secrets found matching {path}") + self.logger.warning(f"No secrets found matching {_safe_log_text(path)}") return None # Convert to deque for efficient popleft iteration @@ -339,15 +347,18 @@ def get_secret( while path_queue and secret_data is None: secret_path = path_queue.popleft() - self.logger.debug(f"Checking secret path: {secret_path}") + safe_secret_path = _safe_log_text(secret_path) + self.logger.debug(f"Checking secret path: {safe_secret_path}") try: matching_secret_data = client.secrets.kv.v2.read_secret_version( path=secret_path, mount_point=mount_point )["data"]["data"] - self.logger.debug(f"Secret data for {secret_path}: {list(matching_secret_data.keys())}") + self.logger.debug( + f"Secret data for {safe_secret_path}: {_safe_log_text(list(matching_secret_data.keys()))}" + ) except VaultError: - self.logger.warning(f"{secret_path} is empty or invalid, skipping it") + self.logger.warning(f"{safe_secret_path} is empty or invalid, skipping it") continue # If no matchers, take the first non-empty secret @@ -361,7 +372,7 @@ def get_secret( for k, v in matchers.items(): datum = matching_secret_data.get(k) if datum == v: - self.logger.info(f"Matching {secret_path} on matcher {k}: {datum} equals {v}") + self.logger.info(f"Matched {safe_secret_path} on matcher {_safe_log_text(k)}") found_match = True break @@ -392,10 +403,10 @@ def write_secret( secret=data, mount_point=mount_point, ) - self.logger.info(f"Wrote secret to {path}") + self.logger.info(f"Wrote secret to {_safe_log_text(path)}") return True except VaultError as e: - self.logger.exception(f"Failed to write secret {path}: {e}") + self.logger.exception(f"Failed to write secret {_safe_log_text(path)}: {_safe_log_text(e)}") return False # --------------------------------------------------------------------- @@ -424,14 +435,16 @@ def list_aws_iam_roles( try: response = aws_secrets.list_roles(mount_point=mount_point) except VaultError as e: - 
self.logger.warning(f"Failed to list AWS IAM roles from mount {mount_point}: {e}") + self.logger.warning( + f"Failed to list AWS IAM roles from mount {_safe_log_text(mount_point)}: {_safe_log_text(e)}" + ) return self.extend_result([]) role_names = response.get("data", {}).get("keys", []) or [] if prefix: role_names = [role for role in role_names if role.startswith(prefix)] - self.logger.info(f"Found {len(role_names)} AWS IAM roles under mount {mount_point}") + self.logger.info(f"Found {len(role_names)} AWS IAM roles under mount {_safe_log_text(mount_point)}") return self.extend_result(role_names) def get_aws_iam_role( @@ -457,12 +470,12 @@ def get_aws_iam_role( try: response = self.vault_client.secrets.aws.read_role(name=role_name, mount_point=mount_point) except VaultError as e: - self.logger.warning(f"Failed to read AWS IAM role {role_name}: {e}") + self.logger.warning(f"Failed to read AWS IAM role {_safe_log_text(role_name)}: {_safe_log_text(e)}") return None role_data = response.get("data") if is_nothing(role_data): - self.logger.warning(f"AWS IAM role {role_name} exists but returned no data") + self.logger.warning(f"AWS IAM role {_safe_log_text(role_name)} exists but returned no data") return None return self.extend_result(role_data) @@ -505,14 +518,15 @@ def generate_aws_credentials( try: response = aws_secrets.generate_credentials(name=role_name, mount_point=mount_point, **generate_kwargs) except VaultError as e: - self.logger.exception(f"Failed to generate AWS credentials for role {role_name}: {e}") - raise RuntimeError(f"Failed to generate AWS credentials for role {role_name}") from e + safe_role_name = _safe_log_text(role_name) + self.logger.exception(f"Failed to generate AWS credentials for role {safe_role_name}: {_safe_log_text(e)}") + raise RuntimeError(f"Failed to generate AWS credentials for role {safe_role_name}") from e credentials = response.get("data") or {} if not credentials: - raise RuntimeError(f"Vault returned empty credentials for role 
{role_name}") + raise RuntimeError(f"Vault returned empty credentials for role {_safe_log_text(role_name)}") - self.logger.info(f"Generated AWS credentials for role {role_name}") + self.logger.info(f"Generated AWS credentials for role {_safe_log_text(role_name)}") return self.extend_result(credentials) diff --git a/tests/connectors/test_vault_connector.py b/tests/connectors/test_vault_connector.py index cd321a2..058c750 100644 --- a/tests/connectors/test_vault_connector.py +++ b/tests/connectors/test_vault_connector.py @@ -12,10 +12,19 @@ from hvac.exceptions import VaultError -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data from extended_data.connectors.vault import VaultConnector +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join( + str(arg) + for call in logger.method_calls + for arg in call.args + ) + + class TestVaultConnector: """Test suite for VaultConnector.""" @@ -147,6 +156,27 @@ def test_list_secrets_handles_invalid_root(self, base_connector_kwargs): mount_point="secret", ) + def test_list_secrets_redacts_vault_error_logs(self, base_connector_kwargs): + """Vault list failures should not log raw secret-bearing exception text.""" + connector = VaultConnector( + vault_url="https://vault.example.com", vault_token="test-token", **base_connector_kwargs + ) + + mock_client = MagicMock() + connector._vault_client = mock_client + connector._vault_token_expiration = datetime(2099, 1, 1, tzinfo=timezone.utc) + mock_client.secrets.kv.v2.list_secrets.side_effect = VaultError( + "denied password=hunter2 Authorization: Bearer raw_token" + ) + + secrets = connector.list_secrets(root_path="does/not/exist") + + logs = _logged_text(connector.logger) + assert secrets == {} + assert "hunter2" not in logs + assert "raw_token" not in logs + assert "[REDACTED]" in logs + def 
test_list_secrets_rejects_path_traversal(self, base_connector_kwargs): """Ensure list_secrets rejects path traversal in root_path.""" connector = VaultConnector( @@ -242,6 +272,27 @@ def test_get_aws_iam_role_handles_errors(self, base_connector_kwargs): assert connector.get_aws_iam_role(role_name="missing") is None + def test_get_secret_matcher_logs_redact_secret_values(self, base_connector_kwargs): + """Matcher-success logs should not expose matched Vault secret values.""" + connector = VaultConnector( + vault_url="https://vault.example.com", vault_token="test-token", **base_connector_kwargs + ) + + mock_client = MagicMock() + connector._vault_client = mock_client + connector._vault_token_expiration = datetime(2099, 1, 1, tzinfo=timezone.utc) + connector.list_secrets = MagicMock(return_value=extend_data({"prod/db": {}})) # type: ignore[method-assign] + mock_client.secrets.kv.v2.read_secret_version.return_value = { + "data": {"data": {"password": "hunter2", "username": "admin"}} + } + + secret = connector.get_secret(path="prod", matchers={"password": "hunter2"}) + + logs = _logged_text(connector.logger) + assert secret == {"password": "hunter2", "username": "admin"} + assert "hunter2" not in logs + assert "Matched prod/db on matcher password" in logs + def test_generate_aws_credentials_success(self, base_connector_kwargs): """generate_aws_credentials should return the generated credential payload.""" connector = VaultConnector( @@ -282,3 +333,28 @@ def test_generate_aws_credentials_error(self, base_connector_kwargs): with pytest.raises(RuntimeError): connector.generate_aws_credentials(role_name="prod") + + def test_generate_aws_credentials_redacts_error_diagnostics(self, base_connector_kwargs): + """Vault credential failures should redact role names and exception payloads.""" + connector = VaultConnector( + vault_url="https://vault.example.com", vault_token="test-token", **base_connector_kwargs + ) + + mock_client = MagicMock() + connector._vault_client = 
mock_client + connector._vault_token_expiration = datetime(2099, 1, 1, tzinfo=timezone.utc) + mock_client.secrets.aws.generate_credentials.side_effect = VaultError( + "denied api_key=key_123 Authorization: Bearer raw_token" + ) + + with pytest.raises(RuntimeError) as exc_info: + connector.generate_aws_credentials(role_name="prod password=hunter2") + + logs = _logged_text(connector.logger) + message = str(exc_info.value) + assert "hunter2" not in logs + assert "key_123" not in logs + assert "raw_token" not in logs + assert "hunter2" not in message + assert "[REDACTED]" in logs + assert "[REDACTED]" in message From 7359f5cfcf86a58844140b4f741d0eba14a6701e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:36:14 -0500 Subject: [PATCH 203/287] feat: promote redaction primitives --- README.md | 19 +++++++------ docs/package-surface.md | 28 +++++++++++++------ src/extended_data/connectors/ai_tools.py | 2 +- .../connectors/anthropic/__init__.py | 2 +- src/extended_data/connectors/base.py | 2 +- src/extended_data/connectors/cli.py | 2 +- .../connectors/cursor/__init__.py | 2 +- src/extended_data/connectors/google/jules.py | 2 +- src/extended_data/connectors/mcp.py | 2 +- src/extended_data/connectors/meshy/base.py | 2 +- src/extended_data/connectors/meshy/mcp.py | 2 +- .../meshy/persistence/repository.py | 2 +- .../connectors/meshy/webhooks/handler.py | 2 +- src/extended_data/connectors/registry.py | 2 +- .../connectors/secrets/__init__.py | 2 +- src/extended_data/connectors/secrets/tools.py | 2 +- .../connectors/slack/__init__.py | 2 +- .../connectors/vault/__init__.py | 2 +- src/extended_data/io/exporters.py | 3 +- src/extended_data/primitives/__init__.py | 3 ++ .../{connectors => primitives}/redaction.py | 4 +-- tests/core/test_export_utils.py | 11 ++++++++ tests/{connectors => core}/test_redaction.py | 6 ++-- 23 files changed, 67 insertions(+), 39 deletions(-) rename src/extended_data/{connectors => primitives}/redaction.py (94%) rename 
tests/{connectors => core}/test_redaction.py (86%) diff --git a/README.md b/README.md index a748be5..59f5cf4 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ CrewAI releases pull vulnerable `chromadb` versions transitively. ```python from extended_data import ConnectorFabric, DataWorkflow, ExtendedDict, InputProvider, Logging, decode_file -from extended_data.primitives import decode_json, encode_yaml, number_to_words +from extended_data.primitives import decode_json, encode_yaml, number_to_words, redact_sensitive_text logger = Logging(logger_name="example") inputs = InputProvider(inputs={"GITHUB_OWNER": "jbcom"}, from_environment=False) @@ -51,6 +51,7 @@ workflow = DataWorkflow.from_value(payload).then(("normalize", lambda data: data print(encode_yaml(payload)) print(decoded_file["service"]["name"].upper_first()) print(number_to_words(42)) +print(redact_sensitive_text("Authorization: Bearer raw_token")) print(workflow.as_builtin()) ``` @@ -114,9 +115,11 @@ Tier 1 primitive names are explicit in this major version and live under `bytes_to_string()` for bytes-like coercion and `string_to_bool()`, `string_to_int()`, `string_to_float()`, `string_to_path()`, `string_to_date()`, `string_to_datetime()`, and `string_to_time()` for scalar -string conversion. The old `bytestostr` and `strto*` helper names are not -preserved. Old package import namespaces are not shimmed; missing imports are -intentional so remaining migration work fails fast. +string conversion. Use `redact_sensitive_text()` and +`redact_sensitive_data()` for diagnostic and JSON-like payload redaction. The +old `bytestostr` and `strto*` helper names are not preserved. Old package +import namespaces are not shimmed; missing imports are intentional so remaining +migration work fails fast. Tier 1 public exports stay function-oriented; use `get_default_dict()` for nested or sorted default mappings instead of importing the internal helper class. 
@@ -143,10 +146,10 @@ payload contract; framework factory functions still return framework tool objects. The generic CLI `call` command and MCP bridge expose only methods that advertise Extended Data payload returns. -Serialized CLI/MCP boundaries and connector API error messages redact common -secret-bearing keys and token-shaped strings, so connector data methods can -return structured vendor payloads without making stdout, tool responses, or -raised transport errors a secret leak by default. +Serialized CLI/MCP boundaries and connector API error messages reuse the Tier 1 +redaction primitives for common secret-bearing keys and token-shaped strings, +so connector data methods can return structured vendor payloads without making +stdout, tool responses, or raised transport errors a secret leak by default. Raw SDK/client objects and raw transport responses remain available from the methods that explicitly return them. diff --git a/docs/package-surface.md b/docs/package-surface.md index 682c245..0ac55c4 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -28,14 +28,20 @@ from extended_data import ( extend_data, to_builtin, ) -from extended_data.primitives import decode_json, encode_yaml, normalize_data_encoding, number_to_words +from extended_data.primitives import ( + decode_json, + encode_yaml, + normalize_data_encoding, + number_to_words, + redact_sensitive_text, +) ``` ## Tiers - Tier 1 `extended_data.primitives` modules are pure functions and codecs for - strings, numbers, maps, lists, matching, state, type coercion, and structured - formats. + strings, numbers, maps, lists, matching, state, redaction, type coercion, and + structured formats. - Tier 2 `extended_data.containers` classes wrap Python container primitives as `ExtendedString`, `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, and `ExtendedSet` with ergonomic methods over Tier 1 primitives. They use @@ -55,6 +61,9 @@ from the package root. 
Tier 1 public exports stay function-oriented; use `get_default_dict()` when a workflow needs nested or sorted default mappings rather than importing the internal sorted-default mapping helper class. +Use `redact_sensitive_text()` and `redact_sensitive_data()` when diagnostics or +JSON-like payloads need common secret-bearing keys and token-shaped strings +removed before display. Direct JSON, YAML, TOML, and HCL primitive decode failures raise `DataDecodeError` with format and position context while preserving the parser @@ -279,12 +288,13 @@ their payload returns as `ExtendedDict` or `ExtendedList[ExtendedDict]`. The generic CLI `call` command and MCP bridge expose only connector methods that advertise Extended Data payload returns, so raw SDK client factories and low-level HTTP helpers do not leak into serialized tool catalogs. -Serialized CLI/MCP boundaries apply redaction after Tier 2 containers are -lowered to JSON-compatible data, and connector API error messages use the same -redaction policy before exceptions are raised. Common secret-bearing keys such -as `password`, `api_key`, `access_token`, `authorization`, and `client_secret`, -plus token-like strings in error text, are replaced with `[REDACTED]` before -CLI stdout/stderr, MCP tool responses, or raised transport errors expose them. +Serialized CLI/MCP boundaries apply Tier 1 redaction after Tier 2 containers +are lowered to JSON-compatible data, and connector API error messages use the +same redaction policy before exceptions are raised. Common secret-bearing keys +such as `password`, `api_key`, `access_token`, `authorization`, and +`client_secret`, plus token-like strings in error text, are replaced with +`[REDACTED]` before CLI stdout/stderr, MCP tool responses, or raised transport +errors expose them. LangChain, CrewAI, Strands, and auto-detection factory functions still return plain framework tool object lists. 
diff --git a/src/extended_data/connectors/ai_tools.py b/src/extended_data/connectors/ai_tools.py index 53de546..a28558b 100644 --- a/src/extended_data/connectors/ai_tools.py +++ b/src/extended_data/connectors/ai_tools.py @@ -13,8 +13,8 @@ from pydantic import BaseModel -from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, extend_data +from extended_data.primitives.redaction import redact_sensitive_text def get_pydantic_schema(model: builtins.type[BaseModel]) -> ExtendedDict: diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index cc83294..945fc36 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ b/src/extended_data/connectors/anthropic/__init__.py @@ -38,9 +38,9 @@ from pydantic import BaseModel, ConfigDict, Field from extended_data.connectors.base import VendorConnectorBase -from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, extend_data, to_builtin from extended_data.logging import Logging +from extended_data.primitives.redaction import redact_sensitive_text if TYPE_CHECKING: diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index bda7a9f..a30fc25 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -44,9 +44,9 @@ def my_operation(self) -> ExtendedDict: wait_exponential, ) -from extended_data.connectors.redaction import redact_sensitive_text from extended_data.inputs import InputProvider from extended_data.logging import Logging +from extended_data.primitives.redaction import redact_sensitive_text if sys.version_info >= (3, 11): diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index 10e4431..2c2aebb 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -23,7 +23,6 @@ from 
collections.abc import Mapping from typing import Any -from extended_data.connectors.redaction import redact_sensitive_text from extended_data.connectors.registry import ( get_connector, get_connector_class, @@ -33,6 +32,7 @@ from extended_data.connectors.surface import connector_data_methods, is_connector_data_method from extended_data.containers import ExtendedList from extended_data.containers.factory import to_builtin +from extended_data.primitives.redaction import redact_sensitive_text def _json_output(data: Any) -> str: diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index f325b67..c4a2592 100644 --- a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -34,9 +34,9 @@ from pydantic import BaseModel, ConfigDict, Field from extended_data.connectors.base import VendorConnectorBase -from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.logging import Logging +from extended_data.primitives.redaction import redact_sensitive_text if TYPE_CHECKING: diff --git a/src/extended_data/connectors/google/jules.py b/src/extended_data/connectors/google/jules.py index dee54ed..15e79ba 100644 --- a/src/extended_data/connectors/google/jules.py +++ b/src/extended_data/connectors/google/jules.py @@ -34,8 +34,8 @@ from pydantic import BaseModel, Field from extended_data.connectors.base import VendorConnectorBase -from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList +from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text __all__ = [ diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index a18269d..a56a110 100644 --- a/src/extended_data/connectors/mcp.py +++ 
b/src/extended_data/connectors/mcp.py @@ -28,10 +28,10 @@ from collections.abc import Callable, Iterable, Mapping from typing import Any, cast -from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text from extended_data.connectors.registry import _list_connector_classes, get_connector from extended_data.connectors.surface import connector_data_methods from extended_data.containers import to_builtin +from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text def _check_mcp_installed() -> bool: diff --git a/src/extended_data/connectors/meshy/base.py b/src/extended_data/connectors/meshy/base.py index 60a71dc..9568b4d 100644 --- a/src/extended_data/connectors/meshy/base.py +++ b/src/extended_data/connectors/meshy/base.py @@ -21,8 +21,8 @@ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential -from extended_data.connectors.redaction import redact_sensitive_text from extended_data.inputs import InputProvider +from extended_data.primitives.redaction import redact_sensitive_text class RateLimitError(Exception): diff --git a/src/extended_data/connectors/meshy/mcp.py b/src/extended_data/connectors/meshy/mcp.py index 2aac1c4..4870d62 100644 --- a/src/extended_data/connectors/meshy/mcp.py +++ b/src/extended_data/connectors/meshy/mcp.py @@ -34,8 +34,8 @@ from collections.abc import Callable, Iterable, Mapping from typing import Any, cast -from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text from extended_data.containers import to_builtin +from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text MCP_INSTALL_MESSAGE = "MCP SDK not installed. 
Install with: pip install extended-data[meshy,mcp]" diff --git a/src/extended_data/connectors/meshy/persistence/repository.py b/src/extended_data/connectors/meshy/persistence/repository.py index d3cae7c..aa8e39c 100644 --- a/src/extended_data/connectors/meshy/persistence/repository.py +++ b/src/extended_data/connectors/meshy/persistence/repository.py @@ -19,8 +19,8 @@ TaskSubmission, ) from extended_data.connectors.meshy.persistence.utils import compute_spec_hash as util_compute_spec_hash -from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, extend_data +from extended_data.primitives.redaction import redact_sensitive_text def _utc_now() -> datetime: diff --git a/src/extended_data/connectors/meshy/webhooks/handler.py b/src/extended_data/connectors/meshy/webhooks/handler.py index 942f197..784f93f 100644 --- a/src/extended_data/connectors/meshy/webhooks/handler.py +++ b/src/extended_data/connectors/meshy/webhooks/handler.py @@ -10,8 +10,8 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.webhooks.schemas import MeshyWebhookPayload -from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, extend_data, to_builtin +from extended_data.primitives.redaction import redact_sensitive_text from ..persistence.repository import TaskRepository from ..persistence.schemas import ArtifactRecord diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 9f40874..bfd17ae 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -41,8 +41,8 @@ get_extra_for_connector, get_missing_connector_requirements, ) -from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data +from extended_data.primitives.redaction import 
redact_sensitive_text if TYPE_CHECKING: diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index e3d39f7..d1422e5 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -40,9 +40,9 @@ from typing import Any from extended_data.connectors.base import VendorConnectorBase -from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text from extended_data.containers import ExtendedDict, extend_data from extended_data.logging import Logging +from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text class SyncOperation(str, Enum): diff --git a/src/extended_data/connectors/secrets/tools.py b/src/extended_data/connectors/secrets/tools.py index e9c8c8f..8ce138e 100644 --- a/src/extended_data/connectors/secrets/tools.py +++ b/src/extended_data/connectors/secrets/tools.py @@ -12,8 +12,8 @@ from pydantic import BaseModel, Field from extended_data.connectors.ai_tools import raise_unknown_tool_framework -from extended_data.connectors.redaction import redact_sensitive_data from extended_data.containers import ExtendedDict, extend_data +from extended_data.primitives.redaction import redact_sensitive_data # ============================================================================= diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index 9468863..90ab2bb 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -25,11 +25,11 @@ def batched(iterable: Iterable[Any], n: int) -> Iterator[tuple[Any, ...]]: from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase -from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, 
ExtendedString, extend_data, to_builtin from extended_data.io import wrap_raw_data_for_export from extended_data.logging import Logging from extended_data.primitives import is_nothing +from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text class SlackFallbackError(Exception): diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index 665480e..4f20b9f 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -8,10 +8,10 @@ from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase -from extended_data.connectors.redaction import redact_sensitive_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.logging import Logging from extended_data.primitives import is_nothing +from extended_data.primitives.redaction import redact_sensitive_text if TYPE_CHECKING: diff --git a/src/extended_data/io/exporters.py b/src/extended_data/io/exporters.py index 52880a4..dc838eb 100644 --- a/src/extended_data/io/exporters.py +++ b/src/extended_data/io/exporters.py @@ -19,6 +19,7 @@ encode_yaml, is_yaml_data, ) +from extended_data.primitives.redaction import redact_sensitive_text from extended_data.primitives.serialization import normalize_data_encoding from extended_data.primitives.types import convert_special_types, string_to_bool @@ -64,7 +65,7 @@ def wrap_raw_data_for_export( allow_encoding_bool = string_to_bool(allow_encoding, raise_on_error=True) allow_encoding = allow_encoding_bool if isinstance(allow_encoding_bool, bool) else allow_encoding except ValueError as e: - raise ValueError(f"Invalid allow_encoding value: {allow_encoding}") from e + raise ValueError(f"Invalid allow_encoding value: {redact_sensitive_text(allow_encoding)}") from e # Determine the encoding based on boolean allow_encoding and YAML data check if 
allow_encoding: diff --git a/src/extended_data/primitives/__init__.py b/src/extended_data/primitives/__init__.py index 1562cdc..340fedf 100644 --- a/src/extended_data/primitives/__init__.py +++ b/src/extended_data/primitives/__init__.py @@ -39,6 +39,7 @@ number_to_words, to_roman, ) +from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text from extended_data.primitives.sequences import filter_list, flatten_list from extended_data.primitives.serialization import normalize_data_encoding from extended_data.primitives.splitting import split_dict_by_type, split_list_by_type @@ -141,6 +142,8 @@ "pluralize", "reconstruct_special_type", "reconstruct_special_types", + "redact_sensitive_data", + "redact_sensitive_text", "sanitize_key", "singularize", "split_dict_by_type", diff --git a/src/extended_data/connectors/redaction.py b/src/extended_data/primitives/redaction.py similarity index 94% rename from src/extended_data/connectors/redaction.py rename to src/extended_data/primitives/redaction.py index bd802ee..25ef40a 100644 --- a/src/extended_data/connectors/redaction.py +++ b/src/extended_data/primitives/redaction.py @@ -1,4 +1,4 @@ -"""Redaction helpers for connector output boundaries.""" +"""Tier 1 redaction helpers for diagnostics and JSON-like data.""" from __future__ import annotations @@ -48,7 +48,7 @@ def redact_sensitive_text(message: Any) -> str: def redact_sensitive_data(value: Any) -> Any: - """Recursively redact common secret fields in JSON-like connector data.""" + """Recursively redact common secret fields in JSON-like data.""" if isinstance(value, Mapping): redacted: dict[Any, Any] = {} for key, item in value.items(): diff --git a/tests/core/test_export_utils.py b/tests/core/test_export_utils.py index fb445c2..fa25970 100644 --- a/tests/core/test_export_utils.py +++ b/tests/core/test_export_utils.py @@ -156,6 +156,17 @@ def test_wrap_raw_data_for_export_raw_false_and_invalid_values() -> None: 
wrap_raw_data_for_export(raw_data, allow_encoding="xml") +def test_wrap_raw_data_for_export_redacts_invalid_encoding_value() -> None: + """Invalid export-option diagnostics should not echo secret-bearing values.""" + with pytest.raises(ValueError) as exc_info: + wrap_raw_data_for_export({}, allow_encoding="password=hunter2 Authorization: Bearer raw_token") + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + + def test_wrap_raw_data_for_export_boolean_string_preserves_yaml_native_data() -> None: """Auto-encoding should keep YAML-native tagged values in YAML form.""" raw_data = {"bucket_name": YamlTagged("!Ref", "BucketName")} diff --git a/tests/connectors/test_redaction.py b/tests/core/test_redaction.py similarity index 86% rename from tests/connectors/test_redaction.py rename to tests/core/test_redaction.py index 188ea37..5f52949 100644 --- a/tests/connectors/test_redaction.py +++ b/tests/core/test_redaction.py @@ -1,8 +1,8 @@ -"""Tests for connector output redaction helpers.""" +"""Tests for Tier 1 redaction helpers.""" from __future__ import annotations -from extended_data.connectors.redaction import redact_sensitive_data, redact_sensitive_text +from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text def test_redact_sensitive_text_preserves_json_shape() -> None: @@ -20,7 +20,7 @@ def test_redact_sensitive_text_preserves_json_shape() -> None: def test_redact_sensitive_data_recurses_through_json_like_payloads() -> None: - """Structured redaction should handle nested connector data.""" + """Structured redaction should handle nested JSON-like data.""" payload = { "password": "hunter2", "nested": [{"api_key": "key_123", "value": "ok"}], From 987f76a1ca502859ecf5681028d1c10901a8e952 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:38:51 -0500 Subject: [PATCH 204/287] test: guard redaction primitive surface --- 
tests/core/test_package_surface.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index b29de94..39704f6 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -227,6 +227,14 @@ def test_tier2_container_methods_expose_integrated_primitives() -> None: assert export_safe == {"launched": "2026-06-10"} +def test_redaction_is_a_tier1_primitive_not_connector_local() -> None: + """Diagnostic redaction should live with reusable Tier 1 utilities.""" + assert primitives.redact_sensitive_text("password=hunter2") == "password=[REDACTED]" + assert primitives.redact_sensitive_data({"api_key": "key_123"}) == {"api_key": "[REDACTED]"} + assert util.find_spec("extended_data.connectors.redaction") is None + assert not hasattr(connectors, "redact_sensitive_text") + + def test_connectors_root_exports_builtin_connector_classes() -> None: """Every built-in registry connector class is exported from the connector package root.""" for spec in BUILTIN_CONNECTORS.values(): From 920b991d02be5e2847003ee96d8cecf3bec2a7c1 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:42:19 -0500 Subject: [PATCH 205/287] fix: redact vendor connector diagnostics --- .../connectors/slack/__init__.py | 13 ++- src/extended_data/connectors/zoom/__init__.py | 37 ++++++-- tests/connectors/test_slack_connector.py | 20 +++++ tests/connectors/test_zoom_connector.py | 87 +++++++++++++++++++ 4 files changed, 145 insertions(+), 12 deletions(-) diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index 90ab2bb..b6c0041 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -524,15 +524,16 @@ def _call_api( TimeoutError: If rate-limited retries exceed `MAX_RETRY_TIMEOUT_SECONDS`. 
""" call = getattr(self.web_client, method, None) + safe_method = redact_sensitive_text(method) if call is None: - raise AttributeError(f"{method} is not supported by the Slack WebClient") + raise AttributeError(f"{safe_method} is not supported by the Slack WebClient") response: Any | None = None attempt = 1 total_delay = 0 while not response: - self.logger.debug(f"[Attempt {attempt}] Calling Slack WebClient {method}...") + self.logger.debug(f"[Attempt {attempt}] Calling Slack WebClient {safe_method}...") try: response = call(**kwargs) except SlackApiError as exc: @@ -540,7 +541,9 @@ def _call_api( delay = int(exc.response.headers["Retry-After"]) total_delay += delay if total_delay > MAX_RETRY_TIMEOUT_SECONDS: - raise TimeoutError(f"Slack WebClient {method} timed out after {total_delay} seconds") from exc + raise TimeoutError( + f"Slack WebClient {safe_method} timed out after {total_delay} seconds" + ) from exc self.logger.warning(f"Rate limited. Retrying in {delay} seconds") sleep(delay) attempt += 1 @@ -554,7 +557,9 @@ def _call_api( for datum in response.get(group_by, {}): datum_id = datum.get(id_field_name) if is_nothing(datum_id): - raise RuntimeError(f"No ID for field {id_field_name} in returned datum: {datum}") + safe_field_name = redact_sensitive_text(id_field_name) + safe_datum = redact_sensitive_data(datum) + raise RuntimeError(f"No ID for field {safe_field_name} in returned datum: {safe_datum}") grouped[datum_id] = datum return grouped diff --git a/src/extended_data/connectors/zoom/__init__.py b/src/extended_data/connectors/zoom/__init__.py index 4939e30..f52a807 100644 --- a/src/extended_data/connectors/zoom/__init__.py +++ b/src/extended_data/connectors/zoom/__init__.py @@ -5,18 +5,39 @@ import base64 from typing import Any +from urllib.parse import quote import requests from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, ExtendedList from extended_data.logging import Logging +from 
extended_data.primitives.redaction import REDACTED, redact_sensitive_text # Default timeout for HTTP requests in seconds DEFAULT_REQUEST_TIMEOUT = 30 +def _safe_zoom_text(value: Any, *sensitive_values: Any) -> str: + """Redact secrets and request identifiers from Zoom diagnostics.""" + text = redact_sensitive_text(value) + for sensitive_value in sensitive_values: + if sensitive_value is None: + continue + raw_value = str(sensitive_value) + if not raw_value: + continue + for candidate in {raw_value, quote(raw_value, safe="")}: + text = text.replace(candidate, REDACTED) + return text + + +def _zoom_error(action: str, exc: BaseException, *sensitive_values: Any) -> str: + """Build a redacted Zoom operational error message.""" + return f"{action}: {_safe_zoom_text(exc, *sensitive_values)}" + + class ZoomConnector(VendorConnectorBase): """Zoom connector for user management.""" @@ -89,7 +110,7 @@ def list_users(self) -> ExtendedDict: if not next_page_token: break except requests.exceptions.RequestException as exc: - raise RuntimeError(f"Failed to get Zoom users: {exc}") from exc + raise RuntimeError(_zoom_error("Failed to get Zoom users", exc)) from exc return self.extend_result(users) @@ -100,9 +121,9 @@ def remove_zoom_user(self, email: str) -> None: try: response = requests.delete(url, headers=headers, timeout=DEFAULT_REQUEST_TIMEOUT) response.raise_for_status() - self.logger.warning(f"Removed Zoom user {email}") + self.logger.warning("Removed Zoom user") except requests.exceptions.RequestException as exc: - error_msg = f"Failed to remove Zoom user {email}: {exc}" + error_msg = _zoom_error("Failed to remove Zoom user", exc, email) self.errors.append(error_msg) self.logger.exception(error_msg) @@ -117,10 +138,10 @@ def create_zoom_user(self, email: str, first_name: str, last_name: str) -> bool: try: response = requests.post(url, headers=headers, json=user_info, timeout=DEFAULT_REQUEST_TIMEOUT) response.raise_for_status() - self.logger.info(f"Created Zoom user {email}") 
+ self.logger.info("Created Zoom user") return True except requests.exceptions.RequestException as exc: - error_msg = f"Failed to create Zoom user {email}: {exc}" + error_msg = _zoom_error("Failed to create Zoom user", exc, email, first_name, last_name) self.errors.append(error_msg) self.logger.exception(error_msg) return False @@ -142,7 +163,7 @@ def get_user(self, user_id: str) -> ExtendedDict: response.raise_for_status() return self.extend_result(response.json()) except requests.exceptions.RequestException as exc: - raise RuntimeError(f"Failed to get Zoom user {user_id}: {exc}") from exc + raise RuntimeError(_zoom_error("Failed to get Zoom user", exc, user_id)) from exc def list_meetings(self, user_id: str, meeting_type: str = "scheduled") -> ExtendedList[ExtendedDict]: """List meetings for a specific user. @@ -164,7 +185,7 @@ def list_meetings(self, user_id: str, meeting_type: str = "scheduled") -> Extend data = response.json() return self.extend_result(data.get("meetings", [])) except requests.exceptions.RequestException as exc: - raise RuntimeError(f"Failed to list meetings for user {user_id}: {exc}") from exc + raise RuntimeError(_zoom_error("Failed to list Zoom meetings", exc, user_id)) from exc def get_meeting(self, meeting_id: str) -> ExtendedDict: """Get details of a specific meeting. 
@@ -183,7 +204,7 @@ def get_meeting(self, meeting_id: str) -> ExtendedDict: response.raise_for_status() return self.extend_result(response.json()) except requests.exceptions.RequestException as exc: - raise RuntimeError(f"Failed to get meeting {meeting_id}: {exc}") from exc + raise RuntimeError(_zoom_error("Failed to get Zoom meeting", exc, meeting_id)) from exc from extended_data.connectors.zoom.tools import ( diff --git a/tests/connectors/test_slack_connector.py b/tests/connectors/test_slack_connector.py index 6dab8be..8ca2454 100644 --- a/tests/connectors/test_slack_connector.py +++ b/tests/connectors/test_slack_connector.py @@ -169,6 +169,26 @@ def __init__(self, response): assert result["error"] == "channel_not_found" assert result["password"] == "[REDACTED]" + @patch("extended_data.connectors.slack.WebClient") + def test_call_api_redacts_grouping_failure_payload(self, mock_webclient_class, base_connector_kwargs): + """Slack grouping failures should not dump raw secret-bearing response data.""" + mock_user_client = MagicMock() + mock_user_client.users_list.return_value = { + "members": [{"name": "missing-id", "password": "hunter2", "authorization": "Bearer raw_token"}] + } + mock_bot_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + with pytest.raises(RuntimeError) as exc_info: + connector._call_api("users_list", group_by="members") + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + @patch("extended_data.connectors.slack.SlackConnector._call_api") @patch("extended_data.connectors.slack.WebClient") def test_list_users_filters_deleted( diff --git a/tests/connectors/test_zoom_connector.py b/tests/connectors/test_zoom_connector.py index 77d0207..daf7476 100644 --- a/tests/connectors/test_zoom_connector.py +++ 
b/tests/connectors/test_zoom_connector.py @@ -5,11 +5,17 @@ from unittest.mock import MagicMock, patch import pytest +import requests from extended_data.connectors.zoom import ZoomConnector from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) + + class TestZoomConnector: """Test suite for ZoomConnector.""" @@ -62,6 +68,33 @@ def test_get_access_token_failure(self, mock_post, base_connector_kwargs): with pytest.raises(RuntimeError, match="Failed to get Zoom access token"): connector.get_access_token() + @patch("extended_data.connectors.zoom.requests.get") + @patch("extended_data.connectors.zoom.requests.post") + def test_list_users_redacts_request_failure_details(self, mock_post, mock_get, base_connector_kwargs): + """Zoom list failures should not expose raw secret-bearing exception text.""" + mock_token_response = MagicMock() + mock_token_response.json.return_value = {"access_token": "test-token"} + mock_token_response.raise_for_status = MagicMock() + mock_post.return_value = mock_token_response + mock_get.side_effect = requests.exceptions.RequestException( + "status=401 password=hunter2 Authorization: Bearer raw_token" + ) + + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + with pytest.raises(RuntimeError) as exc_info: + connector.list_users() + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") def test_list_users(self, mock_post, mock_get, base_connector_kwargs): @@ -131,6 +164,32 @@ def test_create_zoom_user(self, mock_post, base_connector_kwargs): assert 
result is True assert mock_post.call_count == 2 + @patch("extended_data.connectors.zoom.requests.delete") + @patch("extended_data.connectors.zoom.requests.post") + def test_remove_zoom_user_redacts_error_state_and_logs(self, mock_post, mock_delete, base_connector_kwargs): + """Zoom mutation failures should redact user IDs and exception secrets.""" + mock_token_response = MagicMock() + mock_token_response.json.return_value = {"access_token": "test-token"} + mock_token_response.raise_for_status = MagicMock() + mock_post.return_value = mock_token_response + mock_delete.side_effect = requests.exceptions.RequestException( + "failed for private-user@example.com?access_token=raw_token" + ) + + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + connector.remove_zoom_user("private-user@example.com") + + diagnostics = "\n".join(connector.errors) + _logged_text(connector.logger) + assert "private-user@example.com" not in diagnostics + assert "raw_token" not in diagnostics + assert "[REDACTED]" in diagnostics + @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") def test_get_user(self, mock_post, mock_get, base_connector_kwargs): @@ -163,6 +222,34 @@ def test_get_user(self, mock_post, mock_get, base_connector_kwargs): assert user["email"] == "user1@example.com" assert user["id"] == "123" + @patch("extended_data.connectors.zoom.requests.get") + @patch("extended_data.connectors.zoom.requests.post") + def test_get_user_redacts_identifier_and_secret_details(self, mock_post, mock_get, base_connector_kwargs): + """Zoom lookup failures should not echo user identifiers or secrets.""" + mock_token_response = MagicMock() + mock_token_response.json.return_value = {"access_token": "test-token"} + mock_token_response.raise_for_status = MagicMock() + mock_post.return_value = mock_token_response + mock_get.side_effect = 
requests.exceptions.RequestException( + "404 for user1@example.com and user1%40example.com client_secret=s3cr3t" + ) + + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + with pytest.raises(RuntimeError) as exc_info: + connector.get_user("user1@example.com") + + message = str(exc_info.value) + assert "user1@example.com" not in message + assert "user1%40example.com" not in message + assert "s3cr3t" not in message + assert "[REDACTED]" in message + @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") def test_list_meetings(self, mock_post, mock_get, base_connector_kwargs): From 33fba4f889b763a903765295fc31a974614fa216 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:44:44 -0500 Subject: [PATCH 206/287] fix: redact aws secret diagnostics --- src/extended_data/connectors/aws/__init__.py | 77 ++++++++++++++------ tests/connectors/test_aws_connector.py | 66 +++++++++++++++++ 2 files changed, 119 insertions(+), 24 deletions(-) diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index 1ca1dbd..8a8366d 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -27,11 +27,29 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.logging import Logging from extended_data.primitives import is_nothing +from extended_data.primitives.redaction import REDACTED, redact_sensitive_text AWSSecretValue = str | ExtendedString | Mapping[str, Any] | None +def _safe_aws_text(value: Any, *sensitive_values: Any) -> str: + """Redact secrets and resource identifiers from AWS diagnostics.""" + text = redact_sensitive_text(value) + for sensitive_value in sensitive_values: + if sensitive_value is None: + continue + raw_value = str(sensitive_value) + if 
raw_value: + text = text.replace(raw_value, REDACTED) + return text + + +def _aws_secret_error(action: str, exc: BaseException, *sensitive_values: Any) -> str: + """Build a redacted AWS Secrets Manager operation error message.""" + return f"{action}: {_safe_aws_text(exc, *sensitive_values)}" + + if TYPE_CHECKING: import boto3 @@ -255,7 +273,8 @@ def get_secret( Returns: The secret value as a string, or None if not found. """ - self.logger.debug(f"Getting AWS secret: {secret_id}") + safe_secret_id = _safe_aws_text(secret_id, secret_id) + self.logger.debug(f"Getting AWS secret: {safe_secret_id}") if secretsmanager is None: secretsmanager = self.get_aws_client( @@ -266,14 +285,15 @@ def get_secret( try: response = secretsmanager.get_secret_value(SecretId=secret_id) - self.logger.debug(f"Successfully retrieved secret: {secret_id}") + self.logger.debug(f"Successfully retrieved secret: {safe_secret_id}") except ClientError as e: error_code = e.response.get("Error", {}).get("Code", "") if error_code == "ResourceNotFoundException": - self.logger.warning(f"Secret not found: {secret_id}") + self.logger.warning(f"Secret not found: {safe_secret_id}") return None - self.logger.exception(f"Failed to get secret {secret_id}: {e}") - raise ValueError(f"Failed to get secret for ID '{secret_id}'") from e + error_message = _aws_secret_error("Failed to get secret", e, secret_id) + self.logger.exception(error_message) + raise ValueError(error_message) from e if "SecretString" in response: return self.extend_result(response["SecretString"]) @@ -372,7 +392,8 @@ def create_secret( msg = "secret_value is required to create a secret" raise ValueError(msg) - self.logger.info(f"Creating AWS secret: {name}") + safe_name = _safe_aws_text(name, name) + self.logger.info(f"Creating AWS secret: {safe_name}") role_arn = execution_role_arn or self.execution_role_arn secretsmanager = self.get_aws_client( client_name="secretsmanager", @@ -387,11 +408,12 @@ def create_secret( try: response = 
secretsmanager.create_secret(**create_kwargs) - self.logger.info(f"Created AWS secret ARN: {response.get('ARN')}") + self.logger.info(f"Created AWS secret ARN: {_safe_aws_text(response.get('ARN'), response.get('ARN'))}") return self.extend_result(response) except ClientError as exc: - self.logger.error(f"Failed to create secret {name}", exc_info=True) - raise RuntimeError(f"Failed to create secret '{name}'") from exc + error_message = _aws_secret_error("Failed to create secret", exc, name, secret_value) + self.logger.error(error_message, exc_info=True) + raise RuntimeError(error_message) from exc def update_secret( self, @@ -407,7 +429,8 @@ def update_secret( msg = "secret_value is required to update a secret" raise ValueError(msg) - self.logger.info(f"Updating AWS secret: {secret_id}") + safe_secret_id = _safe_aws_text(secret_id, secret_id) + self.logger.info(f"Updating AWS secret: {safe_secret_id}") role_arn = execution_role_arn or self.execution_role_arn secretsmanager = self.get_aws_client( @@ -417,11 +440,13 @@ def update_secret( try: response = secretsmanager.update_secret(SecretId=secret_id, SecretString=secret_value) - self.logger.info(f"Updated AWS secret ARN: {response.get('ARN', secret_id)}") + response_arn = response.get("ARN", secret_id) + self.logger.info(f"Updated AWS secret ARN: {_safe_aws_text(response_arn, response_arn)}") return self.extend_result(response) except ClientError as exc: - self.logger.error(f"Failed to update secret {secret_id}", exc_info=True) - raise RuntimeError(f"Failed to update secret '{secret_id}'") from exc + error_message = _aws_secret_error("Failed to update secret", exc, secret_id, secret_value) + self.logger.error(error_message, exc_info=True) + raise RuntimeError(error_message) from exc def delete_secret( self, @@ -439,7 +464,8 @@ def delete_secret( msg = "recovery_window_days must be between 7 and 30 when not forcing deletion" raise ValueError(msg) - self.logger.info(f"Deleting AWS secret: {secret_id}") + safe_secret_id 
= _safe_aws_text(secret_id, secret_id) + self.logger.info(f"Deleting AWS secret: {safe_secret_id}") role_arn = execution_role_arn or self.execution_role_arn secretsmanager = self.get_aws_client( @@ -455,11 +481,13 @@ def delete_secret( try: response = secretsmanager.delete_secret(**delete_kwargs) - self.logger.info(f"Delete secret request submitted for: {response.get('ARN', secret_id)}") + response_arn = response.get("ARN", secret_id) + self.logger.info(f"Delete secret request submitted for: {_safe_aws_text(response_arn, response_arn)}") return self.extend_result(response) except ClientError as exc: - self.logger.error(f"Failed to delete secret {secret_id}", exc_info=True) - raise RuntimeError(f"Failed to delete secret '{secret_id}'") from exc + error_message = _aws_secret_error("Failed to delete secret", exc, secret_id) + self.logger.error(error_message, exc_info=True) + raise RuntimeError(error_message) from exc def delete_secrets_matching( self, @@ -473,7 +501,8 @@ def delete_secrets_matching( msg = "prefix is required to delete matching secrets" raise ValueError(msg) - self.logger.info(f"Deleting secrets matching prefix: {prefix} (dry_run={dry_run})") + safe_prefix = _safe_aws_text(prefix, prefix) + self.logger.info(f"Deleting secrets matching prefix: {safe_prefix} (dry_run={dry_run})") role_arn = execution_role_arn or self.execution_role_arn secrets = self.list_secrets( @@ -488,14 +517,14 @@ def delete_secrets_matching( elif isinstance(value, Mapping) and "ARN" in value: secret_arns.append(value["ARN"]) else: - self.logger.debug(f"Skipping secret {secret_name} due to missing ARN data") + self.logger.debug(f"Skipping secret {_safe_aws_text(secret_name, secret_name)} due to missing ARN data") if not secret_arns: - self.logger.info(f"No secrets found for prefix: {prefix}") + self.logger.info(f"No secrets found for prefix: {safe_prefix}") return self.extend_result([]) if dry_run: - self.logger.info(f"Dry run enabled; would delete {len(secret_arns)} secrets for 
prefix {prefix}") + self.logger.info(f"Dry run enabled; would delete {len(secret_arns)} secrets for prefix {safe_prefix}") return self.extend_result(secret_arns) deleted_arns: list[str] = [] @@ -508,7 +537,7 @@ def delete_secrets_matching( ) deleted_arns.append(response.get("ARN", secret_arn)) - self.logger.info(f"Deleted {len(deleted_arns)} secrets for prefix {prefix}") + self.logger.info(f"Deleted {len(deleted_arns)} secrets for prefix {safe_prefix}") return self.extend_result(deleted_arns) def copy_secrets_to_s3( @@ -533,7 +562,7 @@ def copy_secrets_to_s3( """ import json as json_module - self.logger.info(f"Copying {len(secrets)} secrets to s3://{bucket}/{key}") + self.logger.info(f"Copying {len(secrets)} secrets to S3") s3_client = self.get_aws_client( client_name="s3", @@ -550,7 +579,7 @@ def copy_secrets_to_s3( ) s3_uri = f"s3://{bucket}/{key}" - self.logger.info(f"Uploaded secrets to {s3_uri}") + self.logger.info("Uploaded secrets to S3") return self.extend_result(s3_uri) def load_secrets_by_prefix( diff --git a/tests/connectors/test_aws_connector.py b/tests/connectors/test_aws_connector.py index 8321141..c7fd0d0 100644 --- a/tests/connectors/test_aws_connector.py +++ b/tests/connectors/test_aws_connector.py @@ -16,6 +16,11 @@ from extended_data.connectors.aws import AWSConnector +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) + + class TestAWSConnector: """Test suite for AWSConnector.""" @@ -253,6 +258,41 @@ def test_get_secret_returns_extended_string(self, base_connector_kwargs): assert isinstance(value, ExtendedString) assert value == "secret-value" + def test_get_secret_redacts_client_error_diagnostics(self, base_connector_kwargs): + """AWS secret lookup failures should not expose IDs or secret-bearing error text.""" + connector = AWSConnector(**base_connector_kwargs) + mock_client = MagicMock() + 
mock_client.get_secret_value.side_effect = ClientError( + {"Error": {"Code": "AccessDeniedException", "Message": "denied token=raw_token password=hunter2"}}, + "GetSecretValue", + ) + connector.get_aws_client = MagicMock(return_value=mock_client) + + with pytest.raises(ValueError) as exc_info: + connector.get_secret("prod/customer-private") + + diagnostics = _logged_text(connector.logger) + str(exc_info.value) + assert "prod/customer-private" not in diagnostics + assert "raw_token" not in diagnostics + assert "hunter2" not in diagnostics + assert "[REDACTED]" in diagnostics + + def test_get_secret_redacts_missing_secret_log(self, base_connector_kwargs): + """AWS missing-secret logs should not expose raw requested IDs.""" + connector = AWSConnector(**base_connector_kwargs) + mock_client = MagicMock() + mock_client.get_secret_value.side_effect = ClientError( + {"Error": {"Code": "ResourceNotFoundException", "Message": "missing"}}, + "GetSecretValue", + ) + connector.get_aws_client = MagicMock(return_value=mock_client) + + assert connector.get_secret("prod/customer-private") is None + + logs = _logged_text(connector.logger) + assert "prod/customer-private" not in logs + assert "[REDACTED]" in logs + def test_create_secret_with_tags_and_description(self, base_connector_kwargs): """Ensure create_secret builds payload and sends to AWS.""" connector = AWSConnector(**base_connector_kwargs) @@ -291,6 +331,26 @@ def test_create_secret_requires_name(self, base_connector_kwargs): with pytest.raises(ValueError, match="name is required"): connector.create_secret(name="", secret_value="value") + def test_create_secret_redacts_error_diagnostics(self, base_connector_kwargs): + """AWS secret creation failures should not expose names, values, or exception secrets.""" + connector = AWSConnector(**base_connector_kwargs) + mock_client = MagicMock() + mock_client.create_secret.side_effect = ClientError( + {"Error": {"Code": "AccessDeniedException", "Message": "denied secret=raw-secret 
api_key=key_123"}}, + "CreateSecret", + ) + connector.get_aws_client = MagicMock(return_value=mock_client) + + with pytest.raises(RuntimeError) as exc_info: + connector.create_secret(name="/vendors/private", secret_value="super-secret") + + diagnostics = _logged_text(connector.logger) + str(exc_info.value) + assert "/vendors/private" not in diagnostics + assert "super-secret" not in diagnostics + assert "raw-secret" not in diagnostics + assert "key_123" not in diagnostics + assert "[REDACTED]" in diagnostics + def test_update_secret_calls_aws(self, base_connector_kwargs): """Ensure update_secret forwards call to boto3 client.""" connector = AWSConnector(**base_connector_kwargs) @@ -369,6 +429,9 @@ def test_delete_secrets_matching_dry_run(self, base_connector_kwargs): assert isinstance(to_delete[0], ExtendedString) assert to_delete == ["arn:a", "arn:b"] connector.delete_secret.assert_not_called() + logs = _logged_text(connector.logger) + assert "/vendors/" not in logs + assert "[REDACTED]" in logs connector.list_secrets.assert_called_once_with( prefix="/vendors/", execution_role_arn="arn:role:override", @@ -430,6 +493,9 @@ def test_copy_secrets_to_s3_unwraps_extended_data(self, base_connector_kwargs): assert isinstance(uri, ExtendedString) assert uri == "s3://target-bucket/secrets.json" + logs = _logged_text(connector.logger) + assert "target-bucket" not in logs + assert "secrets.json" not in logs mock_client.put_object.assert_called_once_with( Bucket="target-bucket", Key="secrets.json", From 002d3f7673574cc6723405fdea5b244cace994e9 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:46:33 -0500 Subject: [PATCH 207/287] feat: support explicit redaction values --- src/extended_data/connectors/aws/__init__.py | 11 ++---- src/extended_data/connectors/zoom/__init__.py | 14 ++------ src/extended_data/primitives/redaction.py | 35 ++++++++++++++----- tests/core/test_redaction.py | 21 +++++++++++ 4 files changed, 51 insertions(+), 30 deletions(-) diff --git 
a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index 8a8366d..6dc2844 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -27,7 +27,7 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.logging import Logging from extended_data.primitives import is_nothing -from extended_data.primitives.redaction import REDACTED, redact_sensitive_text +from extended_data.primitives.redaction import redact_sensitive_text AWSSecretValue = str | ExtendedString | Mapping[str, Any] | None @@ -35,14 +35,7 @@ def _safe_aws_text(value: Any, *sensitive_values: Any) -> str: """Redact secrets and resource identifiers from AWS diagnostics.""" - text = redact_sensitive_text(value) - for sensitive_value in sensitive_values: - if sensitive_value is None: - continue - raw_value = str(sensitive_value) - if raw_value: - text = text.replace(raw_value, REDACTED) - return text + return redact_sensitive_text(value, values=sensitive_values) def _aws_secret_error(action: str, exc: BaseException, *sensitive_values: Any) -> str: diff --git a/src/extended_data/connectors/zoom/__init__.py b/src/extended_data/connectors/zoom/__init__.py index f52a807..e762da2 100644 --- a/src/extended_data/connectors/zoom/__init__.py +++ b/src/extended_data/connectors/zoom/__init__.py @@ -5,14 +5,13 @@ import base64 from typing import Any -from urllib.parse import quote import requests from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, ExtendedList from extended_data.logging import Logging -from extended_data.primitives.redaction import REDACTED, redact_sensitive_text +from extended_data.primitives.redaction import redact_sensitive_text # Default timeout for HTTP requests in seconds @@ -21,16 +20,7 @@ def _safe_zoom_text(value: Any, *sensitive_values: Any) -> str: """Redact secrets and request 
identifiers from Zoom diagnostics.""" - text = redact_sensitive_text(value) - for sensitive_value in sensitive_values: - if sensitive_value is None: - continue - raw_value = str(sensitive_value) - if not raw_value: - continue - for candidate in {raw_value, quote(raw_value, safe="")}: - text = text.replace(candidate, REDACTED) - return text + return redact_sensitive_text(value, values=sensitive_values) def _zoom_error(action: str, exc: BaseException, *sensitive_values: Any) -> str: diff --git a/src/extended_data/primitives/redaction.py b/src/extended_data/primitives/redaction.py index 25ef40a..8daee81 100644 --- a/src/extended_data/primitives/redaction.py +++ b/src/extended_data/primitives/redaction.py @@ -4,8 +4,9 @@ import re -from collections.abc import Mapping +from collections.abc import Iterable, Mapping from typing import Any +from urllib.parse import quote SENSITIVE_KEY_PATTERN = ( @@ -38,16 +39,32 @@ def _redacted_field(match: re.Match[str]) -> str: return f"{prefix}{_redacted_value(value)}" -def redact_sensitive_text(message: Any) -> str: +def _redact_known_values(text: str, values: Iterable[Any] | None) -> str: + """Redact explicitly provided values and URL-encoded variants.""" + if values is None: + return text + for value in values: + if value is None: + continue + raw_value = str(value) + if not raw_value: + continue + for candidate in {raw_value, quote(raw_value, safe="")}: + text = text.replace(candidate, REDACTED) + return text + + +def redact_sensitive_text(message: Any, *, values: Iterable[Any] | None = None) -> str: """Redact common secret fields in terminal-oriented text.""" text = str(message) text = JSON_SECRET_RE.sub(_redacted_field, text) text = KEY_VALUE_SECRET_RE.sub(lambda match: f"{match.group(1)}{REDACTED}", text) text = CLI_SECRET_RE.sub(lambda match: f"{match.group(1)}{REDACTED}", text) - return BEARER_SECRET_RE.sub(rf"\1{REDACTED}", text) + text = BEARER_SECRET_RE.sub(rf"\1{REDACTED}", text) + return _redact_known_values(text, 
values) -def redact_sensitive_data(value: Any) -> Any: +def redact_sensitive_data(value: Any, *, values: Iterable[Any] | None = None) -> Any: """Recursively redact common secret fields in JSON-like data.""" if isinstance(value, Mapping): redacted: dict[Any, Any] = {} @@ -55,14 +72,14 @@ def redact_sensitive_data(value: Any) -> Any: if isinstance(key, str) and SENSITIVE_KEY_RE.fullmatch(key): redacted[key] = REDACTED else: - redacted[key] = redact_sensitive_data(item) + redacted[key] = redact_sensitive_data(item, values=values) return redacted if isinstance(value, list): - return [redact_sensitive_data(item) for item in value] + return [redact_sensitive_data(item, values=values) for item in value] if isinstance(value, tuple): - return tuple(redact_sensitive_data(item) for item in value) + return tuple(redact_sensitive_data(item, values=values) for item in value) if isinstance(value, set): - return {redact_sensitive_data(item) for item in value} + return {redact_sensitive_data(item, values=values) for item in value} if isinstance(value, str): - return redact_sensitive_text(value) + return redact_sensitive_text(value, values=values) return value diff --git a/tests/core/test_redaction.py b/tests/core/test_redaction.py index 5f52949..e50e676 100644 --- a/tests/core/test_redaction.py +++ b/tests/core/test_redaction.py @@ -19,6 +19,18 @@ def test_redact_sensitive_text_preserves_json_shape() -> None: assert '"Authorization": "[REDACTED]"' in redacted +def test_redact_sensitive_text_accepts_known_diagnostic_values() -> None: + """Callers can redact known resource identifiers that are sensitive in context.""" + message = "failed for user@example.com and user%40example.com with token=raw_token" + + redacted = redact_sensitive_text(message, values=["user@example.com"]) + + assert "user@example.com" not in redacted + assert "user%40example.com" not in redacted + assert "raw_token" not in redacted + assert redacted.count("[REDACTED]") == 3 + + def 
test_redact_sensitive_data_recurses_through_json_like_payloads() -> None: """Structured redaction should handle nested JSON-like data.""" payload = { @@ -36,3 +48,12 @@ def test_redact_sensitive_data_recurses_through_json_like_payloads() -> None: "headers": {"authorization": "[REDACTED]"}, "message": "client_secret=[REDACTED]", } + + +def test_redact_sensitive_data_applies_known_values_recursively() -> None: + """Structured redaction should carry caller-provided sensitive values through nested text.""" + payload = {"details": ["private/path", {"message": "see private%2Fpath"}]} + + redacted = redact_sensitive_data(payload, values=["private/path"]) + + assert redacted == {"details": ["[REDACTED]", {"message": "see [REDACTED]"}]} From 7897142a31b6868ec7b9b96aa68c7ad909245d2a Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:48:22 -0500 Subject: [PATCH 208/287] fix: redact google service account diagnostics --- .../connectors/google/__init__.py | 7 ++++++- tests/connectors/test_google_connector.py | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index 3d237d9..0b6964f 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -15,6 +15,7 @@ from extended_data.connectors.google.workspace import GoogleWorkspaceMixin from extended_data.containers import ExtendedDict, ExtendedList from extended_data.logging import Logging +from extended_data.primitives.redaction import redact_sensitive_text if TYPE_CHECKING: @@ -100,7 +101,11 @@ def __init__( try: service_account_info = json.loads(service_account_info) except json.JSONDecodeError as e: - self.logger.exception(f"Failed to parse GOOGLE_SERVICE_ACCOUNT JSON: {e}") + safe_payload = redact_sensitive_text(service_account_info, values=[service_account_info]) + self.logger.exception( + "Failed to parse 
GOOGLE_SERVICE_ACCOUNT JSON: " + f"{redact_sensitive_text(e)}. Payload: {safe_payload}" + ) raise if not isinstance(service_account_info, dict): diff --git a/tests/connectors/test_google_connector.py b/tests/connectors/test_google_connector.py index 7d8883b..e7f1ec2 100644 --- a/tests/connectors/test_google_connector.py +++ b/tests/connectors/test_google_connector.py @@ -6,6 +6,7 @@ from unittest.mock import MagicMock, patch import pytest +import json pytest.importorskip("google.oauth2.service_account") pytest.importorskip("googleapiclient") @@ -14,6 +15,11 @@ from extended_data.connectors.google import GoogleConnector +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) + + def _service_account(): """Return a reusable service account payload.""" return { @@ -85,6 +91,18 @@ def test_init_with_dict_service_account(self, base_connector_kwargs): assert connector.service_account_info == service_account assert connector._credentials is None + def test_init_redacts_invalid_service_account_json_logs(self, base_connector_kwargs): + """Invalid service-account JSON diagnostics should not expose key material.""" + invalid_service_account = '{"private_key": "-----BEGIN RSA PRIVATE KEY-----\\nMIIE...test"' + + with pytest.raises(json.JSONDecodeError): + GoogleConnector(service_account_info=invalid_service_account, **base_connector_kwargs) + + logs = _logged_text(base_connector_kwargs["logger"].logger) + assert "MIIE...test" not in logs + assert "BEGIN RSA PRIVATE KEY" not in logs + assert "[REDACTED]" in logs + @patch("extended_data.connectors.google.service_account.Credentials.from_service_account_info") def test_credentials_property(self, mock_from_sa, base_connector_kwargs): """Test credentials property creates credentials.""" From da08073dd8235b692e994d7ac5e2ab69d17ede82 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:49:35 -0500 
Subject: [PATCH 209/287] fix: avoid logging anthropic task prompts --- .../connectors/anthropic/__init__.py | 2 +- tests/connectors/test_anthropic.py | 30 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index 945fc36..89b2d3e 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ b/src/extended_data/connectors/anthropic/__init__.py @@ -503,7 +503,7 @@ def execute_agent_task( """ import time - self.logger.info(f"Executing agent task: {task[:100]}...") + self.logger.info(f"Executing agent task with {len(task)} characters") start_time = time.time() default_system = """You are a helpful AI assistant that executes coding tasks. diff --git a/tests/connectors/test_anthropic.py b/tests/connectors/test_anthropic.py index 5edc97f..3fe697c 100644 --- a/tests/connectors/test_anthropic.py +++ b/tests/connectors/test_anthropic.py @@ -22,6 +22,11 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) + + class TestModels: """Tests for Pydantic models.""" @@ -296,6 +301,31 @@ def test_execute_agent_task_redacts_error_result(self): assert "raw_token" not in result.error assert "[REDACTED]" in result.error + def test_execute_agent_task_does_not_log_task_prompt(self, base_connector_kwargs): + """Agent task diagnostics should not expose raw prompt text.""" + import httpx + + with patch.object(httpx, "Client"): + connector = AnthropicConnector(api_key="test-key", **base_connector_kwargs) + + with patch.object( + connector, + "create_message", + return_value=extend_data( + { + "content": [{"type": "text", "text": "done"}], + "usage": {"input_tokens": 2, "output_tokens": 1}, + } + ), + ): + result = 
connector.execute_agent_task("rotate password=hunter2 for customer-prod") + + logs = _logged_text(connector.logger) + assert result.success is True + assert "hunter2" not in logs + assert "customer-prod" not in logs + assert "Executing agent task with" in logs + class TestClaudeModels: """Tests for Claude model constants. From 48d73d9e7b0709b71592de7d07cd9d66f441faa8 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:51:13 -0500 Subject: [PATCH 210/287] docs: document explicit redaction values --- README.md | 20 ++++++++++++-------- docs/package-surface.md | 13 +++++++++++-- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 59f5cf4..4869619 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ print(encode_yaml(payload)) print(decoded_file["service"]["name"].upper_first()) print(number_to_words(42)) print(redact_sensitive_text("Authorization: Bearer raw_token")) +print(redact_sensitive_text("failed for user@example.com", values=["user@example.com"])) print(workflow.as_builtin()) ``` @@ -116,10 +117,12 @@ Tier 1 primitive names are explicit in this major version and live under `string_to_int()`, `string_to_float()`, `string_to_path()`, `string_to_date()`, `string_to_datetime()`, and `string_to_time()` for scalar string conversion. Use `redact_sensitive_text()` and -`redact_sensitive_data()` for diagnostic and JSON-like payload redaction. The -old `bytestostr` and `strto*` helper names are not preserved. Old package -import namespaces are not shimmed; missing imports are intentional so remaining -migration work fails fast. +`redact_sensitive_data()` for diagnostic and JSON-like payload redaction. Pass +`values=[...]` when a caller knows specific context values, such as resource +IDs, emails, paths, or URLs, must be withheld in addition to common secret +fields. The old `bytestostr` and `strto*` helper names are not preserved. 
Old +package import namespaces are not shimmed; missing imports are intentional so +remaining migration work fails fast. Tier 1 public exports stay function-oriented; use `get_default_dict()` for nested or sorted default mappings instead of importing the internal helper class. @@ -148,10 +151,11 @@ The generic CLI `call` command and MCP bridge expose only methods that advertise Extended Data payload returns. Serialized CLI/MCP boundaries and connector API error messages reuse the Tier 1 redaction primitives for common secret-bearing keys and token-shaped strings, -so connector data methods can return structured vendor payloads without making -stdout, tool responses, or raised transport errors a secret leak by default. -Raw SDK/client objects and raw transport responses remain available from the -methods that explicitly return them. +including connector-provided `values=[...]` for context-sensitive resource +identifiers, so connector data methods can return structured vendor payloads +without making stdout, tool responses, logs, or raised transport errors a +secret leak by default. Raw SDK/client objects and raw transport responses +remain available from the methods that explicitly return them. The `secrets` connector integrates with the standalone SecretSync project (`jbcom/secrets-sync`) through the `secretsync` CLI. It expects diff --git a/docs/package-surface.md b/docs/package-surface.md index 0ac55c4..5915807 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -63,7 +63,10 @@ workflow needs nested or sorted default mappings rather than importing the internal sorted-default mapping helper class. Use `redact_sensitive_text()` and `redact_sensitive_data()` when diagnostics or JSON-like payloads need common secret-bearing keys and token-shaped strings -removed before display. +removed before display. 
Pass `values=[...]` when a caller knows additional +context-specific identifiers, such as emails, paths, URLs, or vendor resource +IDs, must be withheld as well; URL-encoded forms of those values are redacted +too. Direct JSON, YAML, TOML, and HCL primitive decode failures raise `DataDecodeError` with format and position context while preserving the parser @@ -81,6 +84,10 @@ aliases = ExtendedTuple(("api", ("gateway",))).flatten() tags = ExtendedSet({"prod", "prod", ""}).compact() words = number_to_words(42) encoding = normalize_data_encoding("YML") +safe_error = redact_sensitive_text( + "failed for user@example.com and user%40example.com", + values=["user@example.com"], +) ``` `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, and `ExtendedSet` recursively @@ -294,7 +301,9 @@ same redaction policy before exceptions are raised. Common secret-bearing keys such as `password`, `api_key`, `access_token`, `authorization`, and `client_secret`, plus token-like strings in error text, are replaced with `[REDACTED]` before CLI stdout/stderr, MCP tool responses, or raised transport -errors expose them. +errors expose them. Connectors can also pass context-specific `values=[...]` +for resource IDs, paths, URLs, emails, prompt text, or vendor payload handles +that are sensitive only in that operation. LangChain, CrewAI, Strands, and auto-detection factory functions still return plain framework tool object lists. 
From a3f176376903694f4d9638c2252c99026b116138 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 17:53:07 -0500 Subject: [PATCH 211/287] fix: redact vault resource diagnostics --- .../connectors/vault/__init__.py | 66 +++++++++++-------- tests/connectors/test_vault_connector.py | 4 +- 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index 4f20b9f..ce6cda6 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -43,9 +43,14 @@ def _load_hvac() -> Any: VAULT_APPROLE_PATH_ENV_VAR = "VAULT_APPROLE_PATH" -def _safe_log_text(value: Any) -> str: +def _safe_log_text(value: Any, *sensitive_values: Any) -> str: """Return a redacted string for Vault diagnostic output.""" - return redact_sensitive_text(value) + return redact_sensitive_text(value, values=sensitive_values) + + +def _safe_ref_text(value: Any) -> str: + """Return a redacted string for sensitive Vault resource references.""" + return redact_sensitive_text(value, values=[value]) class VaultConnector(VendorConnectorBase): @@ -200,7 +205,7 @@ def list_secrets( raise ValueError(msg) display_root = root_path if root_path not in (None, "", "/") else "/" - self.logger.info(f"Listing Vault secrets from {_safe_log_text(mount_point)}{_safe_log_text(display_root)}") + self.logger.info(f"Listing Vault secrets from {_safe_ref_text(mount_point)}{_safe_ref_text(display_root)}") secrets: dict[str, dict[str, Any]] = {} client = self.vault_client @@ -220,7 +225,7 @@ def list_secrets( for key in root_result.get("data", {}).get("keys", []) ] except VaultError as e: - self.logger.warning(f"Invalid root path {_safe_log_text(display_root)}: {_safe_log_text(e)}") + self.logger.warning(f"Invalid root path {_safe_ref_text(display_root)}: {_safe_log_text(e, display_root)}") return self.extend_result(secrets) stack: deque[tuple[str, int]] = deque(initial_paths) 
@@ -236,9 +241,11 @@ def list_secrets( mount_point=mount_point, )["data"]["data"] secrets[current_path] = secret_data - self.logger.debug(f"Retrieved secret: {_safe_log_text(current_path)}") + self.logger.debug(f"Retrieved secret: {_safe_ref_text(current_path)}") except VaultError as e: - self.logger.warning(f"Failed to read secret {_safe_log_text(current_path)}: {_safe_log_text(e)}") + self.logger.warning( + f"Failed to read secret {_safe_ref_text(current_path)}: {_safe_log_text(e, current_path)}" + ) # It's a directory, list its contents if within max_depth elif max_depth is None or depth < max_depth: try: @@ -251,7 +258,9 @@ def list_secrets( new_path = f"{current_path}{key}" # current_path already ends with / stack.append((new_path, depth + 1)) except VaultError as e: - self.logger.warning(f"Failed to list path {_safe_log_text(current_path)}: {_safe_log_text(e)}") + self.logger.warning( + f"Failed to list path {_safe_ref_text(current_path)}: {_safe_log_text(e, current_path)}" + ) self.logger.info(f"Listed {len(secrets)} Vault secrets") return self.extend_result(secrets) @@ -280,7 +289,7 @@ def read_secret( return None return self.extend_result(data) except VaultError as e: - self.logger.warning(f"Failed to read secret {_safe_log_text(path)}: {_safe_log_text(e)}") + self.logger.warning(f"Failed to read secret {_safe_ref_text(path)}: {_safe_log_text(e, path)}") return None def get_secret( @@ -307,7 +316,7 @@ def get_secret( Secret data dict, or None if not found. 
""" self.logger.debug( - f"Getting Vault secret: path={_safe_log_text(path)}, secret_name={_safe_log_text(secret_name)}" + f"Getting Vault secret: path={_safe_ref_text(path)}, secret_name={_safe_ref_text(secret_name)}" ) client = self.vault_client @@ -317,29 +326,29 @@ def get_secret( if not is_nothing(secret_name): # Build the full path: path/secret_name or just secret_name if path is "/" secret_path = f"{path}/{secret_name}" if path and path != "/" else secret_name - self.logger.debug(f"Resolved secret path: {_safe_log_text(secret_path)}") + self.logger.debug(f"Resolved secret path: {_safe_ref_text(secret_path)}") try: secret_data = client.secrets.kv.v2.read_secret_version(path=secret_path, mount_point=mount_point)[ "data" ]["data"] - self.logger.debug(f"Retrieved secret data for {_safe_log_text(secret_path)}") + self.logger.debug(f"Retrieved secret data for {_safe_ref_text(secret_path)}") except VaultError as e: self.logger.warning( - f"Failed to find secret at {_safe_log_text(path)}" - + (f"/{_safe_log_text(secret_name)}" if not is_nothing(secret_name) else "") - + f": {_safe_log_text(e)}" + f"Failed to find secret at {_safe_ref_text(path)}" + + (f"/{_safe_ref_text(secret_name)}" if not is_nothing(secret_name) else "") + + f": {_safe_log_text(e, path, secret_name, secret_path)}" ) return self.extend_result(secret_data) if secret_data is not None else None # No secret_name provided - search under path - self.logger.info(f"Finding secrets under {_safe_log_text(path)}") + self.logger.info(f"Finding secrets under {_safe_ref_text(path)}") matching_secret_paths = self.list_secrets(root_path=path, mount_point=mount_point) self.logger.debug(f"Found {len(matching_secret_paths)} potential secrets") if is_nothing(matching_secret_paths): - self.logger.warning(f"No secrets found matching {_safe_log_text(path)}") + self.logger.warning(f"No secrets found matching {_safe_ref_text(path)}") return None # Convert to deque for efficient popleft iteration @@ -347,7 +356,7 @@ def 
get_secret( while path_queue and secret_data is None: secret_path = path_queue.popleft() - safe_secret_path = _safe_log_text(secret_path) + safe_secret_path = _safe_ref_text(secret_path) self.logger.debug(f"Checking secret path: {safe_secret_path}") try: @@ -403,10 +412,10 @@ def write_secret( secret=data, mount_point=mount_point, ) - self.logger.info(f"Wrote secret to {_safe_log_text(path)}") + self.logger.info(f"Wrote secret to {_safe_ref_text(path)}") return True except VaultError as e: - self.logger.exception(f"Failed to write secret {_safe_log_text(path)}: {_safe_log_text(e)}") + self.logger.exception(f"Failed to write secret {_safe_ref_text(path)}: {_safe_log_text(e, path)}") return False # --------------------------------------------------------------------- @@ -436,7 +445,8 @@ def list_aws_iam_roles( response = aws_secrets.list_roles(mount_point=mount_point) except VaultError as e: self.logger.warning( - f"Failed to list AWS IAM roles from mount {_safe_log_text(mount_point)}: {_safe_log_text(e)}" + f"Failed to list AWS IAM roles from mount {_safe_ref_text(mount_point)}: " + f"{_safe_log_text(e, mount_point)}" ) return self.extend_result([]) @@ -444,7 +454,7 @@ def list_aws_iam_roles( if prefix: role_names = [role for role in role_names if role.startswith(prefix)] - self.logger.info(f"Found {len(role_names)} AWS IAM roles under mount {_safe_log_text(mount_point)}") + self.logger.info(f"Found {len(role_names)} AWS IAM roles under mount {_safe_ref_text(mount_point)}") return self.extend_result(role_names) def get_aws_iam_role( @@ -470,12 +480,12 @@ def get_aws_iam_role( try: response = self.vault_client.secrets.aws.read_role(name=role_name, mount_point=mount_point) except VaultError as e: - self.logger.warning(f"Failed to read AWS IAM role {_safe_log_text(role_name)}: {_safe_log_text(e)}") + self.logger.warning(f"Failed to read AWS IAM role {_safe_ref_text(role_name)}: {_safe_log_text(e, role_name)}") return None role_data = response.get("data") if 
is_nothing(role_data): - self.logger.warning(f"AWS IAM role {_safe_log_text(role_name)} exists but returned no data") + self.logger.warning(f"AWS IAM role {_safe_ref_text(role_name)} exists but returned no data") return None return self.extend_result(role_data) @@ -518,15 +528,17 @@ def generate_aws_credentials( try: response = aws_secrets.generate_credentials(name=role_name, mount_point=mount_point, **generate_kwargs) except VaultError as e: - safe_role_name = _safe_log_text(role_name) - self.logger.exception(f"Failed to generate AWS credentials for role {safe_role_name}: {_safe_log_text(e)}") + safe_role_name = _safe_ref_text(role_name) + self.logger.exception( + f"Failed to generate AWS credentials for role {safe_role_name}: {_safe_log_text(e, role_name)}" + ) raise RuntimeError(f"Failed to generate AWS credentials for role {safe_role_name}") from e credentials = response.get("data") or {} if not credentials: - raise RuntimeError(f"Vault returned empty credentials for role {_safe_log_text(role_name)}") + raise RuntimeError(f"Vault returned empty credentials for role {_safe_ref_text(role_name)}") - self.logger.info(f"Generated AWS credentials for role {_safe_log_text(role_name)}") + self.logger.info(f"Generated AWS credentials for role {_safe_ref_text(role_name)}") return self.extend_result(credentials) diff --git a/tests/connectors/test_vault_connector.py b/tests/connectors/test_vault_connector.py index 058c750..f3bce0c 100644 --- a/tests/connectors/test_vault_connector.py +++ b/tests/connectors/test_vault_connector.py @@ -173,6 +173,7 @@ def test_list_secrets_redacts_vault_error_logs(self, base_connector_kwargs): logs = _logged_text(connector.logger) assert secrets == {} + assert "does/not/exist" not in logs assert "hunter2" not in logs assert "raw_token" not in logs assert "[REDACTED]" in logs @@ -290,8 +291,9 @@ def test_get_secret_matcher_logs_redact_secret_values(self, base_connector_kwarg logs = _logged_text(connector.logger) assert secret == {"password": 
"hunter2", "username": "admin"} + assert "prod/db" not in logs assert "hunter2" not in logs - assert "Matched prod/db on matcher password" in logs + assert "Matched [REDACTED] on matcher password" in logs def test_generate_aws_credentials_success(self, base_connector_kwargs): """generate_aws_credentials should return the generated credential payload.""" From 80efbbdb733e1b75cb6d9f1c45f07925f877244a Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 18:05:07 -0500 Subject: [PATCH 212/287] fix: harden aws resource diagnostics --- src/extended_data/connectors/aws/__init__.py | 74 ++++++----- .../connectors/aws/_diagnostics.py | 38 ++++++ .../connectors/aws/codedeploy.py | 58 ++++++--- src/extended_data/connectors/aws/s3.py | 72 +++++++---- src/extended_data/connectors/aws/sso.py | 53 +++++--- tests/connectors/test_aws_codedeploy.py | 116 ++++++++++++++++-- tests/connectors/test_aws_connector.py | 43 ++++++- tests/connectors/test_aws_s3.py | 48 ++++++++ tests/connectors/test_aws_sso.py | 71 +++++++++++ 9 files changed, 460 insertions(+), 113 deletions(-) create mode 100644 src/extended_data/connectors/aws/_diagnostics.py diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index 6dc2844..701f072 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -20,6 +20,7 @@ from typing import TYPE_CHECKING, Any from extended_data.connectors._optional import require_extra +from extended_data.connectors.aws._diagnostics import aws_operation_error, safe_aws_ref, safe_aws_text from extended_data.connectors.aws.organizations import AWSOrganizationsMixin from extended_data.connectors.aws.s3 import AWSS3Mixin from extended_data.connectors.aws.sso import AWSSSOmixin @@ -27,22 +28,11 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.logging import Logging from extended_data.primitives import is_nothing 
-from extended_data.primitives.redaction import redact_sensitive_text AWSSecretValue = str | ExtendedString | Mapping[str, Any] | None -def _safe_aws_text(value: Any, *sensitive_values: Any) -> str: - """Redact secrets and resource identifiers from AWS diagnostics.""" - return redact_sensitive_text(value, values=sensitive_values) - - -def _aws_secret_error(action: str, exc: BaseException, *sensitive_values: Any) -> str: - """Build a redacted AWS Secrets Manager operation error message.""" - return f"{action}: {_safe_aws_text(exc, *sensitive_values)}" - - if TYPE_CHECKING: import boto3 @@ -109,21 +99,23 @@ def assume_role(self, execution_role_arn: str, role_session_name: str) -> Any: Raises: RuntimeError: If role assumption fails. """ - self.logger.info(f"Attempting to assume role: {execution_role_arn}") + safe_role_arn = safe_aws_ref(execution_role_arn) + self.logger.info(f"Attempting to assume role: {safe_role_arn}") sts_client = self.default_aws_session.client("sts") try: response = sts_client.assume_role(RoleArn=execution_role_arn, RoleSessionName=role_session_name) credentials = response["Credentials"] - self.logger.info(f"Successfully assumed role: {execution_role_arn}") + self.logger.info(f"Successfully assumed role: {safe_role_arn}") return self._boto3.Session( aws_access_key_id=credentials["AccessKeyId"], aws_secret_access_key=credentials["SecretAccessKey"], aws_session_token=credentials["SessionToken"], ) except ClientError as e: - self.logger.error(f"Failed to assume role: {execution_role_arn}", exc_info=True) - raise RuntimeError(f"Failed to assume role {execution_role_arn}") from e + error_message = aws_operation_error("Failed to assume role", e, execution_role_arn) + self.logger.error(error_message) # noqa: TRY400 - traceback can expose raw provider diagnostics. 
+ raise RuntimeError(error_message) from None def get_aws_session( self, @@ -227,8 +219,14 @@ def get_aws_resource( try: return session.resource(service_name, config=config, **resource_args) except ClientError as e: - self.logger.error(f"Failed to create resource for service: {service_name}", exc_info=True) - raise RuntimeError(f"Failed to create resource for service {service_name}") from e + error_message = aws_operation_error( + f"Failed to create resource for service {service_name}", + e, + execution_role_arn, + role_session_name, + ) + self.logger.error(error_message) # noqa: TRY400 - traceback can expose raw provider diagnostics. + raise RuntimeError(error_message) from None # ========================================================================= # Identity Operations @@ -266,7 +264,7 @@ def get_secret( Returns: The secret value as a string, or None if not found. """ - safe_secret_id = _safe_aws_text(secret_id, secret_id) + safe_secret_id = safe_aws_text(secret_id, secret_id) self.logger.debug(f"Getting AWS secret: {safe_secret_id}") if secretsmanager is None: @@ -284,9 +282,9 @@ def get_secret( if error_code == "ResourceNotFoundException": self.logger.warning(f"Secret not found: {safe_secret_id}") return None - error_message = _aws_secret_error("Failed to get secret", e, secret_id) - self.logger.exception(error_message) - raise ValueError(error_message) from e + error_message = aws_operation_error("Failed to get secret", e, secret_id) + self.logger.error(error_message) # noqa: TRY400 - traceback can expose raw provider diagnostics. 
+ raise ValueError(error_message) from None if "SecretString" in response: return self.extend_result(response["SecretString"]) @@ -385,7 +383,7 @@ def create_secret( msg = "secret_value is required to create a secret" raise ValueError(msg) - safe_name = _safe_aws_text(name, name) + safe_name = safe_aws_text(name, name) self.logger.info(f"Creating AWS secret: {safe_name}") role_arn = execution_role_arn or self.execution_role_arn secretsmanager = self.get_aws_client( @@ -401,12 +399,12 @@ def create_secret( try: response = secretsmanager.create_secret(**create_kwargs) - self.logger.info(f"Created AWS secret ARN: {_safe_aws_text(response.get('ARN'), response.get('ARN'))}") + self.logger.info(f"Created AWS secret ARN: {safe_aws_text(response.get('ARN'), response.get('ARN'))}") return self.extend_result(response) except ClientError as exc: - error_message = _aws_secret_error("Failed to create secret", exc, name, secret_value) - self.logger.error(error_message, exc_info=True) - raise RuntimeError(error_message) from exc + error_message = aws_operation_error("Failed to create secret", exc, name, secret_value) + self.logger.error(error_message) # noqa: TRY400 - traceback can expose raw provider diagnostics. 
+ raise RuntimeError(error_message) from None def update_secret( self, @@ -422,7 +420,7 @@ def update_secret( msg = "secret_value is required to update a secret" raise ValueError(msg) - safe_secret_id = _safe_aws_text(secret_id, secret_id) + safe_secret_id = safe_aws_text(secret_id, secret_id) self.logger.info(f"Updating AWS secret: {safe_secret_id}") role_arn = execution_role_arn or self.execution_role_arn @@ -434,12 +432,12 @@ def update_secret( try: response = secretsmanager.update_secret(SecretId=secret_id, SecretString=secret_value) response_arn = response.get("ARN", secret_id) - self.logger.info(f"Updated AWS secret ARN: {_safe_aws_text(response_arn, response_arn)}") + self.logger.info(f"Updated AWS secret ARN: {safe_aws_text(response_arn, response_arn)}") return self.extend_result(response) except ClientError as exc: - error_message = _aws_secret_error("Failed to update secret", exc, secret_id, secret_value) - self.logger.error(error_message, exc_info=True) - raise RuntimeError(error_message) from exc + error_message = aws_operation_error("Failed to update secret", exc, secret_id, secret_value) + self.logger.error(error_message) # noqa: TRY400 - traceback can expose raw provider diagnostics. 
+ raise RuntimeError(error_message) from None def delete_secret( self, @@ -457,7 +455,7 @@ def delete_secret( msg = "recovery_window_days must be between 7 and 30 when not forcing deletion" raise ValueError(msg) - safe_secret_id = _safe_aws_text(secret_id, secret_id) + safe_secret_id = safe_aws_text(secret_id, secret_id) self.logger.info(f"Deleting AWS secret: {safe_secret_id}") role_arn = execution_role_arn or self.execution_role_arn @@ -475,12 +473,12 @@ def delete_secret( try: response = secretsmanager.delete_secret(**delete_kwargs) response_arn = response.get("ARN", secret_id) - self.logger.info(f"Delete secret request submitted for: {_safe_aws_text(response_arn, response_arn)}") + self.logger.info(f"Delete secret request submitted for: {safe_aws_text(response_arn, response_arn)}") return self.extend_result(response) except ClientError as exc: - error_message = _aws_secret_error("Failed to delete secret", exc, secret_id) - self.logger.error(error_message, exc_info=True) - raise RuntimeError(error_message) from exc + error_message = aws_operation_error("Failed to delete secret", exc, secret_id) + self.logger.error(error_message) # noqa: TRY400 - traceback can expose raw provider diagnostics. 
+ raise RuntimeError(error_message) from None def delete_secrets_matching( self, @@ -494,7 +492,7 @@ def delete_secrets_matching( msg = "prefix is required to delete matching secrets" raise ValueError(msg) - safe_prefix = _safe_aws_text(prefix, prefix) + safe_prefix = safe_aws_text(prefix, prefix) self.logger.info(f"Deleting secrets matching prefix: {safe_prefix} (dry_run={dry_run})") role_arn = execution_role_arn or self.execution_role_arn @@ -510,7 +508,7 @@ def delete_secrets_matching( elif isinstance(value, Mapping) and "ARN" in value: secret_arns.append(value["ARN"]) else: - self.logger.debug(f"Skipping secret {_safe_aws_text(secret_name, secret_name)} due to missing ARN data") + self.logger.debug(f"Skipping secret {safe_aws_text(secret_name, secret_name)} due to missing ARN data") if not secret_arns: self.logger.info(f"No secrets found for prefix: {safe_prefix}") diff --git a/src/extended_data/connectors/aws/_diagnostics.py b/src/extended_data/connectors/aws/_diagnostics.py new file mode 100644 index 0000000..577e1fb --- /dev/null +++ b/src/extended_data/connectors/aws/_diagnostics.py @@ -0,0 +1,38 @@ +"""AWS diagnostic redaction helpers.""" + +from __future__ import annotations + +from collections.abc import Iterable, Mapping +from typing import Any + +from extended_data.primitives.redaction import redact_sensitive_text + + +def _iter_diagnostic_values(values: Iterable[Any]) -> Iterable[Any]: + """Yield scalar values from nested diagnostic context.""" + for value in values: + if value is None: + continue + if isinstance(value, Mapping): + yield from _iter_diagnostic_values(value.values()) + elif isinstance(value, (str, bytes)): + yield value + elif isinstance(value, Iterable): + yield from _iter_diagnostic_values(value) + else: + yield value + + +def safe_aws_text(value: Any, *sensitive_values: Any) -> str: + """Redact secrets and caller-provided resource identifiers from AWS diagnostics.""" + return redact_sensitive_text(value, 
values=_iter_diagnostic_values(sensitive_values)) + + +def safe_aws_ref(value: Any) -> str: + """Redact a single AWS resource reference for diagnostic logs.""" + return safe_aws_text(value, value) + + +def aws_operation_error(action: str, exc: BaseException, *sensitive_values: Any) -> str: + """Build a redacted AWS operation error message.""" + return f"{action}: {safe_aws_text(exc, *sensitive_values)}" diff --git a/src/extended_data/connectors/aws/codedeploy.py b/src/extended_data/connectors/aws/codedeploy.py index 4d78024..4e069dc 100644 --- a/src/extended_data/connectors/aws/codedeploy.py +++ b/src/extended_data/connectors/aws/codedeploy.py @@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Any from extended_data.connectors.aws import AWSConnector +from extended_data.connectors.aws._diagnostics import aws_operation_error, safe_aws_ref, safe_aws_text from extended_data.containers import ExtendedDict, extend_data, to_builtin from extended_data.logging import Logging @@ -138,8 +139,12 @@ def _safe_get_deployment( ) -> dict[str, Any] | None: try: response = codedeploy_client.get_deployment(deploymentId=deployment_id) - except ClientError: - logger.warning("Unable to fetch CodeDeploy deployment details for %s", deployment_id, exc_info=True) + except ClientError as exc: + logger.warning( + "Unable to fetch CodeDeploy deployment details for %s: %s", + safe_aws_ref(deployment_id), + safe_aws_text(exc, deployment_id), + ) return None return response.get("deploymentInfo") @@ -234,9 +239,17 @@ def get_aws_codedeploy_deployments( final_token = token break except ClientError as exc: - logger.error("Failed to list CodeDeploy deployments", exc_info=True) - msg = "Failed to list AWS CodeDeploy deployments" - raise RuntimeError(msg) from exc + msg = aws_operation_error( + "Failed to list AWS CodeDeploy deployments", + exc, + application_name, + deployment_group_name, + deployment_config_name, + next_token, + tag_filters, + ) + logger.error(msg) # noqa: TRY400 - traceback can 
expose raw provider diagnostics. + raise RuntimeError(msg) from None deployment_infos: list[dict[str, Any]] | None = None if include_details and deployment_ids: @@ -248,11 +261,8 @@ def get_aws_codedeploy_deployments( if deployment_id in items: deployment_infos.append(items[deployment_id]) - logger.info( - "Fetched %s CodeDeploy deployments%s", - len(deployment_ids), - f" (next token: {final_token})" if final_token else "", - ) + safe_token_detail = f" (next token: {safe_aws_ref(final_token)})" if final_token else "" + logger.info("Fetched %s CodeDeploy deployments%s", len(deployment_ids), safe_token_detail) _ = connector # appease linters when we instantiate a connector internally return extend_data( @@ -332,16 +342,28 @@ def create_codedeploy_deployment( try: response = client.create_deployment(**request) except ClientError as exc: - logger.error("Failed to create CodeDeploy deployment", exc_info=True) - msg = "Failed to create AWS CodeDeploy deployment" - raise RuntimeError(msg) from exc + msg = aws_operation_error( + "Failed to create AWS CodeDeploy deployment", + exc, + application_name, + deployment_group_name, + revision, + description, + ) + logger.error(msg) # noqa: TRY400 - traceback can expose raw provider diagnostics. + raise RuntimeError(msg) from None deployment_id = response.get("deploymentId") if not deployment_id: msg = "CodeDeploy did not return a deploymentId." 
raise RuntimeError(msg) - logger.info("Created CodeDeploy deployment %s for %s/%s", deployment_id, application_name, deployment_group_name) + logger.info( + "Created CodeDeploy deployment %s for %s/%s", + safe_aws_ref(deployment_id), + safe_aws_ref(application_name), + safe_aws_ref(deployment_group_name), + ) deployment_info: dict[str, Any] | None = None if wait: @@ -351,11 +373,13 @@ def create_codedeploy_deployment( deploymentId=deployment_id, WaiterConfig={"Delay": waiter_delay, "MaxAttempts": waiter_max_attempts}, ) - except WaiterError as exc: + except WaiterError: deployment_info = _safe_get_deployment(client, deployment_id, logger) status = deployment_info.get("status") if deployment_info else "unknown" - msg = f"Deployment {deployment_id} did not reach a successful state (status={status})." - raise RuntimeError(msg) from exc + safe_deployment_id = safe_aws_ref(deployment_id) + safe_status = safe_aws_text(status) + msg = f"Deployment {safe_deployment_id} did not reach a successful state (status={safe_status})." 
+ raise RuntimeError(msg) from None deployment_info = _safe_get_deployment(client, deployment_id, logger) elif include_details: deployment_info = _safe_get_deployment(client, deployment_id, logger) diff --git a/src/extended_data/connectors/aws/s3.py b/src/extended_data/connectors/aws/s3.py index 7e02038..066d27c 100644 --- a/src/extended_data/connectors/aws/s3.py +++ b/src/extended_data/connectors/aws/s3.py @@ -10,6 +10,7 @@ from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any +from extended_data.connectors.aws._diagnostics import safe_aws_ref, safe_aws_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.primitives import unhump_map @@ -26,6 +27,12 @@ class ClientError(Exception): """Fallback exception used until botocore is imported.""" +def _safe_s3_uri(bucket: str, key: str | None = None) -> str: + """Return a diagnostic-safe S3 URI.""" + uri = f"s3://{bucket}" if key is None else f"s3://{bucket}/{key}" + return safe_aws_text(uri, bucket, key) + + class AWSS3Mixin: """Mixin providing AWS S3 operations. @@ -109,7 +116,8 @@ def get_bucket_location( Returns: The AWS region where the bucket is located. """ - self.logger.debug(f"Getting location for bucket: {bucket_name}") + safe_bucket = safe_aws_ref(bucket_name) + self.logger.debug(f"Getting location for bucket: {safe_bucket}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) s3 = self.get_aws_client( @@ -138,7 +146,8 @@ def get_object( Returns: The object contents, or None if not found. 
""" - self.logger.debug(f"Getting S3 object: s3://{bucket}/{key}") + safe_uri = _safe_s3_uri(bucket, key) + self.logger.debug(f"Getting S3 object: {safe_uri}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) s3 = self.get_aws_client( @@ -155,7 +164,7 @@ def get_object( return body except ClientError as e: if e.response.get("Error", {}).get("Code") == "NoSuchKey": - self.logger.warning(f"S3 object not found: s3://{bucket}/{key}") + self.logger.warning(f"S3 object not found: {safe_uri}") return None raise @@ -209,7 +218,8 @@ def put_object( Returns: The S3 put_object response. """ - self.logger.debug(f"Putting S3 object: s3://{bucket}/{key}") + safe_uri = _safe_s3_uri(bucket, key) + self.logger.debug(f"Putting S3 object: {safe_uri}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) s3 = self.get_aws_client( @@ -237,7 +247,7 @@ def put_object( put_args["Metadata"] = {str(key): str(value) for key, value in metadata.items()} response = s3.put_object(**put_args) - self.logger.debug(f"Put object to s3://{bucket}/{key}") + self.logger.debug(f"Put object to {safe_uri}") return self.extend_result(response) def put_json_object( @@ -288,7 +298,8 @@ def delete_object( Returns: The S3 delete_object response. """ - self.logger.debug(f"Deleting S3 object: s3://{bucket}/{key}") + safe_uri = _safe_s3_uri(bucket, key) + self.logger.debug(f"Deleting S3 object: {safe_uri}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) s3 = self.get_aws_client( @@ -297,7 +308,7 @@ def delete_object( ) response = s3.delete_object(Bucket=bucket, Key=key) - self.logger.debug(f"Deleted object s3://{bucket}/{key}") + self.logger.debug(f"Deleted object {safe_uri}") return self.extend_result(response) def list_objects( @@ -322,7 +333,8 @@ def list_objects( Returns: List of object metadata dictionaries. 
""" - self.logger.debug(f"Listing objects in s3://{bucket}/{prefix or ''}") + safe_uri = _safe_s3_uri(bucket, prefix or None) + self.logger.debug(f"Listing objects in {safe_uri}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) s3 = self.get_aws_client( @@ -375,7 +387,9 @@ def copy_object( Returns: The S3 copy_object response. """ - self.logger.debug(f"Copying s3://{source_bucket}/{source_key} to s3://{dest_bucket}/{dest_key}") + safe_source_uri = _safe_s3_uri(source_bucket, source_key) + safe_dest_uri = _safe_s3_uri(dest_bucket, dest_key) + self.logger.debug(f"Copying {safe_source_uri} to {safe_dest_uri}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) s3 = self.get_aws_client( @@ -388,7 +402,7 @@ def copy_object( Key=dest_key, CopySource={"Bucket": source_bucket, "Key": source_key}, ) - self.logger.debug(f"Copied object to s3://{dest_bucket}/{dest_key}") + self.logger.debug(f"Copied object to {safe_dest_uri}") return self.extend_result(response) # ========================================================================= @@ -409,7 +423,8 @@ def get_bucket_features( Returns: Dictionary with logging, versioning, lifecycle_rules, and policy. """ - self.logger.debug(f"Getting features for bucket: {bucket_name}") + safe_bucket = safe_aws_ref(bucket_name) + self.logger.debug(f"Getting features for bucket: {safe_bucket}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) s3_resource: ServiceResource = self.get_aws_resource( @@ -421,7 +436,7 @@ def get_bucket_features( # Check if bucket exists if not bucket.creation_date: - self.logger.warning(f"Bucket does not exist: {bucket_name}") + self.logger.warning(f"Bucket does not exist: {safe_bucket}") return self.extend_result({}) features: dict[str, Any] = {} @@ -476,7 +491,8 @@ def find_buckets_by_name( Returns: Dictionary mapping bucket names to bucket data/features. 
""" - self.logger.info(f"Finding S3 buckets containing: {name_contains}") + safe_search = safe_aws_ref(name_contains) + self.logger.info(f"Finding S3 buckets containing: {safe_search}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) s3_resource: ServiceResource = self.get_aws_resource( @@ -488,7 +504,7 @@ def find_buckets_by_name( for bucket in s3_resource.buckets.all(): if name_contains in bucket.name: - self.logger.debug(f"Found matching bucket: {bucket.name}") + self.logger.debug(f"Found matching bucket: {safe_aws_ref(bucket.name)}") if include_features: buckets[bucket.name] = to_builtin( @@ -528,7 +544,8 @@ def create_bucket( Returns: Create bucket response. """ - self.logger.info(f"Creating S3 bucket: {bucket_name}") + safe_bucket = safe_aws_ref(bucket_name) + self.logger.info(f"Creating S3 bucket: {safe_bucket}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) s3 = self.get_aws_client( @@ -548,7 +565,7 @@ def create_bucket( } result = s3.create_bucket(**create_args) - self.logger.info(f"Created bucket: {bucket_name}") + self.logger.info(f"Created bucket: {safe_bucket}") # Enable versioning if requested if enable_versioning: @@ -556,7 +573,7 @@ def create_bucket( Bucket=bucket_name, VersioningConfiguration={"Status": "Enabled"}, ) - self.logger.info(f"Enabled versioning for bucket: {bucket_name}") + self.logger.info(f"Enabled versioning for bucket: {safe_bucket}") # Apply tags if provided if tags: @@ -565,7 +582,7 @@ def create_bucket( Bucket=bucket_name, Tagging={"TagSet": tag_set}, ) - self.logger.info(f"Applied {len(tags)} tags to bucket: {bucket_name}") + self.logger.info(f"Applied {len(tags)} tags to bucket: {safe_bucket}") return self.extend_result(result) @@ -585,7 +602,8 @@ def delete_bucket( Raises: ClientError: If bucket not empty and force=False. 
""" - self.logger.info(f"Deleting S3 bucket: {bucket_name}") + safe_bucket = safe_aws_ref(bucket_name) + self.logger.info(f"Deleting S3 bucket: {safe_bucket}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if force: @@ -596,11 +614,11 @@ def delete_bucket( bucket = s3_resource.Bucket(bucket_name) # Delete all objects - self.logger.info(f"Deleting all objects in bucket: {bucket_name}") + self.logger.info(f"Deleting all objects in bucket: {safe_bucket}") bucket.objects.all().delete() # Delete all versions - self.logger.info(f"Deleting all versions in bucket: {bucket_name}") + self.logger.info(f"Deleting all versions in bucket: {safe_bucket}") bucket.object_versions.all().delete() s3 = self.get_aws_client( @@ -609,7 +627,7 @@ def delete_bucket( ) s3.delete_bucket(Bucket=bucket_name) - self.logger.info(f"Deleted bucket: {bucket_name}") + self.logger.info(f"Deleted bucket: {safe_bucket}") def get_bucket_tags( self, @@ -653,7 +671,8 @@ def set_bucket_tags( tags: Dictionary of tag key-value pairs. execution_role_arn: ARN of role to assume for cross-account access. 
""" - self.logger.info(f"Setting {len(tags)} tags on bucket: {bucket_name}") + safe_bucket = safe_aws_ref(bucket_name) + self.logger.info(f"Setting {len(tags)} tags on bucket: {safe_bucket}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) s3 = self.get_aws_client( @@ -666,7 +685,7 @@ def set_bucket_tags( Bucket=bucket_name, Tagging={"TagSet": tag_set}, ) - self.logger.info(f"Set tags on bucket: {bucket_name}") + self.logger.info(f"Set tags on bucket: {safe_bucket}") def get_bucket_sizes( self, @@ -705,6 +724,7 @@ def get_bucket_sizes( bucket_sizes: dict[str, dict[str, Any]] = {} for bucket_name in bucket_names: + safe_bucket = safe_aws_ref(bucket_name) size_bytes = 0 object_count = 0 @@ -725,7 +745,7 @@ def get_bucket_sizes( if size_response.get("Datapoints"): size_bytes = int(max(size_response["Datapoints"], key=lambda x: x["Timestamp"])["Average"]) except Exception as e: - self.logger.debug(f"Could not get size for {bucket_name}: {e}") + self.logger.debug(f"Could not get size for {safe_bucket}: {safe_aws_text(e, bucket_name)}") # Get object count try: @@ -744,7 +764,7 @@ def get_bucket_sizes( if count_response.get("Datapoints"): object_count = int(max(count_response["Datapoints"], key=lambda x: x["Timestamp"])["Average"]) except Exception as e: - self.logger.debug(f"Could not get count for {bucket_name}: {e}") + self.logger.debug(f"Could not get count for {safe_bucket}: {safe_aws_text(e, bucket_name)}") bucket_sizes[bucket_name] = { "size_bytes": size_bytes, diff --git a/src/extended_data/connectors/aws/sso.py b/src/extended_data/connectors/aws/sso.py index bcab456..0634a44 100644 --- a/src/extended_data/connectors/aws/sso.py +++ b/src/extended_data/connectors/aws/sso.py @@ -12,6 +12,7 @@ from deepmerge import always_merger +from extended_data.connectors.aws._diagnostics import safe_aws_ref from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.primitives import is_nothing, 
unhump_map @@ -75,7 +76,7 @@ def get_identity_store_id( raise RuntimeError(msg) identity_store_id = instance_list[0]["IdentityStoreId"] - self.logger.info(f"Identity store ID: {identity_store_id}") + self.logger.info(f"Identity store ID: {safe_aws_ref(identity_store_id)}") return self.extend_result(identity_store_id) def get_sso_instance_arn( @@ -109,7 +110,7 @@ def get_sso_instance_arn( raise RuntimeError(msg) instance_arn = instance_list[0]["InstanceArn"] - self.logger.info(f"SSO instance ARN: {instance_arn}") + self.logger.info(f"SSO instance ARN: {safe_aws_ref(instance_arn)}") return self.extend_result(instance_arn) # ========================================================================= @@ -247,7 +248,8 @@ def create_sso_user( Returns: Created user response. """ - self.logger.info(f"Creating SSO user: {user_name}") + safe_user_name = safe_aws_ref(user_name) + self.logger.info(f"Creating SSO user: {safe_user_name}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: @@ -275,7 +277,7 @@ def create_sso_user( user_body["Emails"] = to_builtin(list(emails)) result = identitystore.create_user(**user_body) - self.logger.info(f"Created SSO user: {user_name} ({result.get('UserId')})") + self.logger.info(f"Created SSO user: {safe_user_name} ({safe_aws_ref(result.get('UserId'))})") return self.extend_result(result) def delete_sso_user( @@ -291,7 +293,8 @@ def delete_sso_user( identity_store_id: Identity store ID. Auto-detected if not provided. execution_role_arn: ARN of role to assume for cross-account access. 
""" - self.logger.info(f"Deleting SSO user: {user_id}") + safe_user_id = safe_aws_ref(user_id) + self.logger.info(f"Deleting SSO user: {safe_user_id}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: @@ -306,7 +309,7 @@ def delete_sso_user( IdentityStoreId=identity_store_id, UserId=user_id, ) - self.logger.info(f"Deleted SSO user: {user_id}") + self.logger.info(f"Deleted SSO user: {safe_user_id}") # ========================================================================= # Groups @@ -461,7 +464,8 @@ def create_sso_group( Returns: Created group response. """ - self.logger.info(f"Creating SSO group: {display_name}") + safe_display_name = safe_aws_ref(display_name) + self.logger.info(f"Creating SSO group: {safe_display_name}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: @@ -477,7 +481,7 @@ def create_sso_group( DisplayName=display_name, Description=description, ) - self.logger.info(f"Created SSO group: {display_name} ({result.get('GroupId')})") + self.logger.info(f"Created SSO group: {safe_display_name} ({safe_aws_ref(result.get('GroupId'))})") return self.extend_result(result) def delete_sso_group( @@ -493,7 +497,8 @@ def delete_sso_group( identity_store_id: Identity store ID. Auto-detected if not provided. execution_role_arn: ARN of role to assume for cross-account access. """ - self.logger.info(f"Deleting SSO group: {group_id}") + safe_group_id = safe_aws_ref(group_id) + self.logger.info(f"Deleting SSO group: {safe_group_id}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: @@ -508,7 +513,7 @@ def delete_sso_group( IdentityStoreId=identity_store_id, GroupId=group_id, ) - self.logger.info(f"Deleted SSO group: {group_id}") + self.logger.info(f"Deleted SSO group: {safe_group_id}") def add_user_to_group( self, @@ -528,7 +533,9 @@ def add_user_to_group( Returns: Membership response. 
""" - self.logger.info(f"Adding user {user_id} to group {group_id}") + safe_user_id = safe_aws_ref(user_id) + safe_group_id = safe_aws_ref(group_id) + self.logger.info(f"Adding user {safe_user_id} to group {safe_group_id}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: @@ -544,7 +551,7 @@ def add_user_to_group( GroupId=group_id, MemberId={"UserId": user_id}, ) - self.logger.info(f"Added user {user_id} to group {group_id}") + self.logger.info(f"Added user {safe_user_id} to group {safe_group_id}") return self.extend_result(result) def remove_user_from_group( @@ -560,7 +567,8 @@ def remove_user_from_group( identity_store_id: Identity store ID. Auto-detected if not provided. execution_role_arn: ARN of role to assume for cross-account access. """ - self.logger.info(f"Removing membership: {membership_id}") + safe_membership_id = safe_aws_ref(membership_id) + self.logger.info(f"Removing membership: {safe_membership_id}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not identity_store_id: @@ -575,7 +583,7 @@ def remove_user_from_group( IdentityStoreId=identity_store_id, MembershipId=membership_id, ) - self.logger.info(f"Removed membership: {membership_id}") + self.logger.info(f"Removed membership: {safe_membership_id}") # ========================================================================= # Permission Sets @@ -719,7 +727,8 @@ def list_account_assignments( Returns: List of account assignment dictionaries. 
""" - self.logger.info(f"Listing account assignments for {account_id}") + safe_account_id = safe_aws_ref(account_id) + self.logger.info(f"Listing account assignments for {safe_account_id}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not instance_arn: @@ -752,7 +761,7 @@ def list_account_assignments( if unhump_assignments: assignments = [unhump_map(a) for a in assignments] - self.logger.info(f"Retrieved {len(assignments)} assignments for {account_id}") + self.logger.info(f"Retrieved {len(assignments)} assignments for {safe_account_id}") return self.extend_result(assignments) def create_account_assignment( @@ -777,7 +786,9 @@ def create_account_assignment( Returns: Account assignment creation status. """ - self.logger.info(f"Creating account assignment: {principal_type} {principal_id} -> {account_id}") + safe_principal_id = safe_aws_ref(principal_id) + safe_account_id = safe_aws_ref(account_id) + self.logger.info(f"Creating account assignment: {principal_type} {safe_principal_id} -> {safe_account_id}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not instance_arn: @@ -796,7 +807,7 @@ def create_account_assignment( PrincipalType=principal_type, PrincipalId=principal_id, ) - self.logger.info(f"Created account assignment for {principal_id}") + self.logger.info(f"Created account assignment for {safe_principal_id}") return self.extend_result(result) def delete_account_assignment( @@ -821,7 +832,9 @@ def delete_account_assignment( Returns: Account assignment deletion status. 
""" - self.logger.info(f"Deleting account assignment: {principal_type} {principal_id} -> {account_id}") + safe_principal_id = safe_aws_ref(principal_id) + safe_account_id = safe_aws_ref(account_id) + self.logger.info(f"Deleting account assignment: {principal_type} {safe_principal_id} -> {safe_account_id}") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) if not instance_arn: @@ -840,5 +853,5 @@ def delete_account_assignment( PrincipalType=principal_type, PrincipalId=principal_id, ) - self.logger.info(f"Deleted account assignment for {principal_id}") + self.logger.info(f"Deleted account assignment for {safe_principal_id}") return self.extend_result(result) diff --git a/tests/connectors/test_aws_codedeploy.py b/tests/connectors/test_aws_codedeploy.py index cb7c914..61c3423 100644 --- a/tests/connectors/test_aws_codedeploy.py +++ b/tests/connectors/test_aws_codedeploy.py @@ -18,13 +18,24 @@ ) -def _client_error(operation: str) -> ClientError: +def _client_error(operation: str, message: str = "denied") -> ClientError: return ClientError( - error_response={"Error": {"Code": "AccessDenied", "Message": "denied"}}, + error_response={"Error": {"Code": "AccessDenied", "Message": message}}, operation_name=operation, ) +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) + + +def _logging_adapter() -> MagicMock: + adapter = MagicMock() + adapter.logger = MagicMock() + return adapter + + class TestGetAwsCodeDeployDeployments: def test_returns_details_and_normalizes_statuses(self): codedeploy_client = MagicMock() @@ -59,10 +70,31 @@ def test_returns_details_and_normalizes_statuses(self): def test_raises_runtime_error_on_client_failure(self): codedeploy_client = MagicMock() - codedeploy_client.list_deployments.side_effect = _client_error("ListDeployments") + codedeploy_client.list_deployments.side_effect = _client_error( + 
"ListDeployments", + "denied private-app prod-group token-private tag-private token=raw-token", + ) + logging_adapter = _logging_adapter() + + with pytest.raises(RuntimeError) as exc_info: + get_aws_codedeploy_deployments( + application_name="private-app", + deployment_group_name="prod-group", + next_token="token-private", + tag_filters=[{"Value": "tag-private"}], + codedeploy_client=codedeploy_client, + logging_adapter=logging_adapter, + ) - with pytest.raises(RuntimeError): - get_aws_codedeploy_deployments(codedeploy_client=codedeploy_client) + diagnostics = _logged_text(logging_adapter.logger) + str(exc_info.value) + assert "private-app" not in diagnostics + assert "prod-group" not in diagnostics + assert "token-private" not in diagnostics + assert "tag-private" not in diagnostics + assert "raw-token" not in diagnostics + assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + assert all("exc_info" not in logged_call.kwargs for logged_call in logging_adapter.logger.method_calls) class TestCreateCodeDeployDeployment: @@ -99,10 +131,11 @@ def test_waits_for_success_and_returns_details(self): def test_waiter_failure_raises_runtime_error(self): codedeploy_client = MagicMock() - codedeploy_client.create_deployment.return_value = {"deploymentId": "dep-456"} + codedeploy_client.create_deployment.return_value = {"deploymentId": "dep-sensitive"} codedeploy_client.get_deployment.return_value = { - "deploymentInfo": {"deploymentId": "dep-456", "status": "Failed"} + "deploymentInfo": {"deploymentId": "dep-sensitive", "status": "Failed"} } + logging_adapter = _logging_adapter() waiter = MagicMock() waiter.wait.side_effect = WaiterError( @@ -112,7 +145,7 @@ def test_waiter_failure_raises_runtime_error(self): ) codedeploy_client.get_waiter.return_value = waiter - with pytest.raises(RuntimeError): + with pytest.raises(RuntimeError) as exc_info: create_codedeploy_deployment( application_name="app", deployment_group_name="group", @@ -122,8 +155,75 @@ def 
test_waiter_failure_raises_runtime_error(self): }, wait=True, codedeploy_client=codedeploy_client, + logging_adapter=logging_adapter, ) + diagnostics = _logged_text(logging_adapter.logger) + str(exc_info.value) + assert "dep-sensitive" not in diagnostics + assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + + def test_detail_fetch_failure_logs_redact_deployment_id(self): + """Detail hydration failures should not log deployment identifiers or raw provider messages.""" + codedeploy_client = MagicMock() + codedeploy_client.create_deployment.return_value = {"deploymentId": "dep-sensitive"} + codedeploy_client.get_deployment.side_effect = _client_error( + "GetDeployment", + "denied for dep-sensitive token=raw-token", + ) + logging_adapter = _logging_adapter() + + result = create_codedeploy_deployment( + application_name="app", + deployment_group_name="group", + revision={ + "revisionType": "S3", + "s3Location": {"bucket": "bucket", "key": "bundle.zip", "bundleType": "zip"}, + }, + wait=False, + include_details=True, + codedeploy_client=codedeploy_client, + logging_adapter=logging_adapter, + ) + + assert result["deployment_id"] == "dep-sensitive" + logs = _logged_text(logging_adapter.logger) + assert "[REDACTED]" in logs + assert "dep-sensitive" not in logs + assert "raw-token" not in logs + assert all("exc_info" not in logged_call.kwargs for logged_call in logging_adapter.logger.method_calls) + + def test_create_failure_redacts_request_context(self): + """Create failures should redact app/group/revision identifiers from diagnostics.""" + codedeploy_client = MagicMock() + codedeploy_client.create_deployment.side_effect = _client_error( + "CreateDeployment", + "denied private-app prod-group prod-bucket bundle.zip secret=raw-secret", + ) + logging_adapter = _logging_adapter() + + with pytest.raises(RuntimeError) as exc_info: + create_codedeploy_deployment( + application_name="private-app", + deployment_group_name="prod-group", + revision={ + 
"revisionType": "S3", + "s3Location": {"bucket": "prod-bucket", "key": "bundle.zip", "bundleType": "zip"}, + }, + codedeploy_client=codedeploy_client, + logging_adapter=logging_adapter, + ) + + diagnostics = _logged_text(logging_adapter.logger) + str(exc_info.value) + assert "private-app" not in diagnostics + assert "prod-group" not in diagnostics + assert "prod-bucket" not in diagnostics + assert "bundle.zip" not in diagnostics + assert "raw-secret" not in diagnostics + assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + assert all("exc_info" not in logged_call.kwargs for logged_call in logging_adapter.logger.method_calls) + def test_validates_file_exists_behavior(self): codedeploy_client = MagicMock() diff --git a/tests/connectors/test_aws_connector.py b/tests/connectors/test_aws_connector.py index c7fd0d0..a7b9894 100644 --- a/tests/connectors/test_aws_connector.py +++ b/tests/connectors/test_aws_connector.py @@ -65,9 +65,11 @@ def test_assume_role_success(self, mock_session_class, base_connector_kwargs): @patch("extended_data.connectors.aws.boto3.Session") def test_assume_role_failure(self, mock_session_class, base_connector_kwargs): """Test failed role assumption.""" + role_arn = "arn:aws:iam::123456789012:role/TestRole" mock_sts_client = MagicMock() mock_sts_client.assume_role.side_effect = ClientError( - {"Error": {"Code": "AccessDenied", "Message": "Not authorized"}}, "AssumeRole" + {"Error": {"Code": "AccessDenied", "Message": f"Not authorized for {role_arn} token=raw-token"}}, + "AssumeRole", ) mock_default_session = MagicMock() @@ -77,11 +79,16 @@ def test_assume_role_failure(self, mock_session_class, base_connector_kwargs): connector = AWSConnector(**base_connector_kwargs) connector.default_aws_session = mock_default_session - role_arn = "arn:aws:iam::123456789012:role/TestRole" - - with pytest.raises(RuntimeError, match="Failed to assume role"): + with pytest.raises(RuntimeError, match="Failed to assume role") as exc_info: 
connector.assume_role(role_arn, "test-session") + diagnostics = _logged_text(connector.logger) + str(exc_info.value) + assert role_arn not in diagnostics + assert "raw-token" not in diagnostics + assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + assert all("exc_info" not in logged_call.kwargs for logged_call in connector.logger.method_calls) + def test_get_aws_session_default(self, base_connector_kwargs): """Test getting default AWS session.""" connector = AWSConnector(**base_connector_kwargs) @@ -126,6 +133,30 @@ def test_get_aws_resource(self, mock_session_class, base_connector_kwargs): assert resource == mock_resource mock_session.resource.assert_called_once() + @patch("extended_data.connectors.aws.boto3.Session") + def test_get_aws_resource_failure_redacts_exception_context(self, mock_session_class, base_connector_kwargs): + """Resource creation failures should not chain raw provider exceptions into diagnostics.""" + mock_session = MagicMock() + mock_session.resource.side_effect = ClientError( + {"Error": {"Code": "AccessDenied", "Message": "denied for arn:role/private token=raw-token"}}, + "CreateResource", + ) + mock_session_class.return_value = mock_session + + connector = AWSConnector(**base_connector_kwargs) + connector.default_aws_session = mock_session + connector.get_aws_session = MagicMock(return_value=mock_session) + + with pytest.raises(RuntimeError) as exc_info: + connector.get_aws_resource("s3", execution_role_arn="arn:role/private") + + diagnostics = _logged_text(connector.logger) + str(exc_info.value) + assert "arn:role/private" not in diagnostics + assert "raw-token" not in diagnostics + assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + assert all("exc_info" not in logged_call.kwargs for logged_call in connector.logger.method_calls) + def test_list_secrets_returns_arns_with_filters(self, base_connector_kwargs): """Ensure listing secrets returns ARNs when not fetching values.""" connector 
= AWSConnector(**base_connector_kwargs) @@ -276,6 +307,8 @@ def test_get_secret_redacts_client_error_diagnostics(self, base_connector_kwargs assert "raw_token" not in diagnostics assert "hunter2" not in diagnostics assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + assert all("exc_info" not in logged_call.kwargs for logged_call in connector.logger.method_calls) def test_get_secret_redacts_missing_secret_log(self, base_connector_kwargs): """AWS missing-secret logs should not expose raw requested IDs.""" @@ -350,6 +383,8 @@ def test_create_secret_redacts_error_diagnostics(self, base_connector_kwargs): assert "raw-secret" not in diagnostics assert "key_123" not in diagnostics assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + assert all("exc_info" not in logged_call.kwargs for logged_call in connector.logger.method_calls) def test_update_secret_calls_aws(self, base_connector_kwargs): """Ensure update_secret forwards call to boto3 client.""" diff --git a/tests/connectors/test_aws_s3.py b/tests/connectors/test_aws_s3.py index aa82a3c..5270a36 100644 --- a/tests/connectors/test_aws_s3.py +++ b/tests/connectors/test_aws_s3.py @@ -19,6 +19,11 @@ from extended_data.connectors.aws import AWSConnector +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) + + @pytest.fixture def aws_connector(): """Create AWS connector with mocked clients.""" @@ -183,6 +188,22 @@ def test_get_object_not_found(self, aws_connector): assert result is None + def test_get_object_not_found_logs_redact_bucket_and_key(self, aws_connector): + """Missing object diagnostics should not expose S3 resource identifiers.""" + mock_s3 = MagicMock() + error = ClientError({"Error": {"Code": "NoSuchKey"}}, "GetObject") + mock_s3.get_object.side_effect = error + aws_connector.get_aws_client = MagicMock(return_value=mock_s3) + + result = 
aws_connector.get_object("prod-secrets-bucket", "customers/acme/private.json") + + assert result is None + mock_s3.get_object.assert_called_once_with(Bucket="prod-secrets-bucket", Key="customers/acme/private.json") + logs = _logged_text(aws_connector.logger) + assert "[REDACTED]" in logs + assert "prod-secrets-bucket" not in logs + assert "customers/acme/private.json" not in logs + def test_get_object_other_error(self, aws_connector): """Test getting an object with other error.""" mock_s3 = MagicMock() @@ -506,6 +527,19 @@ def test_create_bucket_simple(self, aws_connector): assert call_args["Bucket"] == "my-bucket" assert call_args["ACL"] == "private" + def test_create_bucket_logs_redact_bucket_name_but_preserve_call_args(self, aws_connector): + """Bucket creation logs should redact resource names without changing API args.""" + mock_s3 = MagicMock() + mock_s3.create_bucket.return_value = {"Location": "/prod-secrets-bucket"} + aws_connector.get_aws_client = MagicMock(return_value=mock_s3) + + aws_connector.create_bucket("prod-secrets-bucket") + + assert mock_s3.create_bucket.call_args.kwargs["Bucket"] == "prod-secrets-bucket" + logs = _logged_text(aws_connector.logger) + assert "[REDACTED]" in logs + assert "prod-secrets-bucket" not in logs + def test_create_bucket_with_region(self, aws_connector): """Test creating bucket in specific region.""" mock_s3 = MagicMock() @@ -613,3 +647,17 @@ def get_client(client_name, **kwargs): assert result["test-bucket"]["size_bytes"] == 1073741824 assert result["test-bucket"]["size_gb"] == 1.0 assert result["test-bucket"]["object_count"] == 100 + + def test_get_bucket_sizes_error_logs_redact_bucket_name(self, aws_connector): + """CloudWatch metric diagnostics should not leak bucket names.""" + mock_cloudwatch = MagicMock() + mock_cloudwatch.get_metric_statistics.side_effect = RuntimeError("denied for prod-secrets-bucket") + aws_connector.get_aws_client = MagicMock(return_value=mock_cloudwatch) + + result = 
aws_connector.get_bucket_sizes(bucket_names=["prod-secrets-bucket"]) + + assert result["prod-secrets-bucket"]["size_bytes"] == 0 + assert result["prod-secrets-bucket"]["object_count"] == 0 + logs = _logged_text(aws_connector.logger) + assert "[REDACTED]" in logs + assert "prod-secrets-bucket" not in logs diff --git a/tests/connectors/test_aws_sso.py b/tests/connectors/test_aws_sso.py index 4077c0c..9708b57 100644 --- a/tests/connectors/test_aws_sso.py +++ b/tests/connectors/test_aws_sso.py @@ -16,6 +16,11 @@ from extended_data.connectors.aws import AWSConnector +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) + + @pytest.fixture def aws_connector(): """Create AWS connector with mocked clients.""" @@ -46,6 +51,9 @@ def test_get_identity_store_id(self, aws_connector): assert isinstance(result, ExtendedString) assert result == "d-1234567890" aws_connector.get_aws_client.assert_called_once_with(client_name="sso-admin", execution_role_arn=None) + logs = _logged_text(aws_connector.logger) + assert "[REDACTED]" in logs + assert "d-1234567890" not in logs def test_get_identity_store_id_no_instance(self, aws_connector): """Test getting identity store ID with no instances.""" @@ -73,6 +81,9 @@ def test_get_sso_instance_arn(self, aws_connector): assert isinstance(result, ExtendedString) assert result == "arn:aws:sso:::instance/ssoins-1234567890" + logs = _logged_text(aws_connector.logger) + assert "[REDACTED]" in logs + assert "arn:aws:sso:::instance/ssoins-1234567890" not in logs def test_get_sso_instance_arn_no_instance(self, aws_connector): """Test getting SSO instance ARN with no instances.""" @@ -300,6 +311,34 @@ def get_client(client_name, **kwargs): assert result["GroupId"] == "group-1" mock_identitystore.create_group.assert_called_once() + def test_create_sso_group_logs_redact_identifiers_but_preserve_call_args(self, aws_connector): + 
"""Group mutation diagnostics should redact names and IDs.""" + mock_identitystore = MagicMock() + mock_identitystore.create_group.return_value = { + "GroupId": "group-sensitive-1", + "IdentityStoreId": "d-sensitive", + } + + def get_client(client_name, **kwargs): + if client_name == "identitystore": + return mock_identitystore + mock_sso_admin = MagicMock() + mock_sso_admin.list_instances.return_value = {"Instances": [{"IdentityStoreId": "d-sensitive"}]} + return mock_sso_admin + + aws_connector.get_aws_client = MagicMock(side_effect=get_client) + + aws_connector.create_sso_group("Executive Audit", description="Admin group") + + call_args = mock_identitystore.create_group.call_args.kwargs + assert call_args["DisplayName"] == "Executive Audit" + assert call_args["IdentityStoreId"] == "d-sensitive" + logs = _logged_text(aws_connector.logger) + assert "[REDACTED]" in logs + assert "Executive Audit" not in logs + assert "group-sensitive-1" not in logs + assert "d-sensitive" not in logs + def test_delete_sso_group(self, aws_connector): """Test deleting an SSO group.""" mock_identitystore = MagicMock() @@ -423,3 +462,35 @@ def test_create_account_assignment(self, aws_connector): assert isinstance(result["AccountAssignmentCreationStatus"], ExtendedDict) assert "AccountAssignmentCreationStatus" in result mock_sso_admin.create_account_assignment.assert_called_once() + + def test_create_account_assignment_logs_redact_identifiers_but_preserve_call_args(self, aws_connector): + """Account assignment diagnostics should redact resource identifiers.""" + mock_sso_admin = MagicMock() + mock_sso_admin.list_instances.return_value = { + "Instances": [{"InstanceArn": "arn:aws:sso:::instance/ssoins-sensitive"}] + } + mock_sso_admin.create_account_assignment.return_value = { + "AccountAssignmentCreationStatus": { + "Status": "SUCCEEDED", + "RequestId": "req-123", + } + } + + aws_connector.get_aws_client = MagicMock(return_value=mock_sso_admin) + + 
aws_connector.create_account_assignment( + account_id="123456789012", + permission_set_arn="arn:aws:sso:::permissionSet/ssoins-sensitive/ps-sensitive", + principal_id="user-sensitive-1", + principal_type="USER", + ) + + call_args = mock_sso_admin.create_account_assignment.call_args.kwargs + assert call_args["TargetId"] == "123456789012" + assert call_args["PermissionSetArn"] == "arn:aws:sso:::permissionSet/ssoins-sensitive/ps-sensitive" + assert call_args["PrincipalId"] == "user-sensitive-1" + logs = _logged_text(aws_connector.logger) + assert "[REDACTED]" in logs + assert "123456789012" not in logs + assert "user-sensitive-1" not in logs + assert "ssoins-sensitive" not in logs From 66d38908092666cbfed70faf7a03216eaa17cbda Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 18:12:20 -0500 Subject: [PATCH 213/287] fix: harden google connector diagnostics --- .../connectors/google/__init__.py | 7 +- .../connectors/google/_diagnostics.py | 33 ++++++++ .../connectors/google/billing.py | 33 +++++--- src/extended_data/connectors/google/cloud.py | 42 ++++++---- .../connectors/google/services.py | 83 ++++++++++++------- .../connectors/google/workspace.py | 41 ++++----- tests/connectors/test_google_billing.py | 30 +++++-- tests/connectors/test_google_cloud.py | 41 +++++++++ tests/connectors/test_google_connector.py | 13 +-- tests/connectors/test_google_services.py | 66 +++++++++++++++ tests/connectors/test_google_workspace.py | 41 +++++++++ 11 files changed, 336 insertions(+), 94 deletions(-) create mode 100644 src/extended_data/connectors/google/_diagnostics.py diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index 0b6964f..694c2e3 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -102,11 +102,12 @@ def __init__( service_account_info = json.loads(service_account_info) except json.JSONDecodeError as e: safe_payload = 
redact_sensitive_text(service_account_info, values=[service_account_info]) - self.logger.exception( + error_message = ( "Failed to parse GOOGLE_SERVICE_ACCOUNT JSON: " - f"{redact_sensitive_text(e)}. Payload: {safe_payload}" + f"{redact_sensitive_text(e, values=[service_account_info])}. Payload: {safe_payload}" ) - raise + self.logger.error(error_message) # noqa: TRY400 - traceback can expose raw service-account payloads. + raise ValueError(error_message) from None if not isinstance(service_account_info, dict): msg = "Google service account info must be a JSON object" diff --git a/src/extended_data/connectors/google/_diagnostics.py b/src/extended_data/connectors/google/_diagnostics.py new file mode 100644 index 0000000..7848443 --- /dev/null +++ b/src/extended_data/connectors/google/_diagnostics.py @@ -0,0 +1,33 @@ +"""Google connector diagnostic redaction helpers.""" + +from __future__ import annotations + +from collections.abc import Iterable, Mapping +from typing import Any + +from extended_data.primitives.redaction import redact_sensitive_text + + +def _iter_diagnostic_values(values: Iterable[Any]) -> Iterable[Any]: + """Yield scalar values from nested diagnostic context.""" + for value in values: + if value is None: + continue + if isinstance(value, Mapping): + yield from _iter_diagnostic_values(value.values()) + elif isinstance(value, (str, bytes)): + yield value + elif isinstance(value, Iterable): + yield from _iter_diagnostic_values(value) + else: + yield value + + +def safe_google_text(value: Any, *sensitive_values: Any) -> str: + """Redact secrets and caller-provided resource identifiers from diagnostics.""" + return redact_sensitive_text(value, values=_iter_diagnostic_values(sensitive_values)) + + +def safe_google_ref(value: Any) -> str: + """Redact a single Google resource reference for diagnostic logs.""" + return safe_google_text(value, value) diff --git a/src/extended_data/connectors/google/billing.py b/src/extended_data/connectors/google/billing.py 
index 220df07..0ecb5e2 100644 --- a/src/extended_data/connectors/google/billing.py +++ b/src/extended_data/connectors/google/billing.py @@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any, cast +from extended_data.connectors.google._diagnostics import safe_google_ref, safe_google_text from extended_data.containers import ExtendedDict, ExtendedList, to_builtin from extended_data.primitives import unhump_map @@ -89,7 +90,7 @@ def get_billing_account(self, billing_account_id: str) -> ExtendedDict | None: return self.extend_result(service.billingAccounts().get(name=name).execute()) except HttpError as e: if e.resp.status == 404: - self.logger.warning(f"Billing account not found: {billing_account_id}") + self.logger.warning(f"Billing account not found: {safe_google_ref(billing_account_id)}") return None raise @@ -110,7 +111,7 @@ def get_project_billing_info(self, project_id: str) -> ExtendedDict | None: return self.extend_result(service.projects().getBillingInfo(name=f"projects/{project_id}").execute()) except HttpError as e: if e.resp.status == 404: - self.logger.warning(f"Project billing info not found: {project_id}") + self.logger.warning(f"Project billing info not found: {safe_google_ref(project_id)}") return None raise @@ -128,7 +129,9 @@ def update_project_billing_info( Returns: Updated billing info dictionary. 
""" - self.logger.info(f"Linking project {project_id} to {billing_account_name}") + safe_project = safe_google_ref(project_id) + safe_billing_account = safe_google_ref(billing_account_name) + self.logger.info(f"Linking project {safe_project} to {safe_billing_account}") service = self.get_billing_service() if not billing_account_name.startswith("billingAccounts/"): @@ -143,7 +146,7 @@ def update_project_billing_info( .execute() ) - self.logger.info(f"Linked project {project_id} to billing account") + self.logger.info(f"Linked project {safe_project} to billing account") return self.extend_result(result) def disable_project_billing(self, project_id: str) -> ExtendedDict: @@ -155,7 +158,8 @@ def disable_project_billing(self, project_id: str) -> ExtendedDict: Returns: Updated billing info dictionary. """ - self.logger.info(f"Disabling billing for project {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Disabling billing for project {safe_project}") service = self.get_billing_service() result = ( @@ -167,7 +171,7 @@ def disable_project_billing(self, project_id: str) -> ExtendedDict: .execute() ) - self.logger.info(f"Disabled billing for project {project_id}") + self.logger.info(f"Disabled billing for project {safe_project}") return self.extend_result(result) def list_billing_account_projects( @@ -184,7 +188,7 @@ def list_billing_account_projects( Returns: List of project billing info dictionaries. """ - self.logger.info(f"Listing projects for billing account {billing_account_id}") + self.logger.info(f"Listing projects for billing account {safe_google_ref(billing_account_id)}") service = self.get_billing_service() name = billing_account_id @@ -247,7 +251,7 @@ def set_billing_account_iam_policy( Returns: Updated IAM policy dictionary. 
""" - self.logger.info(f"Setting IAM policy on billing account {billing_account_id}") + self.logger.info(f"Setting IAM policy on billing account {safe_google_ref(billing_account_id)}") service = self.get_billing_service() name = billing_account_id @@ -279,7 +283,8 @@ def get_bigquery_billing_dataset( """ from googleapiclient.errors import HttpError - self.logger.info(f"Getting BigQuery billing dataset {project_id}.{dataset_id}") + safe_dataset_ref = safe_google_text(f"{project_id}.{dataset_id}", project_id, dataset_id) + self.logger.info(f"Getting BigQuery billing dataset {safe_dataset_ref}") # Build BigQuery client from google.oauth2 import service_account @@ -316,7 +321,7 @@ def get_bigquery_billing_dataset( except HttpError as e: if e.resp.status == 404: - self.logger.warning(f"Billing dataset not found: {project_id}.{dataset_id}") + self.logger.warning(f"Billing dataset not found: {safe_dataset_ref}") return None raise @@ -342,7 +347,9 @@ def setup_billing_export( """ from googleapiclient.errors import HttpError - self.logger.info(f"Setting up billing export for {billing_account_id}") + safe_billing_account = safe_google_ref(billing_account_id) + safe_dataset = safe_google_ref(dataset_id) + self.logger.info(f"Setting up billing export for {safe_billing_account}") # Build BigQuery client from google.oauth2 import service_account @@ -359,7 +366,7 @@ def setup_billing_export( # Check if dataset exists try: dataset = service.datasets().get(projectId=project_id, datasetId=dataset_id).execute() - self.logger.info(f"Dataset {dataset_id} already exists") + self.logger.info(f"Dataset {safe_dataset} already exists") except HttpError as e: if e.resp.status != 404: raise @@ -379,7 +386,7 @@ def setup_billing_export( } dataset = service.datasets().insert(projectId=project_id, body=dataset_body).execute() - self.logger.info(f"Created billing export dataset: {dataset_id}") + self.logger.info(f"Created billing export dataset: {safe_dataset}") return self.extend_result( { diff 
--git a/src/extended_data/connectors/google/cloud.py b/src/extended_data/connectors/google/cloud.py index 91422e4..2a5a1a6 100644 --- a/src/extended_data/connectors/google/cloud.py +++ b/src/extended_data/connectors/google/cloud.py @@ -9,6 +9,7 @@ from collections.abc import Mapping from typing import TYPE_CHECKING, Any +from extended_data.connectors.google._diagnostics import safe_google_ref, safe_google_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.primitives import unhump_map @@ -52,7 +53,7 @@ def get_organization_id(self) -> ExtendedString: org_name = organizations[0]["name"] org_id = org_name.split("/")[-1] - self.logger.info(f"Organization ID: {org_id}") + self.logger.info(f"Organization ID: {safe_google_ref(org_id)}") return self.extend_result(org_id) def get_organization(self) -> ExtendedDict: @@ -138,7 +139,7 @@ def get_project(self, project_id: str) -> ExtendedDict | None: return self.extend_result(service.projects().get(name=f"projects/{project_id}").execute()) except HttpError as e: if e.resp.status == 404: - self.logger.warning(f"Project not found: {project_id}") + self.logger.warning(f"Project not found: {safe_google_ref(project_id)}") return None raise @@ -160,7 +161,8 @@ def create_project( Returns: Operation response dictionary. 
""" - self.logger.info(f"Creating project: {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Creating project: {safe_project}") service = self.get_cloud_resource_manager_service() project_body: dict[str, Any] = { @@ -174,7 +176,7 @@ def create_project( project_body["labels"] = to_builtin(labels) result = service.projects().create(body=project_body).execute() - self.logger.info(f"Created project: {project_id}") + self.logger.info(f"Created project: {safe_project}") return self.extend_result(result) def delete_project(self, project_id: str) -> ExtendedDict: @@ -186,11 +188,12 @@ def delete_project(self, project_id: str) -> ExtendedDict: Returns: Operation response dictionary. """ - self.logger.info(f"Deleting project: {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Deleting project: {safe_project}") service = self.get_cloud_resource_manager_service() result = service.projects().delete(name=f"projects/{project_id}").execute() - self.logger.info(f"Deleted project: {project_id}") + self.logger.info(f"Deleted project: {safe_project}") return self.extend_result(result) def move_project( @@ -207,7 +210,9 @@ def move_project( Returns: Operation response dictionary. """ - self.logger.info(f"Moving project {project_id} to {destination_parent}") + safe_project = safe_google_ref(project_id) + safe_destination = safe_google_ref(destination_parent) + self.logger.info(f"Moving project {safe_project} to {safe_destination}") service = self.get_cloud_resource_manager_service() result = ( @@ -218,7 +223,7 @@ def move_project( ) .execute() ) - self.logger.info(f"Moved project {project_id}") + self.logger.info(f"Moved project {safe_project}") return self.extend_result(result) def list_folders( @@ -235,7 +240,7 @@ def list_folders( Returns: List of folder dictionaries. 
""" - self.logger.info(f"Listing folders under {parent}") + self.logger.info(f"Listing folders under {safe_google_ref(parent)}") service = self.get_cloud_resource_manager_service() folders: list[dict[str, Any]] = [] @@ -306,7 +311,7 @@ def set_org_policy( Returns: Updated policy dictionary. """ - self.logger.info(f"Setting org policy on {resource}") + self.logger.info(f"Setting org policy on {safe_google_ref(resource)}") service = self.get_cloud_resource_manager_service() return self.extend_result( @@ -380,7 +385,8 @@ def set_iam_policy( Returns: Updated IAM policy dictionary. """ - self.logger.info(f"Setting IAM policy on {resource_type}/{resource}") + safe_resource = safe_google_text(f"{resource_type}/{resource}", resource) + self.logger.info(f"Setting IAM policy on {safe_resource}") service = self.get_cloud_resource_manager_service() body = {"policy": to_builtin(policy)} @@ -413,7 +419,7 @@ def set_iam_policy( .execute() ) - self.logger.info(f"Set IAM policy on {resource_type}/{resource}") + self.logger.info(f"Set IAM policy on {safe_resource}") return self.extend_result(result) def add_iam_binding( @@ -434,7 +440,9 @@ def add_iam_binding( Returns: Updated IAM policy dictionary. """ - self.logger.info(f"Adding IAM binding: {role} -> {member} on {resource}") + self.logger.info( + f"Adding IAM binding: {role} -> {safe_google_ref(member)} on {safe_google_ref(resource)}" + ) policy = self.get_iam_policy(resource, resource_type) bindings = policy.get("bindings", []) @@ -469,7 +477,7 @@ def list_service_accounts( Returns: List of service account dictionaries. """ - self.logger.info(f"Listing service accounts in {project_id}") + self.logger.info(f"Listing service accounts in {safe_google_ref(project_id)}") service = self.get_iam_service() accounts: list[dict[str, Any]] = [] @@ -512,7 +520,9 @@ def create_service_account( Returns: Created service account dictionary. 
""" - self.logger.info(f"Creating service account: {account_id} in {project_id}") + safe_account = safe_google_ref(account_id) + safe_project = safe_google_ref(project_id) + self.logger.info(f"Creating service account: {safe_account} in {safe_project}") service = self.get_iam_service() result = ( @@ -531,5 +541,5 @@ def create_service_account( .execute() ) - self.logger.info(f"Created service account: {result.get('email')}") + self.logger.info(f"Created service account: {safe_google_ref(result.get('email'))}") return self.extend_result(result) diff --git a/src/extended_data/connectors/google/services.py b/src/extended_data/connectors/google/services.py index e65c606..3511f21 100644 --- a/src/extended_data/connectors/google/services.py +++ b/src/extended_data/connectors/google/services.py @@ -11,6 +11,7 @@ from collections.abc import Mapping, MutableMapping from typing import TYPE_CHECKING, Any +from extended_data.connectors.google._diagnostics import safe_google_ref, safe_google_text from extended_data.containers import ExtendedDict, ExtendedList from extended_data.primitives import unhump_map @@ -135,7 +136,8 @@ def list_compute_instances( Returns: List of instance dictionaries. """ - self.logger.info(f"Listing Compute Engine instances in {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Listing Compute Engine instances in {safe_project}") service = self.get_compute_service() instances: list[dict[str, Any]] = [] @@ -197,7 +199,8 @@ def list_gke_clusters( Returns: List of cluster dictionaries. 
""" - self.logger.info(f"Listing GKE clusters in {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Listing GKE clusters in {safe_project}") service = self.get_container_service() parent = f"projects/{project_id}/locations/{location}" @@ -236,7 +239,7 @@ def get_gke_cluster( return self.extend_result(service.projects().locations().clusters().get(name=name).execute()) except HttpError as e: if e.resp.status == 404: - self.logger.warning(f"GKE cluster not found: {cluster_id}") + self.logger.warning(f"GKE cluster not found: {safe_google_ref(cluster_id)}") return None raise @@ -258,7 +261,8 @@ def list_storage_buckets( Returns: List of bucket dictionaries. """ - self.logger.info(f"Listing Cloud Storage buckets in {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Listing Cloud Storage buckets in {safe_project}") service = self.get_storage_service() buckets: list[dict[str, Any]] = [] @@ -301,7 +305,8 @@ def list_sql_instances( Returns: List of SQL instance dictionaries. """ - self.logger.info(f"Listing Cloud SQL instances in {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Listing Cloud SQL instances in {safe_project}") service = self.get_sqladmin_service() instances: list[dict[str, Any]] = [] @@ -344,7 +349,8 @@ def list_pubsub_topics( Returns: List of topic dictionaries. """ - self.logger.info(f"Listing Pub/Sub topics in {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Listing Pub/Sub topics in {safe_project}") service = self.get_pubsub_service() topics: list[dict[str, Any]] = [] @@ -383,7 +389,8 @@ def list_pubsub_subscriptions( Returns: List of subscription dictionaries. 
""" - self.logger.info(f"Listing Pub/Sub subscriptions in {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Listing Pub/Sub subscriptions in {safe_project}") service = self.get_pubsub_service() subscriptions: list[dict[str, Any]] = [] @@ -426,7 +433,8 @@ def list_enabled_services( Returns: List of service dictionaries. """ - self.logger.info(f"Listing enabled services in {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Listing enabled services in {safe_project}") service = self.get_serviceusage_service() services: list[dict[str, Any]] = [] @@ -468,13 +476,15 @@ def enable_service( Returns: Operation response dictionary. """ - self.logger.info(f"Enabling service {service_name} in {project_id}") + safe_project = safe_google_ref(project_id) + safe_service_name = safe_google_ref(service_name) + self.logger.info(f"Enabling service {safe_service_name} in {safe_project}") service = self.get_serviceusage_service() name = f"projects/{project_id}/services/{service_name}" result = service.services().enable(name=name).execute() - self.logger.info(f"Enabled service {service_name}") + self.logger.info(f"Enabled service {safe_service_name}") return self.extend_result(result) def disable_service( @@ -493,7 +503,9 @@ def disable_service( Returns: Operation response dictionary. 
""" - self.logger.info(f"Disabling service {service_name} in {project_id}") + safe_project = safe_google_ref(project_id) + safe_service_name = safe_google_ref(service_name) + self.logger.info(f"Disabling service {safe_service_name} in {safe_project}") service = self.get_serviceusage_service() name = f"projects/{project_id}/services/{service_name}" @@ -503,7 +515,7 @@ def disable_service( result = service.services().disable(name=name, body=body).execute() - self.logger.info(f"Disabled service {service_name}") + self.logger.info(f"Disabled service {safe_service_name}") return self.extend_result(result) def batch_enable_services( @@ -520,7 +532,8 @@ def batch_enable_services( Returns: Operation response dictionary. """ - self.logger.info(f"Batch enabling {len(service_names)} services in {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Batch enabling {len(service_names)} services in {safe_project}") service = self.get_serviceusage_service() parent = f"projects/{project_id}" @@ -556,7 +569,8 @@ def list_kms_keyrings( Returns: List of key ring dictionaries. """ - self.logger.info(f"Listing KMS key rings in {project_id}/{location}") + safe_parent = safe_google_text(f"{project_id}/{location}", project_id, location) + self.logger.info(f"Listing KMS key rings in {safe_parent}") service = self.get_cloudkms_service() keyrings: list[dict[str, Any]] = [] @@ -598,7 +612,9 @@ def create_kms_keyring( Returns: Created key ring dictionary. 
""" - self.logger.info(f"Creating KMS key ring {keyring_id} in {project_id}/{location}") + safe_parent = safe_google_text(f"{project_id}/{location}", project_id, location) + safe_keyring = safe_google_ref(keyring_id) + self.logger.info(f"Creating KMS key ring {safe_keyring} in {safe_parent}") service = self.get_cloudkms_service() parent = f"projects/{project_id}/locations/{location}" @@ -614,7 +630,7 @@ def create_kms_keyring( .execute() ) - self.logger.info(f"Created key ring {keyring_id}") + self.logger.info(f"Created key ring {safe_keyring}") return self.extend_result(result) def create_kms_key( @@ -639,7 +655,9 @@ def create_kms_key( Returns: Created crypto key dictionary. """ - self.logger.info(f"Creating KMS key {key_id} in {keyring_id}") + safe_key = safe_google_ref(key_id) + safe_keyring = safe_google_ref(keyring_id) + self.logger.info(f"Creating KMS key {safe_key} in {safe_keyring}") service = self.get_cloudkms_service() parent = f"projects/{project_id}/locations/{location}/keyRings/{keyring_id}" @@ -661,7 +679,7 @@ def create_kms_key( .execute() ) - self.logger.info(f"Created crypto key {key_id}") + self.logger.info(f"Created crypto key {safe_key}") return self.extend_result(result) # ========================================================================= @@ -690,47 +708,48 @@ def is_project_empty( Returns: True if the project has no resources. 
""" - self.logger.info(f"Checking if project {project_id} is empty") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Checking if project {safe_project} is empty") try: if check_compute: instances = self.list_compute_instances(project_id) if instances: - self.logger.info(f"Project {project_id} has {len(instances)} compute instances") + self.logger.info(f"Project {safe_project} has {len(instances)} compute instances") return False if check_gke: clusters = self.list_gke_clusters(project_id) if clusters: - self.logger.info(f"Project {project_id} has {len(clusters)} GKE clusters") + self.logger.info(f"Project {safe_project} has {len(clusters)} GKE clusters") return False if check_storage: buckets = self.list_storage_buckets(project_id) if buckets: - self.logger.info(f"Project {project_id} has {len(buckets)} storage buckets") + self.logger.info(f"Project {safe_project} has {len(buckets)} storage buckets") return False if check_sql: sql_instances = self.list_sql_instances(project_id) if sql_instances: - self.logger.info(f"Project {project_id} has {len(sql_instances)} SQL instances") + self.logger.info(f"Project {safe_project} has {len(sql_instances)} SQL instances") return False if check_pubsub: topics = self.list_pubsub_topics(project_id) if topics: - self.logger.info(f"Project {project_id} has {len(topics)} Pub/Sub topics") + self.logger.info(f"Project {safe_project} has {len(topics)} Pub/Sub topics") return False except Exception as e: # API might not be enabled, treat as empty for that service if _has_http_status(e, 403): - self.logger.debug(f"API access denied, skipping check: {e}") + self.logger.debug(f"API access denied, skipping check: {safe_google_text(e, project_id)}") else: raise - self.logger.info(f"Project {project_id} appears to be empty") + self.logger.info(f"Project {safe_project} appears to be empty") return True def get_project_iam_users( @@ -745,7 +764,8 @@ def get_project_iam_users( Returns: Dictionary mapping member identifiers to 
their roles. """ - self.logger.info(f"Getting IAM users for project {project_id}") + safe_project = safe_google_ref(project_id) + self.logger.info(f"Getting IAM users for project {safe_project}") service = self.get_cloud_resource_manager_service() response = service.projects().getIamPolicy(resource=f"projects/{project_id}", body={}).execute() @@ -758,7 +778,7 @@ def get_project_iam_users( users[member] = {"roles": [], "member_type": member.split(":")[0]} users[member]["roles"].append(role) - self.logger.info(f"Found {len(users)} IAM members for project {project_id}") + self.logger.info(f"Found {len(users)} IAM members for project {safe_project}") return self.extend_result(users) def get_pubsub_resources_for_project( @@ -777,7 +797,7 @@ def get_pubsub_resources_for_project( Returns: Dictionary with 'topics' and 'subscriptions' lists. """ - self.logger.info(f"Getting Pub/Sub resources for project {project_id}") + self.logger.info(f"Getting Pub/Sub resources for project {safe_google_ref(project_id)}") topics = self.list_pubsub_topics(project_id) result: dict[str, Any] = { @@ -868,7 +888,10 @@ def find_inactive_projects( except Exception as e: if _has_http_status(e, 403): # Can't check, skip - self.logger.debug(f"Cannot check resources for {project_id}: {e}") + self.logger.debug( + f"Cannot check resources for {safe_google_ref(project_id)}: " + f"{safe_google_text(e, project_id)}" + ) else: raise diff --git a/src/extended_data/connectors/google/workspace.py b/src/extended_data/connectors/google/workspace.py index 5665c59..d394b35 100644 --- a/src/extended_data/connectors/google/workspace.py +++ b/src/extended_data/connectors/google/workspace.py @@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any, cast +from extended_data.connectors.google._diagnostics import safe_google_ref, safe_google_text from extended_data.containers import ExtendedDict, ExtendedList, to_builtin from extended_data.primitives import unhump_map @@ -94,7 +95,7 @@ def get_user( return 
self.extend_result(service.users().get(userKey=user_key).execute()) except HttpError as e: if e.resp.status == 404: - self.logger.warning(f"User not found: {user_key}") + self.logger.warning(f"User not found: {safe_google_ref(user_key)}") return None raise @@ -144,7 +145,7 @@ def create_user( } result = service.users().insert(body=to_builtin(user_body)).execute() - self.logger.info(f"Created user: {primary_email}") + self.logger.info(f"Created user: {safe_google_ref(primary_email)}") return self.extend_result(result) def update_user( @@ -165,7 +166,7 @@ def update_user( """ service = self.get_admin_directory_service(subject=subject) result = service.users().update(userKey=user_key, body=to_builtin(fields)).execute() - self.logger.info(f"Updated user: {user_key}") + self.logger.info(f"Updated user: {safe_google_ref(user_key)}") return self.extend_result(result) def delete_user( @@ -181,7 +182,7 @@ def delete_user( """ service = self.get_admin_directory_service(subject=subject) service.users().delete(userKey=user_key).execute() - self.logger.info(f"Deleted user: {user_key}") + self.logger.info(f"Deleted user: {safe_google_ref(user_key)}") def list_workspace_groups( self, @@ -248,7 +249,7 @@ def get_group( return self.extend_result(service.groups().get(groupKey=group_key).execute()) except HttpError as e: if e.resp.status == 404: - self.logger.warning(f"Group not found: {group_key}") + self.logger.warning(f"Group not found: {safe_google_ref(group_key)}") return None raise @@ -279,7 +280,7 @@ def create_group( } result = service.groups().insert(body=to_builtin(group_body)).execute() - self.logger.info(f"Created group: {email}") + self.logger.info(f"Created group: {safe_google_ref(email)}") return self.extend_result(result) def delete_group( @@ -295,7 +296,7 @@ def delete_group( """ service = self.get_admin_directory_service(subject=subject) service.groups().delete(groupKey=group_key).execute() - self.logger.info(f"Deleted group: {group_key}") + 
self.logger.info(f"Deleted group: {safe_google_ref(group_key)}") def list_group_members( self, @@ -333,7 +334,7 @@ def list_group_members( if not page_token: break - self.logger.info(f"Retrieved {len(members)} members from group {group_key}") + self.logger.info(f"Retrieved {len(members)} members from group {safe_google_ref(group_key)}") if unhump_members: members = [unhump_map(m) for m in members] @@ -366,7 +367,7 @@ def add_group_member( } result = service.members().insert(groupKey=group_key, body=to_builtin(member_body)).execute() - self.logger.info(f"Added {email} to group {group_key} with role {role}") + self.logger.info(f"Added {safe_google_ref(email)} to group {safe_google_ref(group_key)} with role {role}") return self.extend_result(result) def remove_group_member( @@ -384,7 +385,7 @@ def remove_group_member( """ service = self.get_admin_directory_service(subject=subject) service.members().delete(groupKey=group_key, memberKey=member_key).execute() - self.logger.info(f"Removed {member_key} from group {group_key}") + self.logger.info(f"Removed {safe_google_ref(member_key)} from group {safe_google_ref(group_key)}") def list_org_units( self, @@ -476,9 +477,9 @@ def create_or_update_user( if update_if_exists: # Update existing user result = service.users().update(userKey=primary_email, body=to_builtin(user_body)).execute() - self.logger.info(f"Updated existing user: {primary_email}") + self.logger.info(f"Updated existing user: {safe_google_ref(primary_email)}") return self.extend_result(result) - self.logger.info(f"User already exists: {primary_email}") + self.logger.info(f"User already exists: {safe_google_ref(primary_email)}") return self.extend_result(existing) except HttpError as e: if e.resp.status != 404: @@ -486,7 +487,7 @@ def create_or_update_user( # User doesn't exist, create new result = service.users().insert(body=to_builtin(user_body)).execute() - self.logger.info(f"Created user: {primary_email}") + self.logger.info(f"Created user: 
{safe_google_ref(primary_email)}") return self.extend_result(result) def create_or_update_group( @@ -531,9 +532,9 @@ def create_or_update_group( if update_if_exists: # Update existing group result = service.groups().update(groupKey=email, body=to_builtin(group_body)).execute() - self.logger.info(f"Updated existing group: {email}") + self.logger.info(f"Updated existing group: {safe_google_ref(email)}") return self.extend_result(result) - self.logger.info(f"Group already exists: {email}") + self.logger.info(f"Group already exists: {safe_google_ref(email)}") return self.extend_result(existing) except HttpError as e: if e.resp.status != 404: @@ -541,7 +542,7 @@ def create_or_update_group( # Group doesn't exist, create new result = service.groups().insert(body=to_builtin(group_body)).execute() - self.logger.info(f"Created group: {email}") + self.logger.info(f"Created group: {safe_google_ref(email)}") return self.extend_result(result) def list_available_licenses( @@ -619,11 +620,13 @@ def list_available_licenses( except HttpError as e: if e.resp.status == 404: # Product not available - self.logger.debug(f"Product {prod_id} not available") + self.logger.debug(f"Product {safe_google_ref(prod_id)} not available") elif e.resp.status == 403: - self.logger.debug(f"No access to product {prod_id}") + self.logger.debug(f"No access to product {safe_google_ref(prod_id)}") else: - self.logger.warning(f"Error listing licenses for {prod_id}: {e}") + self.logger.warning( + f"Error listing licenses for {safe_google_ref(prod_id)}: {safe_google_text(e, prod_id)}" + ) self.logger.info(f"Retrieved {len(licenses)} license assignments") return self.extend_result(licenses) diff --git a/tests/connectors/test_google_billing.py b/tests/connectors/test_google_billing.py index d5bf243..117f20b 100644 --- a/tests/connectors/test_google_billing.py +++ b/tests/connectors/test_google_billing.py @@ -6,6 +6,7 @@ from collections import deque from collections.abc import Iterable from typing import Any 
+from unittest.mock import MagicMock import pytest @@ -16,12 +17,9 @@ from extended_data.connectors.google.billing import GoogleBillingMixin -class _StubLogger: - def info(self, *args, **kwargs): # pragma: no cover - pass-through logger stub - pass - - def warning(self, *args, **kwargs): # pragma: no cover - pass +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) class _ImmediateResponse: @@ -87,7 +85,7 @@ def projects(self): class _TestGoogleBilling(GoogleBillingMixin): def __init__(self, service: _StubBillingService): - self.logger = _StubLogger() + self.logger = MagicMock() self._service = service def get_billing_service(self): @@ -147,6 +145,24 @@ def test_update_project_billing_info_prefixes_account_name(): ] +def test_update_project_billing_info_logs_redact_identifiers_but_preserve_call_args(): + service = _StubBillingService(account_responses=[], project_responses=[]) + connector = _TestGoogleBilling(service) + + connector.update_project_billing_info("sensitive-project", "1234-PRIVATE") + + assert service.projects().update_calls == [ + { + "name": "projects/sensitive-project", + "body": {"billingAccountName": "billingAccounts/1234-PRIVATE"}, + } + ] + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "sensitive-project" not in logs + assert "1234-PRIVATE" not in logs + + def test_disable_project_billing_sets_empty_account(): service = _StubBillingService(account_responses=[], project_responses=[]) connector = _TestGoogleBilling(service) diff --git a/tests/connectors/test_google_cloud.py b/tests/connectors/test_google_cloud.py index 0650e61..77520ab 100644 --- a/tests/connectors/test_google_cloud.py +++ b/tests/connectors/test_google_cloud.py @@ -14,6 +14,11 @@ from extended_data.connectors.google import GoogleConnector +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger 
messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) + + @pytest.fixture def google_connector(): """Create Google connector with mocked services.""" @@ -193,6 +198,23 @@ def test_create_project(self, google_connector): assert isinstance(result["projectId"], ExtendedString) assert result["projectId"] == "new-project" + def test_create_project_logs_redact_identifier_but_preserve_body(self, google_connector): + """Project creation logs should not expose project IDs.""" + mock_service = MagicMock() + mock_projects = mock_service.projects.return_value + mock_projects.create.return_value.execute.return_value = { + "projectId": "sensitive-project", + "name": "Sensitive Project", + } + google_connector.get_cloud_resource_manager_service = MagicMock(return_value=mock_service) + + google_connector.create_project("sensitive-project", "Sensitive Project") + + assert mock_projects.create.call_args.kwargs["body"]["projectId"] == "sensitive-project" + logs = _logged_text(google_connector.logger) + assert "[REDACTED]" in logs + assert "sensitive-project" not in logs + def test_delete_project(self, google_connector): """Test deleting a project.""" mock_service = MagicMock() @@ -286,6 +308,25 @@ def test_set_iam_policy(self, google_connector): call_body = mock_projects.setIamPolicy.call_args.kwargs["body"] assert isinstance(call_body["policy"], dict) + def test_add_iam_binding_logs_redact_member_and_resource_but_preserve_policy(self, google_connector): + """IAM binding logs should redact member/resource identifiers without changing policy.""" + google_connector.get_iam_policy = MagicMock(return_value=extend_data({"bindings": []})) + google_connector.set_iam_policy = MagicMock(return_value=extend_data({"bindings": []})) + + google_connector.add_iam_binding( + "sensitive-project", + "roles/viewer", + "user:sensitive.user@example.com", + ) + + policy = google_connector.set_iam_policy.call_args.args[1] + assert policy["bindings"][0]["members"] 
== ["user:sensitive.user@example.com"] + logs = _logged_text(google_connector.logger) + assert "[REDACTED]" in logs + assert "sensitive-project" not in logs + assert "sensitive.user@example.com" not in logs + assert "roles/viewer" in logs + def test_list_service_accounts(self, google_connector): """Test listing service accounts.""" mock_service = MagicMock() diff --git a/tests/connectors/test_google_connector.py b/tests/connectors/test_google_connector.py index e7f1ec2..d9889fd 100644 --- a/tests/connectors/test_google_connector.py +++ b/tests/connectors/test_google_connector.py @@ -6,8 +6,6 @@ from unittest.mock import MagicMock, patch import pytest -import json - pytest.importorskip("google.oauth2.service_account") pytest.importorskip("googleapiclient") @@ -95,13 +93,16 @@ def test_init_redacts_invalid_service_account_json_logs(self, base_connector_kwa """Invalid service-account JSON diagnostics should not expose key material.""" invalid_service_account = '{"private_key": "-----BEGIN RSA PRIVATE KEY-----\\nMIIE...test"' - with pytest.raises(json.JSONDecodeError): + with pytest.raises(ValueError) as exc_info: GoogleConnector(service_account_info=invalid_service_account, **base_connector_kwargs) logs = _logged_text(base_connector_kwargs["logger"].logger) - assert "MIIE...test" not in logs - assert "BEGIN RSA PRIVATE KEY" not in logs - assert "[REDACTED]" in logs + diagnostics = logs + str(exc_info.value) + assert "MIIE...test" not in diagnostics + assert "BEGIN RSA PRIVATE KEY" not in diagnostics + assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + assert all("exc_info" not in logged_call.kwargs for logged_call in base_connector_kwargs["logger"].logger.method_calls) @patch("extended_data.connectors.google.service_account.Credentials.from_service_account_info") def test_credentials_property(self, mock_from_sa, base_connector_kwargs): diff --git a/tests/connectors/test_google_services.py b/tests/connectors/test_google_services.py index 
1c72685..d88d648 100644 --- a/tests/connectors/test_google_services.py +++ b/tests/connectors/test_google_services.py @@ -14,6 +14,11 @@ from extended_data.connectors.google import GoogleConnector +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) + + @pytest.fixture def google_connector(): """Create Google connector with mocked services.""" @@ -323,6 +328,30 @@ def test_create_kms_key(self, google_connector): assert isinstance(result["name"], ExtendedString) assert "new-key" in result["name"] + def test_create_kms_key_logs_redact_identifiers_but_preserve_call_args(self, google_connector): + """KMS mutation logs should not expose project/key resource identifiers.""" + mock_service = MagicMock() + mock_projects = mock_service.projects.return_value + mock_locations = mock_projects.locations.return_value + mock_keyrings = mock_locations.keyRings.return_value + mock_keys = mock_keyrings.cryptoKeys.return_value + mock_keys.create.return_value.execute.return_value = { + "name": "projects/sensitive-project/locations/us/keyRings/private-ring/cryptoKeys/private-key" + } + google_connector.get_cloudkms_service = MagicMock(return_value=mock_service) + + google_connector.create_kms_key("sensitive-project", "us", "private-ring", "private-key") + + assert mock_keys.create.call_args.kwargs["parent"] == ( + "projects/sensitive-project/locations/us/keyRings/private-ring" + ) + assert mock_keys.create.call_args.kwargs["cryptoKeyId"] == "private-key" + logs = _logged_text(google_connector.logger) + assert "[REDACTED]" in logs + assert "sensitive-project" not in logs + assert "private-ring" not in logs + assert "private-key" not in logs + class TestServiceUsage: """Tests for Service Usage operations.""" @@ -359,6 +388,23 @@ def test_enable_service(self, google_connector): assert isinstance(result["name"], ExtendedString) assert result["name"] == 
"operations/enable-compute" + def test_enable_service_logs_redact_identifiers_but_preserve_call_args(self, google_connector): + """Service Usage logs should not expose project or service names.""" + mock_service = MagicMock() + mock_services = mock_service.services.return_value + mock_services.enable.return_value.execute.return_value = {"name": "operations/enable-private"} + google_connector.get_serviceusage_service = MagicMock(return_value=mock_service) + + google_connector.enable_service("sensitive-project", "private.googleapis.com") + + assert mock_services.enable.call_args.kwargs["name"] == ( + "projects/sensitive-project/services/private.googleapis.com" + ) + logs = _logged_text(google_connector.logger) + assert "[REDACTED]" in logs + assert "sensitive-project" not in logs + assert "private.googleapis.com" not in logs + def test_disable_service(self, google_connector): """Test disabling an API.""" mock_service = MagicMock() @@ -396,6 +442,26 @@ def test_batch_enable_services(self, google_connector): class TestProjectResourceSummary: """Tests for derived project resource operations.""" + def test_is_project_empty_denied_check_logs_redact_project_and_error(self, google_connector): + """Denied resource checks should not expose project IDs or raw provider details.""" + denied = RuntimeError("denied sensitive-project token=raw-token") + denied.resp = MagicMock(status=403) # type: ignore[attr-defined] + google_connector.list_compute_instances = MagicMock(side_effect=denied) + + result = google_connector.is_project_empty( + "sensitive-project", + check_gke=False, + check_storage=False, + check_sql=False, + check_pubsub=False, + ) + + assert result is True + logs = _logged_text(google_connector.logger) + assert "[REDACTED]" in logs + assert "sensitive-project" not in logs + assert "raw-token" not in logs + def test_get_project_iam_users(self, google_connector): """Test deriving IAM members from a project policy.""" mock_service = MagicMock() diff --git 
a/tests/connectors/test_google_workspace.py b/tests/connectors/test_google_workspace.py index 7938c3e..21d2587 100644 --- a/tests/connectors/test_google_workspace.py +++ b/tests/connectors/test_google_workspace.py @@ -14,6 +14,11 @@ from extended_data.connectors.google import GoogleConnector +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) + + @pytest.fixture def google_connector(): """Create Google connector with mocked services.""" @@ -172,6 +177,23 @@ def test_update_user(self, google_connector): assert isinstance(body, dict) assert isinstance(body["customSchemas"], dict) + def test_update_user_logs_redact_identifier_but_preserve_call_args(self, google_connector): + """Workspace user mutation logs should not expose user keys.""" + mock_service = MagicMock() + mock_users = mock_service.users.return_value + mock_users.update.return_value.execute.return_value = { + "primaryEmail": "sensitive.user@example.com", + "suspended": True, + } + google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + google_connector.update_user("sensitive.user@example.com", suspended=True) + + assert mock_users.update.call_args.kwargs["userKey"] == "sensitive.user@example.com" + logs = _logged_text(google_connector.logger) + assert "[REDACTED]" in logs + assert "sensitive.user@example.com" not in logs + def test_delete_user(self, google_connector): """Test deleting a user.""" mock_service = MagicMock() @@ -311,6 +333,25 @@ def test_add_group_member(self, google_connector): assert isinstance(result["email"], ExtendedString) assert result["email"] == "user1@example.com" + def test_add_group_member_logs_redact_identifiers_but_preserve_call_args(self, google_connector): + """Workspace membership logs should not expose member or group keys.""" + mock_service = MagicMock() + mock_members = mock_service.members.return_value + 
mock_members.insert.return_value.execute.return_value = { + "email": "sensitive.user@example.com", + "role": "MEMBER", + } + google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + google_connector.add_group_member("private-group@example.com", "sensitive.user@example.com") + + assert mock_members.insert.call_args.kwargs["groupKey"] == "private-group@example.com" + assert mock_members.insert.call_args.kwargs["body"]["email"] == "sensitive.user@example.com" + logs = _logged_text(google_connector.logger) + assert "[REDACTED]" in logs + assert "private-group@example.com" not in logs + assert "sensitive.user@example.com" not in logs + def test_remove_group_member(self, google_connector): """Test removing a member from a group.""" mock_service = MagicMock() From 4268cae5d97b3c2509a07e301e97578bd9c11952 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 18:20:58 -0500 Subject: [PATCH 214/287] fix: harden github cursor diagnostics --- .../connectors/cursor/__init__.py | 51 +++++++-- .../connectors/github/__init__.py | 107 ++++++++++++------ .../connectors/github/_diagnostics.py | 33 ++++++ tests/connectors/test_cursor.py | 73 ++++++++++++ .../test_github_payload_contract.py | 67 ++++++++++- 5 files changed, 282 insertions(+), 49 deletions(-) create mode 100644 src/extended_data/connectors/github/_diagnostics.py diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index c4a2592..0b45aa5 100644 --- a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -23,6 +23,7 @@ import os import re +from collections.abc import Iterable, Mapping from dataclasses import dataclass from datetime import datetime from enum import Enum @@ -273,7 +274,7 @@ def validate_webhook_url(url: str) -> None: try: parsed = urlparse(url) except Exception as e: - raise CursorValidationError(f"Webhook URL is not a valid URL: {e}") from e + raise 
CursorValidationError(f"Webhook URL is not a valid URL: {_safe_cursor_text(e, url)}") from None # Security: Only allow HTTPS if parsed.scheme != "https": @@ -294,17 +295,43 @@ def validate_webhook_url(url: str) -> None: raise CursorValidationError(msg) -def sanitize_error(error: Any) -> str: +def _iter_diagnostic_values(values: Iterable[Any]) -> Iterable[Any]: + """Yield scalar values from nested diagnostic context.""" + for value in values: + if value is None: + continue + if isinstance(value, Mapping): + yield from _iter_diagnostic_values(value.values()) + elif isinstance(value, (str, bytes)): + yield value + elif isinstance(value, Iterable): + yield from _iter_diagnostic_values(value) + else: + yield value + + +def _safe_cursor_text(value: Any, *sensitive_values: Any) -> str: + """Redact secrets and caller-provided Cursor identifiers from diagnostics.""" + return redact_sensitive_text(value, values=_iter_diagnostic_values(sensitive_values)) + + +def _safe_cursor_ref(value: Any) -> str: + """Redact a single Cursor resource reference for diagnostic logs.""" + return _safe_cursor_text(value, value) + + +def sanitize_error(error: Any, *, values: Iterable[Any] | None = None) -> str: """Sanitize error messages to prevent sensitive data leakage. Args: error: The error to sanitize. + values: Explicit caller-provided values that must not appear in diagnostics. Returns: Sanitized error message string. """ message = str(error) if not isinstance(error, str) else error - return redact_sensitive_text(message) + return redact_sensitive_text(message, values=_iter_diagnostic_values(values or ())) # ============================================================================= @@ -350,7 +377,7 @@ def __init__( msg = "CURSOR_API_KEY is required. Set it in environment or pass to constructor." 
raise CursorError(msg) - self.logger.info(f"Initialized CursorConnector with base URL: {self._base_url}") + self.logger.info(f"Initialized CursorConnector with base URL: {_safe_cursor_ref(self._base_url)}") @staticmethod def is_available() -> bool: @@ -398,12 +425,12 @@ def _request_api( return response.json() - except httpx.TimeoutException as e: - raise CursorAPIError(f"Request timeout after {self._timeout}s") from e + except httpx.TimeoutException: + raise CursorAPIError(f"Request timeout after {self._timeout}s") from None except Exception as e: if isinstance(e, CursorAPIError): raise - raise CursorAPIError(sanitize_error(str(e))) from e + raise CursorAPIError(sanitize_error(str(e), values=[endpoint, json_body])) from None @staticmethod def _model_payload(model: BaseModel) -> dict[str, Any]: @@ -448,11 +475,11 @@ def get_agent_status(self, agent_id: str) -> ExtendedDict: CursorAPIError: If the API request fails or returns empty response. """ validate_agent_id(agent_id) - self.logger.info(f"Getting status for agent: {agent_id}") + self.logger.info(f"Getting status for agent: {_safe_cursor_ref(agent_id)}") data = self._request_api(f"/agents/{agent_id}") if not data: - raise CursorAPIError(f"Empty response when getting agent status for {agent_id}") + raise CursorAPIError(f"Empty response when getting agent status for {_safe_cursor_ref(agent_id)}") return self.extend_result(self._model_payload(Agent.model_validate(data))) def get_agent_conversation(self, agent_id: str) -> ExtendedDict: @@ -469,7 +496,7 @@ def get_agent_conversation(self, agent_id: str) -> ExtendedDict: CursorAPIError: If the API request fails. 
""" validate_agent_id(agent_id) - self.logger.info(f"Getting conversation for agent: {agent_id}") + self.logger.info(f"Getting conversation for agent: {_safe_cursor_ref(agent_id)}") data = self._request_api(f"/agents/{agent_id}/conversation") if not data: @@ -522,7 +549,7 @@ def launch_agent( if webhook_url: validate_webhook_url(webhook_url) - self.logger.info(f"Launching agent for repository: {repository}") + self.logger.info(f"Launching agent for repository: {_safe_cursor_ref(repository)}") body: dict[str, Any] = { "prompt": { @@ -577,7 +604,7 @@ def add_followup(self, agent_id: str, prompt_text: str) -> None: validate_agent_id(agent_id) validate_prompt_text(prompt_text) - self.logger.info(f"Adding follow-up to agent: {agent_id}") + self.logger.info(f"Adding follow-up to agent: {_safe_cursor_ref(agent_id)}") self._request_api( f"/agents/{agent_id}/followup", diff --git a/src/extended_data/connectors/github/__init__.py b/src/extended_data/connectors/github/__init__.py index 5b0a40e..56a533c 100644 --- a/src/extended_data/connectors/github/__init__.py +++ b/src/extended_data/connectors/github/__init__.py @@ -13,6 +13,7 @@ from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.github._diagnostics import safe_github_ref, safe_github_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple from extended_data.io import ( decode_file, @@ -109,11 +110,12 @@ def __init__( self.repo = None if github_repo: + repo_ref = f"{self.GITHUB_OWNER}/{self.GITHUB_REPO}" try: - self.repo = self.git.get_repo(f"{self.GITHUB_OWNER}/{self.GITHUB_REPO}") - self.logger.info(f"Connecting to Git repository {self.GITHUB_OWNER}/{self.GITHUB_REPO}") + self.repo = self.git.get_repo(repo_ref) + self.logger.info(f"Connecting to Git repository {safe_github_ref(repo_ref)}") except UnknownObjectException: - self.logger.warning(f"Repository 
{self.GITHUB_OWNER}/{self.GITHUB_REPO} does not exist") + self.logger.warning(f"Repository {safe_github_ref(repo_ref)} does not exist") if github_branch is None and self.repo: self.GITHUB_BRANCH: str | None = self.repo.default_branch @@ -125,26 +127,34 @@ def __init__( def get_repository_branch(self, branch_name: str) -> Any | None: """Get a repository branch by name.""" if self.repo is None: - self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot get branch {branch_name}") + self.logger.warning( + f"Repository not set for {safe_github_ref(self.GITHUB_OWNER)}, " + f"cannot get branch {safe_github_ref(branch_name)}" + ) return None try: return self.repo.get_branch(branch_name) except UnknownObjectException: - self.logger.warning(f"{branch_name} does not yet exist") + self.logger.warning(f"{safe_github_ref(branch_name)} does not yet exist") return None def create_repository_branch(self, branch_name: str, parent_branch: str | None = None) -> Any | None: """Create a new repository branch.""" if self.repo is None: - self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot create branch {branch_name}") + self.logger.warning( + f"Repository not set for {safe_github_ref(self.GITHUB_OWNER)}, " + f"cannot create branch {safe_github_ref(branch_name)}" + ) return None parent_branch_ref = self.get_repository_branch(parent_branch or self.repo.default_branch) if parent_branch_ref is None or is_nothing(parent_branch_ref): - raise RuntimeError( - f"Cannot create Git branch {branch_name}, parent branch {parent_branch} does not yet exist" + msg = ( + f"Cannot create Git branch {safe_github_ref(branch_name)}, " + f"parent branch {safe_github_ref(parent_branch)} does not yet exist" ) + raise RuntimeError(msg) try: return self.repo.create_git_ref( @@ -153,10 +163,11 @@ def create_repository_branch(self, branch_name: str, parent_branch: str | None = ) except GithubException as exc: if get_github_api_error(exc) == "Reference already exists": - 
self.logger.info(f"Branch {branch_name} already exists in Git repository") + self.logger.info(f"Branch {safe_github_ref(branch_name)} already exists in Git repository") return self.get_repository_branch(branch_name) - raise RuntimeError(f"Failed to create branch {branch_name}") from exc + msg = f"Failed to create branch {safe_github_ref(branch_name)}: {safe_github_text(exc, branch_name)}" + raise RuntimeError(msg) from None def get_repository_file( self, @@ -170,8 +181,11 @@ def get_repository_file( ) -> ExtendedDict | ExtendedList[Any] | ExtendedString | ExtendedTuple[Any] | None: """Get a file from the repository.""" file_path_text = os.fspath(file_path) + safe_file_path = safe_github_ref(file_path_text) if self.repo is None: - self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot get file {file_path_text}") + self.logger.warning( + f"Repository not set for {safe_github_ref(self.GITHUB_OWNER)}, cannot get file {safe_file_path}" + ) return None def state_negative_result(result: str) -> None: @@ -192,19 +206,21 @@ def get_retval(d: Any, s: str | None, p: str) -> Any: file_data: Any = {} if decode else "" file_sha = None - self.logger.debug(f"Getting repository file: {file_path_text}") + self.logger.debug(f"Getting repository file: {safe_file_path}") try: raw_file_data = self.repo.get_contents(file_path_text, ref=self.GITHUB_BRANCH) file_sha = raw_file_data.sha if is_nothing(raw_file_data.content): - self.logger.warning(f"{file_path_text} is empty of content: {self.GITHUB_BRANCH}") + self.logger.warning( + f"{safe_file_path} is empty of content: {safe_github_ref(self.GITHUB_BRANCH)}" + ) else: file_data = raw_file_data.decoded_content.decode(charset, errors) except (UnknownObjectException, AttributeError): - state_negative_result(f"{file_path_text} does not exist") + state_negative_result(f"{safe_file_path} does not exist") except ValueError as exc: - self.logger.warning(f"Reading {file_path_text} not supported: {exc}") + 
self.logger.warning(f"Reading {safe_file_path} not supported: {safe_github_text(exc, file_path_text)}") decode = False if not decode or is_nothing(file_data): @@ -215,7 +231,9 @@ def get_retval(d: Any, s: str | None, p: str) -> Any: try: decoded_data = decode_file(file_data, file_path=file_path_text, as_extended=True) except Exception as exc: - self.logger.warning(f"Failed to decode {file_path_text} as {encoding}: {exc}") + self.logger.warning( + f"Failed to decode {safe_file_path} as {encoding}: {safe_github_text(exc, file_path_text)}" + ) decoded_data = file_data return self.extend_result(get_retval(decoded_data, file_sha, file_path_text)) @@ -232,16 +250,19 @@ def update_repository_file( ) -> Any | None: """Update a file in the repository.""" file_path_text = os.fspath(file_path) + safe_file_path = safe_github_ref(file_path_text) if self.repo is None: - self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot update file {file_path_text}") + self.logger.warning( + f"Repository not set for {safe_github_ref(self.GITHUB_OWNER)}, cannot update file {safe_file_path}" + ) return None if is_nothing(file_data) and not allow_empty: - self.logger.warning(f"Empty file data for {file_path_text} not allowed") + self.logger.warning(f"Empty file data for {safe_file_path} not allowed") return None if msg: - self.logger.info(msg) + self.logger.info("Using caller-provided repository file message") if allow_encoding is None: allow_encoding = get_encoding_for_file_path(file_path_text) @@ -251,7 +272,7 @@ def update_repository_file( if not isinstance(file_data, str): file_data = str(file_data) - self.logger.info(f"Updating repository file: {file_path_text}") + self.logger.info(f"Updating repository file: {safe_file_path}") if file_sha is None: result = self.get_repository_file(file_path_text, return_sha=True) @@ -281,11 +302,14 @@ def update_repository_file( def delete_repository_file(self, file_path: FilePath, msg: str | None = None) -> Any | None: """Delete a file 
from the repository.""" file_path_text = os.fspath(file_path) + safe_file_path = safe_github_ref(file_path_text) if self.repo is None: - self.logger.warning(f"Repository not set for {self.GITHUB_OWNER}, cannot delete file {file_path_text}") + self.logger.warning( + f"Repository not set for {safe_github_ref(self.GITHUB_OWNER)}, cannot delete file {safe_file_path}" + ) return None - self.logger.info(f"Deleting repository file: {file_path_text}") + self.logger.info(f"Deleting repository file: {safe_file_path}") result = self.get_repository_file(file_path=file_path_text, return_sha=True) sha = None @@ -323,7 +347,7 @@ def list_org_members( Returns: Dictionary mapping usernames to member data. """ - self.logger.info(f"Listing members for organization: {self.GITHUB_OWNER}") + self.logger.info(f"Listing members for organization: {safe_github_ref(self.GITHUB_OWNER)}") members: dict[str, dict[str, Any]] = {} @@ -386,7 +410,7 @@ def get_org_member(self, username: str) -> ExtendedDict | None: } ) except UnknownObjectException: - self.logger.warning(f"User not found: {username}") + self.logger.warning(f"User not found: {safe_github_ref(username)}") return None # ========================================================================= @@ -407,7 +431,7 @@ def list_repositories( Returns: Dictionary mapping repo names to repository data. 
""" - self.logger.info(f"Listing repositories for organization: {self.GITHUB_OWNER}") + self.logger.info(f"Listing repositories for organization: {safe_github_ref(self.GITHUB_OWNER)}") repos: dict[str, dict[str, Any]] = {} @@ -475,7 +499,7 @@ def get_repository(self, repo_name: str) -> ExtendedDict | None: } ) except UnknownObjectException: - self.logger.warning(f"Repository not found: {repo_name}") + self.logger.warning(f"Repository not found: {safe_github_ref(repo_name)}") return None # ========================================================================= @@ -496,7 +520,7 @@ def list_teams( Returns: Dictionary mapping team slugs to team data. """ - self.logger.info(f"Listing teams for organization: {self.GITHUB_OWNER}") + self.logger.info(f"Listing teams for organization: {safe_github_ref(self.GITHUB_OWNER)}") teams: dict[str, dict[str, Any]] = {} @@ -567,7 +591,7 @@ def get_team(self, team_slug: str) -> ExtendedDict | None: } ) except UnknownObjectException: - self.logger.warning(f"Team not found: {team_slug}") + self.logger.warning(f"Team not found: {safe_github_ref(team_slug)}") return None def add_team_member(self, team_slug: str, username: str, role: str = "member") -> bool: @@ -581,15 +605,19 @@ def add_team_member(self, team_slug: str, username: str, role: str = "member") - Returns: True if successful. 
""" - self.logger.info(f"Adding {username} to team {team_slug}") + safe_username = safe_github_ref(username) + safe_team = safe_github_ref(team_slug) + self.logger.info(f"Adding {safe_username} to team {safe_team}") try: team = self.org.get_team_by_slug(team_slug) user = self.git.get_user(username) team.add_membership(user, role=role) - self.logger.info(f"Added {username} to team {team_slug}") + self.logger.info(f"Added {safe_username} to team {safe_team}") return True except (UnknownObjectException, GithubException) as e: - self.logger.exception(f"Failed to add {username} to team: {e}") + self.logger.error( # noqa: TRY400 - traceback can expose raw GitHub identifiers. + f"Failed to add {safe_username} to team {safe_team}: {safe_github_text(e, username, team_slug)}" + ) return False def remove_team_member(self, team_slug: str, username: str) -> bool: @@ -602,15 +630,19 @@ def remove_team_member(self, team_slug: str, username: str) -> bool: Returns: True if successful. """ - self.logger.info(f"Removing {username} from team {team_slug}") + safe_username = safe_github_ref(username) + safe_team = safe_github_ref(team_slug) + self.logger.info(f"Removing {safe_username} from team {safe_team}") try: team = self.org.get_team_by_slug(team_slug) user = self.git.get_user(username) team.remove_membership(user) - self.logger.info(f"Removed {username} from team {team_slug}") + self.logger.info(f"Removed {safe_username} from team {safe_team}") return True except (UnknownObjectException, GithubException) as e: - self.logger.exception(f"Failed to remove {username} from team: {e}") + self.logger.error( # noqa: TRY400 - traceback can expose raw GitHub identifiers. 
+ f"Failed to remove {safe_username} from team {safe_team}: {safe_github_text(e, username, team_slug)}" + ) return False # ========================================================================= @@ -656,7 +688,7 @@ def get_users_with_verified_emails( Returns: Dictionary mapping usernames to member data with verified emails. """ - self.logger.info(f"Getting users with verified emails for {self.GITHUB_OWNER}") + self.logger.info(f"Getting users with verified emails for {safe_github_ref(self.GITHUB_OWNER)}") if members is None: members = self.list_org_members() @@ -696,7 +728,10 @@ def get_users_with_verified_emails( enriched[username] = enriched_data except Exception as e: - self.logger.warning(f"Failed to get verified emails for {username}: {e}") + self.logger.warning( + f"Failed to get verified emails for {safe_github_ref(username)}: " + f"{safe_github_text(e, username)}" + ) enriched[username] = dict(member_data) self.logger.info(f"Retrieved verified emails for {len(enriched)} users") diff --git a/src/extended_data/connectors/github/_diagnostics.py b/src/extended_data/connectors/github/_diagnostics.py new file mode 100644 index 0000000..8a81bea --- /dev/null +++ b/src/extended_data/connectors/github/_diagnostics.py @@ -0,0 +1,33 @@ +"""GitHub connector diagnostic redaction helpers.""" + +from __future__ import annotations + +from collections.abc import Iterable, Mapping +from typing import Any + +from extended_data.primitives.redaction import redact_sensitive_text + + +def _iter_diagnostic_values(values: Iterable[Any]) -> Iterable[Any]: + """Yield scalar values from nested diagnostic context.""" + for value in values: + if value is None: + continue + if isinstance(value, Mapping): + yield from _iter_diagnostic_values(value.values()) + elif isinstance(value, (str, bytes)): + yield value + elif isinstance(value, Iterable): + yield from _iter_diagnostic_values(value) + else: + yield value + + +def safe_github_text(value: Any, *sensitive_values: Any) -> str: + 
"""Redact secrets and caller-provided GitHub identifiers from diagnostics.""" + return redact_sensitive_text(value, values=_iter_diagnostic_values(sensitive_values)) + + +def safe_github_ref(value: Any) -> str: + """Redact a single GitHub resource reference for diagnostic logs.""" + return safe_github_text(value, value) diff --git a/tests/connectors/test_cursor.py b/tests/connectors/test_cursor.py index f8bd307..0059b35 100644 --- a/tests/connectors/test_cursor.py +++ b/tests/connectors/test_cursor.py @@ -13,6 +13,7 @@ AgentState, Conversation, ConversationMessage, + CursorAPIError, CursorConnector, CursorError, CursorValidationError, @@ -26,6 +27,17 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data +def _logged_text(logger: MagicMock) -> str: + """Collect structured mock log calls into one searchable diagnostic string.""" + messages: list[str] = [] + for method_name in ("debug", "info", "warning", "error", "exception"): + method = getattr(logger, method_name) + for call in method.call_args_list: + messages.extend(str(arg) for arg in call.args) + messages.extend(str(value) for value in call.kwargs.values()) + return "\n".join(messages) + + class TestValidators: """Tests for input validators.""" @@ -124,6 +136,17 @@ def test_sanitize_error_uses_shared_secret_redaction(self): assert "raw_token" not in redacted assert "[REDACTED]" in redacted + def test_sanitize_error_redacts_explicit_values(self): + """Cursor sanitization should remove caller-provided identifiers, not just secret keys.""" + redacted = sanitize_error( + "request to /agents/secret-agent failed for secret-org/private-repo", + values=["secret-agent", "secret-org/private-repo"], + ) + + assert "secret-agent" not in redacted + assert "secret-org/private-repo" not in redacted + assert "[REDACTED]" in redacted + def test_agent_model_payload_redacts_error(self): """Cursor agent payload serialization should redact agent error text.""" agent = Agent( @@ -258,6 
+281,32 @@ def test_get_agent_status_returns_extended_dict(self, mock_client_class): assert isinstance(agent["state"], ExtendedString) assert agent["pr_url"] == "https://github.com/org/repo/pull/1" + @patch("extended_data.connectors.cursor.httpx.Client") + def test_get_agent_status_empty_response_redacts_agent_id(self, mock_client_class): + """Empty status responses should not leak the raw agent ID in logs or errors.""" + mock_client = MagicMock() + mock_client_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_success = True + mock_response.headers = {"content-type": "text/plain"} + mock_response.text = "" + mock_client.request.return_value = mock_response + + connector = CursorConnector(api_key="test-key") + connector.logger = MagicMock() + + with pytest.raises(CursorAPIError) as exc_info: + connector.get_agent_status("secret-agent") + + assert exc_info.value.__cause__ is None + assert "secret-agent" not in str(exc_info.value) + assert "[REDACTED]" in str(exc_info.value) + logs = _logged_text(connector.logger) + assert "secret-agent" not in logs + assert "[REDACTED]" in logs + @patch("extended_data.connectors.cursor.httpx.Client") def test_get_agent_conversation_returns_extended_dict(self, mock_client_class): """get_agent_conversation should return an extended conversation payload.""" @@ -314,6 +363,30 @@ def test_launch_agent(self, mock_client_class): assert isinstance(call_args.kwargs["json"]["prompt"]["images"], list) assert isinstance(call_args.kwargs["json"]["prompt"]["images"][0], dict) + @patch("extended_data.connectors.cursor.httpx.Client") + def test_launch_agent_redacts_repository_diagnostics_but_preserves_payload(self, mock_client_class): + """Agent launches should send raw repository data while redacting logs.""" + mock_client = MagicMock() + mock_client_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_success = True 
+ mock_response.headers = {"content-type": "application/json"} + mock_response.text = '{"id": "new-agent", "state": "pending"}' + mock_response.json.return_value = {"id": "new-agent", "state": "pending"} + mock_client.request.return_value = mock_response + + connector = CursorConnector(api_key="test-key") + connector.logger = MagicMock() + connector.launch_agent(prompt_text="Implement feature X", repository="secret-org/private-repo") + + call_args = mock_client.request.call_args + assert call_args.kwargs["json"]["source"]["repository"] == "secret-org/private-repo" + logs = _logged_text(connector.logger) + assert "secret-org/private-repo" not in logs + assert "[REDACTED]" in logs + @patch("extended_data.connectors.cursor.httpx.Client") def test_launch_agent_validation(self, mock_client_class): """launch_agent should validate inputs.""" diff --git a/tests/connectors/test_github_payload_contract.py b/tests/connectors/test_github_payload_contract.py index c45641e..28dcce7 100644 --- a/tests/connectors/test_github_payload_contract.py +++ b/tests/connectors/test_github_payload_contract.py @@ -4,7 +4,11 @@ from unittest.mock import MagicMock -from extended_data.connectors.github import GitHubConnector +import pytest + +import extended_data.connectors.github as github_module + +from extended_data.connectors.github import GitHubConnector, GitHubFallbackError from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple @@ -22,6 +26,17 @@ def _connector() -> GitHubConnector: return connector +def _logged_text(logger: MagicMock) -> str: + """Collect structured mock log calls into one searchable diagnostic string.""" + messages: list[str] = [] + for method_name in ("debug", "info", "warning", "error", "exception"): + method = getattr(logger, method_name) + for call in method.call_args_list: + messages.extend(str(arg) for arg in call.args) + messages.extend(str(value) for value in call.kwargs.values()) + return "\n".join(messages) + + def 
test_repository_file_decodes_into_extended_payload_with_metadata() -> None: """Decoded repository files should enter the Tier 2 fabric immediately.""" connector = _connector() @@ -121,3 +136,53 @@ def test_workflow_builders_return_extended_data() -> None: assert isinstance(workflow, ExtendedDict) assert isinstance(workflow["jobs"]["test"]["steps"], ExtendedList) assert workflow["jobs"]["test"]["steps"][0]["run"].upper_first() == "Pytest" + + +def test_update_repository_file_redacts_diagnostics_but_preserves_payload() -> None: + """GitHub file updates should not leak caller paths or messages in logs.""" + connector = _connector() + raw_path = "private/path.txt" + raw_message = "commit mentions private/path.txt token=raw-token" + + connector.update_repository_file( + raw_path, + "raw file data", + file_sha="abc123", + msg=raw_message, + allow_encoding=False, + ) + + connector.repo.update_file.assert_called_once_with( + path=raw_path, + message=raw_message, + content="raw file data", + sha="abc123", + branch="main", + ) + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert raw_path not in logs + assert raw_message not in logs + assert "raw-token" not in logs + + +def test_add_team_member_failure_redacts_diagnostics_without_traceback(monkeypatch: pytest.MonkeyPatch) -> None: + """Team membership failures should redact user/team identifiers and avoid tracebacks.""" + monkeypatch.setattr(github_module, "GithubException", GitHubFallbackError) + monkeypatch.setattr(github_module, "UnknownObjectException", GitHubFallbackError) + + connector = _connector() + connector.org.get_team_by_slug.side_effect = GitHubFallbackError( + "team private-team user secret-user token=raw-token" + ) + + assert connector.add_team_member("private-team", "secret-user") is False + + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private-team" not in logs + assert "secret-user" not in logs + assert "raw-token" not in logs + 
connector.logger.exception.assert_not_called() + for call in connector.logger.error.call_args_list: + assert call.kwargs.get("exc_info") is not True From 2c3e02ff1f81bf43f4eef4a29fd2f00fb75ce1b3 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 18:24:37 -0500 Subject: [PATCH 215/287] fix: harden aws organization diagnostics --- .../connectors/aws/_diagnostics.py | 8 +- .../connectors/aws/organizations.py | 18 +++-- tests/connectors/test_aws_organizations.py | 79 +++++++++++++++++++ 3 files changed, 96 insertions(+), 9 deletions(-) diff --git a/src/extended_data/connectors/aws/_diagnostics.py b/src/extended_data/connectors/aws/_diagnostics.py index 577e1fb..630744d 100644 --- a/src/extended_data/connectors/aws/_diagnostics.py +++ b/src/extended_data/connectors/aws/_diagnostics.py @@ -2,12 +2,17 @@ from __future__ import annotations +import re + from collections.abc import Iterable, Mapping from typing import Any from extended_data.primitives.redaction import redact_sensitive_text +AWS_ACCOUNT_ID_RE = re.compile(r"\b\d{12}\b") + + def _iter_diagnostic_values(values: Iterable[Any]) -> Iterable[Any]: """Yield scalar values from nested diagnostic context.""" for value in values: @@ -25,7 +30,8 @@ def _iter_diagnostic_values(values: Iterable[Any]) -> Iterable[Any]: def safe_aws_text(value: Any, *sensitive_values: Any) -> str: """Redact secrets and caller-provided resource identifiers from AWS diagnostics.""" - return redact_sensitive_text(value, values=_iter_diagnostic_values(sensitive_values)) + redacted = redact_sensitive_text(value, values=_iter_diagnostic_values(sensitive_values)) + return AWS_ACCOUNT_ID_RE.sub("[REDACTED]", redacted) def safe_aws_ref(value: Any) -> str: diff --git a/src/extended_data/connectors/aws/organizations.py b/src/extended_data/connectors/aws/organizations.py index 315ab22..66d69ba 100644 --- a/src/extended_data/connectors/aws/organizations.py +++ b/src/extended_data/connectors/aws/organizations.py @@ -15,6 +15,7 @@ from 
deepmerge import always_merger +from extended_data.connectors.aws._diagnostics import safe_aws_ref, safe_aws_text from extended_data.containers import ExtendedDict, to_builtin from extended_data.primitives import is_nothing, unhump_map @@ -90,10 +91,11 @@ def get_organization_accounts( try: root_parent_id = roots["Roots"][0]["Id"] - except (KeyError, IndexError) as exc: - raise RuntimeError(f"Failed to find root parent ID: {roots}") from exc + except (KeyError, IndexError): + msg = f"Failed to find root parent ID: {safe_aws_text(roots, roots)}" + raise RuntimeError(msg) from None - self.logger.info(f"Root parent ID: {root_parent_id}") + self.logger.info(f"Root parent ID: {safe_aws_ref(root_parent_id)}") accounts_paginator = orgs.get_paginator("list_accounts_for_parent") ou_paginator = orgs.get_paginator("list_organizational_units_for_parent") @@ -203,7 +205,7 @@ def get_controltower_accounts( pass except ClientError as e: - self.logger.warning(f"Could not list Control Tower accounts: {e}") + self.logger.warning(f"Could not list Control Tower accounts: {safe_aws_text(e)}") # Apply transformations if unhump_accounts: @@ -471,7 +473,7 @@ def label_account( labels: Dictionary of label key-value pairs to apply. execution_role_arn: ARN of role to assume for cross-account access. 
""" - self.logger.info(f"Labeling AWS account {account_id} with {len(labels)} tags") + self.logger.info(f"Labeling AWS account {safe_aws_ref(account_id)} with {len(labels)} tags") role_arn = execution_role_arn or getattr(self, "execution_role_arn", None) orgs = self.get_aws_client( @@ -481,7 +483,7 @@ def label_account( tags = [{"Key": str(k), "Value": str(v)} for k, v in labels.items()] orgs.tag_resource(ResourceId=account_id, Tags=tags) - self.logger.info(f"Applied {len(labels)} tags to account {account_id}") + self.logger.info(f"Applied {len(labels)} tags to account {safe_aws_ref(account_id)}") def classify_accounts( self, @@ -644,8 +646,8 @@ def label_aws_account( ) try: return self.extend_result(labeled_accounts[account_id]) - except KeyError as exc: # pragma: no cover - defensive guard - raise KeyError(f"AWS account {account_id} not found") from exc + except KeyError: # pragma: no cover - defensive guard + raise KeyError(f"AWS account {safe_aws_ref(account_id)} not found") from None def classify_aws_accounts( self, diff --git a/tests/connectors/test_aws_organizations.py b/tests/connectors/test_aws_organizations.py index 3d3598b..48c2585 100644 --- a/tests/connectors/test_aws_organizations.py +++ b/tests/connectors/test_aws_organizations.py @@ -4,12 +4,15 @@ from __future__ import annotations from typing import Any +from unittest.mock import MagicMock import pytest pytest.importorskip("boto3") pytest.importorskip("botocore") +from botocore.exceptions import ClientError + from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data from extended_data.connectors.aws.organizations import AWSOrganizationsMixin @@ -36,6 +39,11 @@ def list_roots(self): return {"Roots": [{"Id": "r-root"}]} +def _logged_text(logger: MagicMock) -> str: + """Return concatenated mock logger messages.""" + return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) + + class _TestAWSOrganizations(AWSOrganizationsMixin): def 
__init__(self) -> None: self.logger = _StubLogger() @@ -96,6 +104,7 @@ def test_classify_accounts_fetches_when_missing(mocker, organizations_connector: def test_label_account_tags_resource(organizations_connector: _TestAWSOrganizations): client = organizations_connector._clients["organizations"] + organizations_connector.logger = MagicMock() organizations_connector.label_account("123456789012", {"Env": "prod", "Owner": "platform"}) @@ -108,6 +117,76 @@ def test_label_account_tags_resource(organizations_connector: _TestAWSOrganizati ], } ] + logs = _logged_text(organizations_connector.logger) + assert "123456789012" not in logs + assert "[REDACTED]" in logs + + +def test_get_organization_accounts_redacts_root_parent_id() -> None: + class _Paginator: + def __init__(self, pages: list[dict[str, Any]]) -> None: + self.pages = pages + + def paginate(self, **_: Any) -> list[dict[str, Any]]: + return self.pages + + class _RootClient: + def list_roots(self): + return {"Roots": [{"Id": "r-sensitive-root"}]} + + def get_paginator(self, name: str): + if name == "list_accounts_for_parent": + return _Paginator([{"Accounts": []}]) + if name == "list_organizational_units_for_parent": + return _Paginator([{"OrganizationalUnits": []}]) + return _Paginator([]) + + connector = _TestAWSOrganizations() + connector.logger = MagicMock() + connector.register_client("organizations", _RootClient()) + + assert connector.get_organization_accounts() == {} + + logs = _logged_text(connector.logger) + assert "r-sensitive-root" not in logs + assert "[REDACTED]" in logs + + +def test_get_organization_accounts_redacts_missing_root_payload() -> None: + class _BadRootClient: + def list_roots(self): + return {"Roots": [{"AccountId": "123456789012"}]} + + connector = _TestAWSOrganizations() + connector.logger = MagicMock() + connector.register_client("organizations", _BadRootClient()) + + with pytest.raises(RuntimeError) as exc_info: + connector.get_organization_accounts() + + assert "123456789012" not 
in str(exc_info.value) + assert "[REDACTED]" in str(exc_info.value) + assert exc_info.value.__cause__ is None + + +def test_get_controltower_accounts_redacts_provider_warning() -> None: + class _ControlTowerClient: + def get_paginator(self, _: str): + raise ClientError( + {"Error": {"Code": "AccessDenied", "Message": "Denied for 123456789012 token=raw-token"}}, + "SearchProvisionedProducts", + ) + + connector = _TestAWSOrganizations() + connector.logger = MagicMock() + connector.register_client("servicecatalog", _ControlTowerClient()) + + assert connector.get_controltower_accounts() == {} + + logs = _logged_text(connector.logger) + assert "123456789012" not in logs + assert "raw-token" not in logs + assert "[REDACTED]" in logs def test_preprocess_organization_compiles_sections(mocker, organizations_connector: _TestAWSOrganizations): From 76e2b1f4a8eb93044e0ddf4f42df528888f0a1ce Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 18:28:21 -0500 Subject: [PATCH 216/287] fix: harden vault diagnostics --- .../connectors/vault/__init__.py | 43 +++++++--- tests/connectors/test_vault_connector.py | 80 +++++++++++++++++++ 2 files changed, 113 insertions(+), 10 deletions(-) diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index ce6cda6..32cbc1b 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -3,6 +3,7 @@ from __future__ import annotations from collections import deque +from collections.abc import Iterable, Mapping from datetime import datetime, timezone from typing import TYPE_CHECKING, Any @@ -45,12 +46,27 @@ def _load_hvac() -> Any: def _safe_log_text(value: Any, *sensitive_values: Any) -> str: """Return a redacted string for Vault diagnostic output.""" - return redact_sensitive_text(value, values=sensitive_values) + return redact_sensitive_text(value, values=_iter_diagnostic_values(sensitive_values)) def _safe_ref_text(value: Any) 
-> str: """Return a redacted string for sensitive Vault resource references.""" - return redact_sensitive_text(value, values=[value]) + return _safe_log_text(value, value) + + +def _iter_diagnostic_values(values: Iterable[Any]) -> Iterable[Any]: + """Yield scalar values from nested diagnostic context.""" + for value in values: + if value is None: + continue + if isinstance(value, Mapping): + yield from _iter_diagnostic_values(value.values()) + elif isinstance(value, (str, bytes)): + yield value + elif isinstance(value, Iterable): + yield from _iter_diagnostic_values(value) + else: + yield value class VaultConnector(VendorConnectorBase): @@ -102,7 +118,10 @@ def vault_client(self) -> hvac.Client: return self._vault_client except VaultError as e: - self.logger.exception(f"Error initializing Vault client with token: {_safe_log_text(e)}") + self.logger.error( # noqa: TRY400 - traceback can expose raw Vault credentials. + f"Error initializing Vault client with token: " + f"{_safe_log_text(e, vault_url, vault_namespace, vault_token)}" + ) # Fallback to AppRole authentication self.logger.info("Attempting AppRole authentication") @@ -131,8 +150,9 @@ def vault_client(self) -> hvac.Client: return self._vault_client except VaultError as e: - self.logger.exception(f"Error during AppRole authentication: {_safe_log_text(e)}") - raise + msg = f"Error during AppRole authentication: {_safe_log_text(e, app_role_path, role_id, secret_id)}" + self.logger.error(msg) # noqa: TRY400 - traceback can expose raw Vault credentials. + raise RuntimeError(msg) from None msg = "Vault authentication failed: no valid token or AppRole credentials provided" raise RuntimeError(msg) @@ -153,7 +173,7 @@ def _set_token_expiration(self) -> None: # No need to manually set tzinfo if running on Python 3.7 or newer. # If supporting Python <3.7, manual tzinfo assignment is required. 
except VaultError as e: - self.logger.exception(f"Failed to lookup Vault token expiration: {_safe_log_text(e)}") + self.logger.warning(f"Failed to lookup Vault token expiration: {_safe_log_text(e)}") def _is_token_valid(self) -> bool: """Check if the current Vault token is still valid.""" @@ -415,7 +435,9 @@ def write_secret( self.logger.info(f"Wrote secret to {_safe_ref_text(path)}") return True except VaultError as e: - self.logger.exception(f"Failed to write secret {_safe_ref_text(path)}: {_safe_log_text(e, path)}") + self.logger.error( # noqa: TRY400 - traceback can expose raw Vault secret paths. + f"Failed to write secret {_safe_ref_text(path)}: {_safe_log_text(e, path, data)}" + ) return False # --------------------------------------------------------------------- @@ -529,10 +551,11 @@ def generate_aws_credentials( response = aws_secrets.generate_credentials(name=role_name, mount_point=mount_point, **generate_kwargs) except VaultError as e: safe_role_name = _safe_ref_text(role_name) - self.logger.exception( - f"Failed to generate AWS credentials for role {safe_role_name}: {_safe_log_text(e, role_name)}" + self.logger.error( # noqa: TRY400 - traceback can expose raw Vault role names. 
+ f"Failed to generate AWS credentials for role {safe_role_name}: " + f"{_safe_log_text(e, role_name, mount_point, generate_kwargs)}" ) - raise RuntimeError(f"Failed to generate AWS credentials for role {safe_role_name}") from e + raise RuntimeError(f"Failed to generate AWS credentials for role {safe_role_name}") from None credentials = response.get("data") or {} if not credentials: diff --git a/tests/connectors/test_vault_connector.py b/tests/connectors/test_vault_connector.py index f3bce0c..26a0cc3 100644 --- a/tests/connectors/test_vault_connector.py +++ b/tests/connectors/test_vault_connector.py @@ -58,6 +58,60 @@ def test_vault_client_with_token(self, mock_hvac_class, base_connector_kwargs): assert client == mock_client mock_hvac_class.assert_called() + @patch("extended_data.connectors.vault.hvac.Client") + def test_vault_client_token_failure_redacts_without_traceback(self, mock_hvac_class, base_connector_kwargs): + """Token client initialization failures should avoid traceback diagnostics.""" + mock_hvac_class.side_effect = VaultError("token failure test-token Authorization: Bearer raw_token") + connector = VaultConnector( + vault_url="https://vault.example.com", vault_token="test-token", **base_connector_kwargs + ) + + with pytest.raises(RuntimeError, match="Vault authentication failed"): + _ = connector.vault_client + + logs = _logged_text(connector.logger) + assert "test-token" not in logs + assert "raw_token" not in logs + assert "[REDACTED]" in logs + connector.logger.exception.assert_not_called() + assert all("exc_info" not in logged_call.kwargs for logged_call in connector.logger.method_calls) + + @patch("extended_data.connectors.vault.hvac.Client") + def test_vault_client_approle_failure_redacts_without_raw_cause(self, mock_hvac_class, base_connector_kwargs): + """AppRole authentication failures should raise a redacted RuntimeError.""" + mock_client = MagicMock() + mock_client.is_authenticated.return_value = False + 
mock_client.auth.approle.login.side_effect = VaultError( + "approle failed role-raw secret-raw token=raw-token" + ) + mock_hvac_class.return_value = mock_client + + connector = VaultConnector(vault_url="https://vault.example.com", **base_connector_kwargs) + + def get_input(name, **kwargs): + values = { + "VAULT_NAMESPACE": None, + "VAULT_TOKEN": None, + "VAULT_APPROLE_PATH": "approle", + "VAULT_ROLE_ID": "role-raw", + "VAULT_SECRET_ID": "secret-raw", + } + return values.get(name, kwargs.get("default")) + + connector.get_input = MagicMock(side_effect=get_input) + + with pytest.raises(RuntimeError) as exc_info: + _ = connector.vault_client + + diagnostics = _logged_text(connector.logger) + str(exc_info.value) + assert "role-raw" not in diagnostics + assert "secret-raw" not in diagnostics + assert "raw-token" not in diagnostics + assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + connector.logger.exception.assert_not_called() + assert all("exc_info" not in logged_call.kwargs for logged_call in connector.logger.method_calls) + def test_is_token_valid(self, base_connector_kwargs): """Test token validity check.""" connector = VaultConnector( @@ -336,6 +390,29 @@ def test_generate_aws_credentials_error(self, base_connector_kwargs): with pytest.raises(RuntimeError): connector.generate_aws_credentials(role_name="prod") + def test_write_secret_failure_redacts_without_traceback(self, base_connector_kwargs): + """Vault write failures should not expose paths, values, or tracebacks.""" + connector = VaultConnector( + vault_url="https://vault.example.com", vault_token="test-token", **base_connector_kwargs + ) + + mock_client = MagicMock() + connector._vault_client = mock_client + connector._vault_token_expiration = datetime(2099, 1, 1, tzinfo=timezone.utc) + mock_client.secrets.kv.v2.create_or_update_secret.side_effect = VaultError( + "write failed at prod/db password=hunter2 token=raw-token" + ) + + assert connector.write_secret("prod/db", 
{"password": "hunter2"}) is False + + logs = _logged_text(connector.logger) + assert "prod/db" not in logs + assert "hunter2" not in logs + assert "raw-token" not in logs + assert "[REDACTED]" in logs + connector.logger.exception.assert_not_called() + assert all("exc_info" not in logged_call.kwargs for logged_call in connector.logger.method_calls) + def test_generate_aws_credentials_redacts_error_diagnostics(self, base_connector_kwargs): """Vault credential failures should redact role names and exception payloads.""" connector = VaultConnector( @@ -360,3 +437,6 @@ def test_generate_aws_credentials_redacts_error_diagnostics(self, base_connector assert "hunter2" not in message assert "[REDACTED]" in logs assert "[REDACTED]" in message + assert exc_info.value.__cause__ is None + connector.logger.exception.assert_not_called() + assert all("exc_info" not in logged_call.kwargs for logged_call in connector.logger.method_calls) From 5145554493fff454f94ab81feeef450137f77c54 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 18:31:41 -0500 Subject: [PATCH 217/287] fix: harden zoom diagnostics --- src/extended_data/connectors/zoom/__init__.py | 40 +++++-- tests/connectors/test_zoom_connector.py | 102 +++++++++++++++++- 2 files changed, 129 insertions(+), 13 deletions(-) diff --git a/src/extended_data/connectors/zoom/__init__.py b/src/extended_data/connectors/zoom/__init__.py index e762da2..a934862 100644 --- a/src/extended_data/connectors/zoom/__init__.py +++ b/src/extended_data/connectors/zoom/__init__.py @@ -4,6 +4,7 @@ import base64 +from collections.abc import Iterable, Mapping from typing import Any import requests @@ -20,7 +21,22 @@ def _safe_zoom_text(value: Any, *sensitive_values: Any) -> str: """Redact secrets and request identifiers from Zoom diagnostics.""" - return redact_sensitive_text(value, values=sensitive_values) + return redact_sensitive_text(value, values=_iter_diagnostic_values(sensitive_values)) + + +def _iter_diagnostic_values(values: 
Iterable[Any]) -> Iterable[Any]: + """Yield scalar values from nested diagnostic context.""" + for value in values: + if value is None: + continue + if isinstance(value, Mapping): + yield from _iter_diagnostic_values(value.values()) + elif isinstance(value, (str, bytes)): + yield value + elif isinstance(value, Iterable): + yield from _iter_diagnostic_values(value) + else: + yield value def _zoom_error(action: str, exc: BaseException, *sensitive_values: Any) -> str: @@ -61,8 +77,14 @@ def get_access_token(self) -> str | None: response.raise_for_status() return response.json().get("access_token") except requests.exceptions.RequestException as exc: - msg = "Failed to get Zoom access token" - raise RuntimeError(msg) from exc + msg = _zoom_error( + "Failed to get Zoom access token", + exc, + self.client_id, + self.client_secret, + self.account_id, + ) + raise RuntimeError(msg) from None def get_headers(self) -> dict[str, str]: """Get headers with authorization for Zoom API calls.""" @@ -100,7 +122,7 @@ def list_users(self) -> ExtendedDict: if not next_page_token: break except requests.exceptions.RequestException as exc: - raise RuntimeError(_zoom_error("Failed to get Zoom users", exc)) from exc + raise RuntimeError(_zoom_error("Failed to get Zoom users", exc, next_page_token, params)) from None return self.extend_result(users) @@ -115,7 +137,7 @@ def remove_zoom_user(self, email: str) -> None: except requests.exceptions.RequestException as exc: error_msg = _zoom_error("Failed to remove Zoom user", exc, email) self.errors.append(error_msg) - self.logger.exception(error_msg) + self.logger.error(error_msg) # noqa: TRY400 - traceback can expose raw Zoom user identifiers. 
def create_zoom_user(self, email: str, first_name: str, last_name: str) -> bool: """Create a Zoom user with a paid license.""" @@ -133,7 +155,7 @@ def create_zoom_user(self, email: str, first_name: str, last_name: str) -> bool: except requests.exceptions.RequestException as exc: error_msg = _zoom_error("Failed to create Zoom user", exc, email, first_name, last_name) self.errors.append(error_msg) - self.logger.exception(error_msg) + self.logger.error(error_msg) # noqa: TRY400 - traceback can expose raw Zoom user identifiers. return False def get_user(self, user_id: str) -> ExtendedDict: @@ -153,7 +175,7 @@ def get_user(self, user_id: str) -> ExtendedDict: response.raise_for_status() return self.extend_result(response.json()) except requests.exceptions.RequestException as exc: - raise RuntimeError(_zoom_error("Failed to get Zoom user", exc, user_id)) from exc + raise RuntimeError(_zoom_error("Failed to get Zoom user", exc, user_id)) from None def list_meetings(self, user_id: str, meeting_type: str = "scheduled") -> ExtendedList[ExtendedDict]: """List meetings for a specific user. @@ -175,7 +197,7 @@ def list_meetings(self, user_id: str, meeting_type: str = "scheduled") -> Extend data = response.json() return self.extend_result(data.get("meetings", [])) except requests.exceptions.RequestException as exc: - raise RuntimeError(_zoom_error("Failed to list Zoom meetings", exc, user_id)) from exc + raise RuntimeError(_zoom_error("Failed to list Zoom meetings", exc, user_id, params)) from None def get_meeting(self, meeting_id: str) -> ExtendedDict: """Get details of a specific meeting. 
@@ -194,7 +216,7 @@ def get_meeting(self, meeting_id: str) -> ExtendedDict: response.raise_for_status() return self.extend_result(response.json()) except requests.exceptions.RequestException as exc: - raise RuntimeError(_zoom_error("Failed to get Zoom meeting", exc, meeting_id)) from exc + raise RuntimeError(_zoom_error("Failed to get Zoom meeting", exc, meeting_id)) from None from extended_data.connectors.zoom.tools import ( diff --git a/tests/connectors/test_zoom_connector.py b/tests/connectors/test_zoom_connector.py index daf7476..c6de0a0 100644 --- a/tests/connectors/test_zoom_connector.py +++ b/tests/connectors/test_zoom_connector.py @@ -54,9 +54,9 @@ def test_get_access_token_success(self, mock_post, base_connector_kwargs): @patch("extended_data.connectors.zoom.requests.post") def test_get_access_token_failure(self, mock_post, base_connector_kwargs): """Test failed access token retrieval.""" - import requests - - mock_post.side_effect = requests.exceptions.RequestException("Connection error") + mock_post.side_effect = requests.exceptions.RequestException( + "Connection error test-account-id client_secret=raw-secret" + ) connector = ZoomConnector( client_id="test-client-id", @@ -65,9 +65,16 @@ def test_get_access_token_failure(self, mock_post, base_connector_kwargs): **base_connector_kwargs, ) - with pytest.raises(RuntimeError, match="Failed to get Zoom access token"): + with pytest.raises(RuntimeError, match="Failed to get Zoom access token") as exc_info: connector.get_access_token() + message = str(exc_info.value) + assert "test-account-id" not in message + assert "test-client-secret" not in message + assert "raw-secret" not in message + assert "[REDACTED]" in message + assert exc_info.value.__cause__ is None + @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") def test_list_users_redacts_request_failure_details(self, mock_post, mock_get, base_connector_kwargs): @@ -94,6 +101,7 @@ def 
test_list_users_redacts_request_failure_details(self, mock_post, mock_get, b assert "hunter2" not in message assert "raw_token" not in message assert "[REDACTED]" in message + assert exc_info.value.__cause__ is None @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") @@ -189,6 +197,37 @@ def test_remove_zoom_user_redacts_error_state_and_logs(self, mock_post, mock_del assert "private-user@example.com" not in diagnostics assert "raw_token" not in diagnostics assert "[REDACTED]" in diagnostics + connector.logger.exception.assert_not_called() + assert all("exc_info" not in logged_call.kwargs for logged_call in connector.logger.method_calls) + + @patch("extended_data.connectors.zoom.requests.post") + def test_create_zoom_user_redacts_error_state_and_logs(self, mock_post, base_connector_kwargs): + """Zoom create failures should redact user PII and avoid traceback logs.""" + mock_token_response = MagicMock() + mock_token_response.json.return_value = {"access_token": "test-token"} + mock_token_response.raise_for_status = MagicMock() + mock_post.side_effect = [ + mock_token_response, + requests.exceptions.RequestException("failed Jane SecretUser newuser@example.com token=raw-token"), + ] + + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + assert connector.create_zoom_user("newuser@example.com", "Jane", "SecretUser") is False + + diagnostics = "\n".join(connector.errors) + _logged_text(connector.logger) + assert "newuser@example.com" not in diagnostics + assert "Jane" not in diagnostics + assert "SecretUser" not in diagnostics + assert "raw-token" not in diagnostics + assert "[REDACTED]" in diagnostics + connector.logger.exception.assert_not_called() + assert all("exc_info" not in logged_call.kwargs for logged_call in connector.logger.method_calls) @patch("extended_data.connectors.zoom.requests.get") 
@patch("extended_data.connectors.zoom.requests.post") @@ -249,6 +288,7 @@ def test_get_user_redacts_identifier_and_secret_details(self, mock_post, mock_ge assert "user1%40example.com" not in message assert "s3cr3t" not in message assert "[REDACTED]" in message + assert exc_info.value.__cause__ is None @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") @@ -282,6 +322,34 @@ def test_list_meetings(self, mock_post, mock_get, base_connector_kwargs): assert len(meetings) == 2 assert meetings[0]["id"] == "111" + @patch("extended_data.connectors.zoom.requests.get") + @patch("extended_data.connectors.zoom.requests.post") + def test_list_meetings_redacts_identifier_and_secret_details(self, mock_post, mock_get, base_connector_kwargs): + """Zoom meeting list failures should not chain raw user identifiers.""" + mock_token_response = MagicMock() + mock_token_response.json.return_value = {"access_token": "test-token"} + mock_token_response.raise_for_status = MagicMock() + mock_post.return_value = mock_token_response + mock_get.side_effect = requests.exceptions.RequestException( + "failed for private-user@example.com type=scheduled token=raw-token" + ) + + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + with pytest.raises(RuntimeError) as exc_info: + connector.list_meetings("private-user@example.com") + + message = str(exc_info.value) + assert "private-user@example.com" not in message + assert "raw-token" not in message + assert "[REDACTED]" in message + assert exc_info.value.__cause__ is None + @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") def test_get_meeting(self, mock_post, mock_get, base_connector_kwargs): @@ -312,3 +380,29 @@ def test_get_meeting(self, mock_post, mock_get, base_connector_kwargs): assert isinstance(meeting["topic"], ExtendedString) 
assert meeting["id"] == "111" assert meeting["topic"] == "Team Meeting" + + @patch("extended_data.connectors.zoom.requests.get") + @patch("extended_data.connectors.zoom.requests.post") + def test_get_meeting_redacts_identifier_and_secret_details(self, mock_post, mock_get, base_connector_kwargs): + """Zoom meeting lookup failures should not chain raw meeting identifiers.""" + mock_token_response = MagicMock() + mock_token_response.json.return_value = {"access_token": "test-token"} + mock_token_response.raise_for_status = MagicMock() + mock_post.return_value = mock_token_response + mock_get.side_effect = requests.exceptions.RequestException("meeting private-meeting token=raw-token") + + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + with pytest.raises(RuntimeError) as exc_info: + connector.get_meeting("private-meeting") + + message = str(exc_info.value) + assert "private-meeting" not in message + assert "raw-token" not in message + assert "[REDACTED]" in message + assert exc_info.value.__cause__ is None From 8dbc4921defab308eb35d368c1f22d83fd5257bb Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 18:35:23 -0500 Subject: [PATCH 218/287] fix: harden slack diagnostics --- .../connectors/slack/__init__.py | 18 +-- tests/connectors/test_slack_connector.py | 118 ++++++++++++++++++ 2 files changed, 128 insertions(+), 8 deletions(-) diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index b6c0041..56af5e8 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -65,9 +65,9 @@ class SlackAPIError(RuntimeError): """Slack API error wrapper.""" def __init__(self, response: Any) -> None: - self.response = response + self.response = _slack_response_payload(response) self.status_code = response.status_code if hasattr(response, 
"status_code") else None - super().__init__(f"Slack API error: {redact_sensitive_text(response)}") + super().__init__(f"Slack API error: {redact_sensitive_text(self.response)}") def _slack_response_payload(response: Any) -> dict[str, Any]: @@ -295,11 +295,13 @@ def send_message( channels = self.get_bot_channels() if channel_name not in channels: - raise RuntimeError(f"Bot not in channel {channel_name}. Add the bot first.") + safe_channel_name = redact_sensitive_text(channel_name, values=[channel_name]) + raise RuntimeError(f"Bot not in channel {safe_channel_name}. Add the bot first.") channel_id = channels[channel_name].get("id") if is_nothing(channel_id): - raise RuntimeError(f"{channel_name} does not have a channel ID") + safe_channel_name = redact_sensitive_text(channel_name, values=[channel_name]) + raise RuntimeError(f"{safe_channel_name} does not have a channel ID") opts: dict[str, Any] = {"channel": channel_id, "text": text} if not is_nothing(blocks): @@ -311,7 +313,7 @@ def send_message( return self.extend_result(self.bot_web_client.chat_postMessage(**to_builtin(opts)).get("ts")) except SlackApiError as exc: if raise_on_api_error: - raise SlackAPIError(exc.response) from exc + raise SlackAPIError(exc.response) from None return self.extend_result(_slack_response_payload(exc.response)) def get_bot_channels(self) -> ExtendedDict: @@ -327,7 +329,7 @@ def get_bot_channels(self) -> ExtendedDict: channels = {channel["name"]: channel for channel in self.bot_web_client.users_conversations()["channels"]} return self.extend_result(channels) except SlackApiError as exc: - raise SlackAPIError(exc.response) from exc + raise SlackAPIError(exc.response) from None def list_users( self, @@ -543,12 +545,12 @@ def _call_api( if total_delay > MAX_RETRY_TIMEOUT_SECONDS: raise TimeoutError( f"Slack WebClient {safe_method} timed out after {total_delay} seconds" - ) from exc + ) from None self.logger.warning(f"Rate limited. 
Retrying in {delay} seconds") sleep(delay) attempt += 1 else: - raise SlackAPIError(exc.response) from exc + raise SlackAPIError(exc.response) from None if is_nothing(response) or is_nothing(group_by): return response diff --git a/tests/connectors/test_slack_connector.py b/tests/connectors/test_slack_connector.py index 8ca2454..b6ace1f 100644 --- a/tests/connectors/test_slack_connector.py +++ b/tests/connectors/test_slack_connector.py @@ -58,6 +58,8 @@ def test_slack_api_error_redacts_sensitive_response_text() -> None: assert "hunter2" not in message assert "raw_token" not in message assert "[REDACTED]" in message + assert error.response["password"] == "[REDACTED]" + assert error.response["authorization"] == "[REDACTED]" class TestSlackConnector: @@ -169,6 +171,91 @@ def __init__(self, response): assert result["error"] == "channel_not_found" assert result["password"] == "[REDACTED]" + @patch("extended_data.connectors.slack.WebClient") + def test_send_message_api_error_redacts_response_without_raw_cause( + self, + mock_webclient_class, + base_connector_kwargs, + ): + """Raising Slack send failures should not preserve raw SDK exceptions.""" + + class FakeSlackApiError(Exception): + def __init__(self, response): + self.response = response + + mock_bot_client = MagicMock() + mock_bot_client.users_conversations.return_value = {"channels": [{"name": "general", "id": "C12345"}]} + mock_bot_client.chat_postMessage.side_effect = FakeSlackApiError( + {"ok": False, "error": "channel_not_found", "password": "hunter2", "token": "raw-token"} + ) + + mock_user_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + with ( + patch("extended_data.connectors.slack.SlackApiError", FakeSlackApiError), + pytest.raises(SlackAPIError) as exc_info, + ): + connector.send_message(channel_name="general", text="Test message", blocks=[]) + + diagnostics = 
str(exc_info.value) + str(exc_info.value.response) + assert "hunter2" not in diagnostics + assert "raw-token" not in diagnostics + assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + + @patch("extended_data.connectors.slack.WebClient") + def test_send_message_redacts_missing_channel_name(self, mock_webclient_class, base_connector_kwargs): + """Missing-channel errors should not echo caller-provided channel names.""" + mock_bot_client = MagicMock() + mock_bot_client.users_conversations.return_value = {"channels": []} + + mock_user_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + with pytest.raises(RuntimeError) as exc_info: + connector.send_message(channel_name="private-channel", text="Test message", blocks=[]) + + assert "private-channel" not in str(exc_info.value) + assert "[REDACTED]" in str(exc_info.value) + + @patch("extended_data.connectors.slack.WebClient") + def test_get_bot_channels_api_error_redacts_response_without_raw_cause( + self, + mock_webclient_class, + base_connector_kwargs, + ): + """Bot-channel lookup failures should wrap redacted Slack responses.""" + + class FakeSlackApiError(Exception): + def __init__(self, response): + self.response = response + + mock_bot_client = MagicMock() + mock_bot_client.users_conversations.side_effect = FakeSlackApiError( + {"ok": False, "error": "token_revoked", "authorization": "Bearer raw_token"} + ) + + mock_user_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + with ( + patch("extended_data.connectors.slack.SlackApiError", FakeSlackApiError), + pytest.raises(SlackAPIError) as exc_info, + ): + connector.get_bot_channels() + + diagnostics = str(exc_info.value) + str(exc_info.value.response) + assert 
"raw_token" not in diagnostics + assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + @patch("extended_data.connectors.slack.WebClient") def test_call_api_redacts_grouping_failure_payload(self, mock_webclient_class, base_connector_kwargs): """Slack grouping failures should not dump raw secret-bearing response data.""" @@ -189,6 +276,37 @@ def test_call_api_redacts_grouping_failure_payload(self, mock_webclient_class, b assert "raw_token" not in message assert "[REDACTED]" in message + @patch("extended_data.connectors.slack.WebClient") + def test_call_api_non_rate_error_redacts_response_without_raw_cause( + self, + mock_webclient_class, + base_connector_kwargs, + ): + """Slack API failures should not preserve raw SDK exception causes.""" + + class FakeSlackApiError(Exception): + def __init__(self, response): + self.response = response + + mock_response = {"ok": False, "error": "bad_auth", "authorization": "Bearer raw_token"} + mock_user_client = MagicMock() + mock_user_client.users_list.side_effect = FakeSlackApiError(mock_response) + mock_bot_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + with ( + patch("extended_data.connectors.slack.SlackApiError", FakeSlackApiError), + pytest.raises(SlackAPIError) as exc_info, + ): + connector._call_api("users_list") + + diagnostics = str(exc_info.value) + str(exc_info.value.response) + assert "raw_token" not in diagnostics + assert "[REDACTED]" in diagnostics + assert exc_info.value.__cause__ is None + @patch("extended_data.connectors.slack.SlackConnector._call_api") @patch("extended_data.connectors.slack.WebClient") def test_list_users_filters_deleted( From 70cd3643194beefb00873548a0c0aedbbec5df4e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 18:41:24 -0500 Subject: [PATCH 219/287] fix: harden google jules diagnostics --- 
src/extended_data/connectors/google/jules.py | 20 ++++++-- tests/connectors/test_google_jules.py | 54 ++++++++++++++++++++ 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/src/extended_data/connectors/google/jules.py b/src/extended_data/connectors/google/jules.py index 15e79ba..62a3d60 100644 --- a/src/extended_data/connectors/google/jules.py +++ b/src/extended_data/connectors/google/jules.py @@ -26,6 +26,7 @@ from __future__ import annotations +from contextlib import suppress from enum import Enum from typing import Any @@ -34,8 +35,9 @@ from pydantic import BaseModel, Field from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.google._diagnostics import safe_google_text from extended_data.containers import ExtendedDict, ExtendedList -from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text +from extended_data.primitives.redaction import redact_sensitive_data __all__ = [ @@ -163,17 +165,25 @@ def _build_headers(self) -> dict[str, str]: def _handle_response(self, response: httpx.Response) -> dict[str, Any]: """Handle API response, raising on errors.""" if not response.is_success: + diagnostic_values = self._response_diagnostic_values(response) try: error = response.json().get("error", {}) raise JulesError( - redact_sensitive_text(error.get("message", response.text)), + safe_google_text(error.get("message", response.text), diagnostic_values), error.get("code", response.status_code), - redact_sensitive_data(error.get("details")), + redact_sensitive_data(error.get("details"), values=diagnostic_values), ) - except (ValueError, KeyError) as exc: - raise JulesError(redact_sensitive_text(response.text), response.status_code) from exc + except (ValueError, KeyError): + raise JulesError(safe_google_text(response.text, diagnostic_values), response.status_code) from None return response.json() + def _response_diagnostic_values(self, response: httpx.Response) -> list[str]: + """Collect 
caller-controlled response identifiers for diagnostics redaction.""" + values = [self._base_url] + with suppress(RuntimeError): + values.append(str(response.request.url)) + return values + # ========================================================================= # Sources # ========================================================================= diff --git a/tests/connectors/test_google_jules.py b/tests/connectors/test_google_jules.py index a1e4f00..b7159f8 100644 --- a/tests/connectors/test_google_jules.py +++ b/tests/connectors/test_google_jules.py @@ -19,6 +19,14 @@ def _response(payload: dict, status_code: int = 200) -> httpx.Response: ) +def _text_response(text: str, status_code: int = 500, url: str = "https://jules.googleapis.com/v1alpha/test") -> httpx.Response: + return httpx.Response( + status_code, + text=text, + request=httpx.Request("GET", url), + ) + + def test_session_pull_request_model_property() -> None: """The standalone Session model still exposes typed convenience properties.""" session = Session( @@ -181,3 +189,49 @@ def test_handle_response_redacts_sensitive_jules_error_details() -> None: assert "hunter2" not in message assert "raw_token" not in message assert exc_info.value.details == [{"api_key": "[REDACTED]"}] + + +def test_handle_response_redacts_request_url_in_jules_error() -> None: + """Jules API errors should redact caller-controlled request URLs.""" + connector = JulesConnector(api_key="test-key") + request_url = "https://jules.googleapis.com/v1alpha/sessions/private-session?api_key=raw_key" + response = httpx.Response( + 403, + json={ + "error": { + "message": f"denied while calling {request_url}", + "code": 403, + "details": [{"debug": request_url}], + } + }, + request=httpx.Request("GET", request_url), + ) + + with pytest.raises(JulesError) as exc_info: + connector._handle_response(response) + + error = exc_info.value + assert request_url not in str(error) + assert request_url not in repr(error.details) + assert error.__cause__ is 
None + + +def test_handle_response_malformed_error_has_sanitized_message_without_cause() -> None: + """Malformed Jules errors should not chain parser internals or expose request URLs.""" + connector = JulesConnector(api_key="test-key") + request_url = "https://jules.googleapis.com/v1alpha/sessions/private-session?api_key=raw_key" + response = _text_response( + f"upstream failed while calling {request_url} with password=hunter2", + status_code=502, + url=request_url, + ) + + with pytest.raises(JulesError) as exc_info: + connector._handle_response(response) + + error = exc_info.value + message = str(error) + assert error.code == 502 + assert error.__cause__ is None + assert request_url not in message + assert "hunter2" not in message From 7f667fa0790c969f74a3e482499bb0c39d793a53 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 18:47:30 -0500 Subject: [PATCH 220/287] test: harden connector payload surface contract --- .../connectors/slack/__init__.py | 6 ++--- .../test_connector_payload_contracts.py | 27 +++++++++++++++++-- tests/connectors/test_slack_connector.py | 7 +++++ 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index 56af5e8..c701b49 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -12,11 +12,11 @@ # batched was added in Python 3.12 if sys.version_info >= (3, 12): - from itertools import batched + from itertools import batched as _batched else: from itertools import islice - def batched(iterable: Iterable[Any], n: int) -> Iterator[tuple[Any, ...]]: + def _batched(iterable: Iterable[Any], n: int) -> Iterator[tuple[Any, ...]]: """Batch an iterable into chunks of size n for Python < 3.12.""" it = iter(iterable) while batch := tuple(islice(it, n)): @@ -134,7 +134,7 @@ def get_field_context_message_blocks(field_name: str, context_data: Mapping[str, get_divider(), ] - for 
field_keys in batched(context_data.keys(), 10): + for field_keys in _batched(context_data.keys(), 10): context_elements: list[dict[str, str]] = [] for field_key in field_keys: field_value = context_data.get(field_key) diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index a011e2b..69a1d56 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -264,6 +264,30 @@ ) +RAW_CONTAINER_ANNOTATIONS = {"Dict", "List", "Set", "Tuple", "dict", "list", "set", "tuple"} + + +def _annotation_includes_raw_container(annotation: ast.AST) -> bool: + """Return whether an annotation AST includes a built-in raw container type.""" + if isinstance(annotation, ast.Name): + return annotation.id in RAW_CONTAINER_ANNOTATIONS + if isinstance(annotation, ast.Attribute): + return annotation.attr in RAW_CONTAINER_ANNOTATIONS + if isinstance(annotation, ast.Subscript): + return _annotation_includes_raw_container(annotation.value) or _annotation_includes_raw_container( + annotation.slice + ) + if isinstance(annotation, ast.BinOp): + return _annotation_includes_raw_container(annotation.left) or _annotation_includes_raw_container( + annotation.right + ) + if isinstance(annotation, ast.Tuple): + return any(_annotation_includes_raw_container(item) for item in annotation.elts) + if isinstance(annotation, ast.List): + return any(_annotation_includes_raw_container(item) for item in annotation.elts) + return False + + class _RawContainerReturnVisitor(ast.NodeVisitor): def __init__(self, relative_path: str) -> None: self.relative_path = relative_path @@ -293,8 +317,7 @@ def _visit_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None: if not is_nested_function and not node.name.startswith("_") and node.returns is not None: annotation = ast.unparse(node.returns) - has_raw_container = any(token in annotation for token in ("dict", "list")) - if 
has_raw_container and "Extended" not in annotation: + if _annotation_includes_raw_container(node.returns): boundary = (self.relative_path, qualname) if boundary not in RAW_CONNECTOR_BOUNDARIES: self.offenders.append(f"{self.relative_path}:{node.lineno}: {qualname} -> {annotation}") diff --git a/tests/connectors/test_slack_connector.py b/tests/connectors/test_slack_connector.py index b6ace1f..927fe4f 100644 --- a/tests/connectors/test_slack_connector.py +++ b/tests/connectors/test_slack_connector.py @@ -8,6 +8,8 @@ import pytest +import extended_data.connectors.slack as slack_module + from extended_data.connectors.slack import ( SlackAPIError, SlackConnector, @@ -50,6 +52,11 @@ def test_slack_block_helpers_return_extended_payloads(): assert isinstance(rich[0]["elements"], ExtendedList) +def test_slack_module_does_not_export_internal_batching_helper() -> None: + """Compatibility helpers should not become public connector surface.""" + assert not hasattr(slack_module, "batched") + + def test_slack_api_error_redacts_sensitive_response_text() -> None: """Slack API errors should not expose raw secret-bearing response values.""" error = SlackAPIError({"ok": False, "password": "hunter2", "authorization": "Bearer raw_token"}) From b1515d1c5081bc66bf023153175895cabd4d0a44 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 18:50:46 -0500 Subject: [PATCH 221/287] test: prevent examples from echoing credential fragments --- examples/inputs/decorator_api.py | 2 +- tests/examples/test_safe_examples.py | 30 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/examples/inputs/decorator_api.py b/examples/inputs/decorator_api.py index 29b1fc3..57e6989 100644 --- a/examples/inputs/decorator_api.py +++ b/examples/inputs/decorator_api.py @@ -42,7 +42,7 @@ def authenticated_call(self, api_key: str, endpoint: str = "/users") -> str: The api_key is required and sourced from API_KEY input. 
The endpoint parameter uses its default if not in inputs. """ - return f"Calling {endpoint} with key {api_key[:4]}..." + return f"Calling {endpoint} with configured API key" @input_config("config", source_name="CONFIG", decode_from_json=True) def parse_config(self, config: dict[str, str] | None = None) -> dict[str, str]: diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index b37470c..cad6a0b 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -57,6 +57,7 @@ ) ROOT_DISALLOWED_TIER1_IMPORTS = tuple(sorted(primitives.__all__)) PYTHON_MARKDOWN_BLOCK_RE = re.compile(r"```python\n(?P.*?)\n```", re.DOTALL) +SENSITIVE_IDENTIFIER_RE = re.compile(r"(api_?key|secret|token|password|authorization)", re.IGNORECASE) def _readme_usage_snippet() -> str: @@ -192,6 +193,35 @@ def test_secrets_example_does_not_print_raw_sync_results() -> None: assert f"print(result['{field}'])" not in text +def _is_sensitive_identifier(node: ast.AST) -> bool: + if isinstance(node, ast.Name): + return bool(SENSITIVE_IDENTIFIER_RE.search(node.id)) + if isinstance(node, ast.Attribute): + return bool(SENSITIVE_IDENTIFIER_RE.search(node.attr)) + return False + + +def _expression_contains_sensitive_identifier(node: ast.AST) -> bool: + return any(_is_sensitive_identifier(child) for child in ast.walk(node)) + + +def test_examples_do_not_echo_partial_sensitive_values() -> None: + """Examples should not teach printing, slicing, or returning credential fragments.""" + offenders: list[str] = [] + + for example_path in ALL_EXAMPLES: + tree = ast.parse((REPO_ROOT / example_path).read_text(encoding="utf-8")) + for node in ast.walk(tree): + if isinstance(node, ast.Subscript) and _is_sensitive_identifier(node.value): + offenders.append(f"{example_path}:{node.lineno}: slices or indexes a sensitive value") + if isinstance(node, ast.FormattedValue) and _expression_contains_sensitive_identifier(node.value): + 
offenders.append(f"{example_path}:{node.lineno}: interpolates a sensitive value") + if isinstance(node, ast.Return) and node.value is not None and _is_sensitive_identifier(node.value): + offenders.append(f"{example_path}:{node.lineno}: returns a sensitive value directly") + + assert offenders == [] + + @pytest.mark.parametrize("example_path", ALL_EXAMPLES) def test_example_compiles(example_path: str, tmp_path: Path) -> None: """Every example should at least remain syntactically valid.""" From edc6c957ccae83d18f92a199ff79020ec0633f1d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 19:05:32 -0500 Subject: [PATCH 222/287] fix: redact lifecycle logging diagnostics --- README.md | 5 ++++- docs/package-surface.md | 5 +++++ src/extended_data/logging/logging.py | 23 ++++++++++++++++---- src/extended_data/logging/utils.py | 3 ++- tests/logging/test_exit_run.py | 32 ++++++++++++++++++++++++++++ tests/logging/test_logging.py | 19 +++++++++++++++++ tests/logging/test_properties.py | 11 +++++----- 7 files changed, 87 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4869619..0c04c11 100644 --- a/README.md +++ b/README.md @@ -241,7 +241,10 @@ and `to_export_safe()`. `Logging` stores marked log message collections as `ExtendedDict` and `ExtendedSet` values while keeping Python logger and handler objects plain. Use `get_stored_messages()` or `snapshot_stored_messages()` when downstream -data workflows need detached promoted copies of collected messages. +data workflows need detached promoted copies of collected messages. Runtime log +messages and attached JSON payloads use the same Tier 1 redaction policy as +connector diagnostics, and `exit_run()` formatting failures report redacted +result snapshots instead of raw payload data. More detail lives in [`docs/package-surface.md`](docs/package-surface.md). 
diff --git a/docs/package-surface.md b/docs/package-surface.md index 5915807..ca10c31 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -228,6 +228,11 @@ storage marker, with each marker containing an `ExtendedSet` of promoted messages. `get_stored_messages()` returns a detached promoted message set for one marker, and `snapshot_stored_messages()` returns a detached `ExtendedDict` copy of all stored collections for downstream export or workflow composition. +Runtime log messages and attached JSON payloads are redacted with the Tier 1 +redaction primitives before they reach Python logging handlers or stored message +collections. `exit_run()` formatting failures also report a redacted result +snapshot and suppress the internal formatting exception chain so diagnostics do +not echo raw payload data. `ConnectorFabric` caches and coordinates vendor connectors while sharing input loading, logging, data normalization, retry behavior, and serialization. diff --git a/src/extended_data/logging/logging.py b/src/extended_data/logging/logging.py index 09837cd..fc9f47f 100644 --- a/src/extended_data/logging/logging.py +++ b/src/extended_data/logging/logging.py @@ -43,6 +43,8 @@ from extended_data.primitives import ( get_unique_signature, is_nothing, + redact_sensitive_data, + redact_sensitive_text, string_to_bool, to_camel_case, to_kebab_case, @@ -342,6 +344,7 @@ def logged_statement( final_msg = self._prepare_message(msg, context_marker, identifiers) final_msg = add_json_data(final_msg, json_data, labeled_json_data) + final_msg = redact_sensitive_text(final_msg) # Normalize levels once here before passing to storage final_allowed = self._normalize_levels(allowed_levels) if allowed_levels is not None else self.allowed_levels @@ -456,6 +459,15 @@ def _transform_nested_keys( result[transformed_key] = value return result + @staticmethod + def _format_exit_run_error_snapshot(data: Any) -> str: + """Return a redacted diagnostic snapshot for exit_run failures.""" + 
redacted_data = redact_sensitive_data(data) + try: + return wrap_raw_data_for_export(redacted_data, allow_encoding=True) + except Exception: + return redact_sensitive_text(redacted_data) + def exit_run( self, results: Mapping[str, Any] | None = None, @@ -644,7 +656,10 @@ def encode_result_with_base64(r: Any) -> str: sys.stdout.write(data) sys.exit(0) - except ExitRunError as exc: - err_msg = f"Failed to dump results because of a formatting error:\n\n{data}" - self.logger.critical(err_msg, exc_info=True) - raise RuntimeError(err_msg) from exc + except ExitRunError: + err_msg = ( + "Failed to dump results because of a formatting error:\n\n" + f"{self._format_exit_run_error_snapshot(data)}" + ) + self.logger.critical(err_msg) + raise RuntimeError(err_msg) from None diff --git a/src/extended_data/logging/utils.py b/src/extended_data/logging/utils.py index 0da694e..e84bbba 100644 --- a/src/extended_data/logging/utils.py +++ b/src/extended_data/logging/utils.py @@ -10,6 +10,7 @@ from extended_data.io import make_raw_data_export_safe, wrap_raw_data_for_export from extended_data.logging.const import DEFAULT_LOG_LEVEL +from extended_data.primitives.redaction import redact_sensitive_data def get_log_level(level: int | str) -> int: @@ -87,7 +88,7 @@ def sanitize_json_data(data: Any) -> Any: """ # Use Extended Data core' make_raw_data_export_safe for comprehensive handling # This handles datetime, Path, large numbers, and more - return make_raw_data_export_safe(data, export_to_yaml=False) + return redact_sensitive_data(make_raw_data_export_safe(data, export_to_yaml=False)) def add_labeled_json( diff --git a/tests/logging/test_exit_run.py b/tests/logging/test_exit_run.py index 2f64204..68a47ca 100644 --- a/tests/logging/test_exit_run.py +++ b/tests/logging/test_exit_run.py @@ -258,6 +258,38 @@ def test_exit_run_sort_missing_field_raises(self, logger: Logging, tmp_path: Pat exit_on_completion=False, ) + def test_exit_run_formatting_errors_redact_result_snapshot(self, logger: 
Logging, tmp_path: Path) -> None: + """Formatting failures should report redacted diagnostics without chained traces.""" + os.chdir(tmp_path) + results = { + "a": { + "otherField": "value", + "password": "hunter2", + "headers": {"authorization": "Bearer raw_token"}, + } + } + + with ( + patch.object(logger.logger, "critical") as mock_critical, + pytest.raises(RuntimeError, match="formatting error") as exc_info, + ): + logger.exit_run( + results, + sort_by_field="missingField", + exit_on_completion=False, + ) + + mock_critical.assert_called_once() + logged_message = mock_critical.call_args.args[0] + assert mock_critical.call_args.kwargs == {} + assert exc_info.value.__cause__ is None + assert exc_info.value.__suppress_context__ is True + for raw_secret in ["hunter2", "raw_token"]: + assert raw_secret not in logged_message + assert raw_secret not in str(exc_info.value) + assert "[REDACTED]" in logged_message + assert "Traceback" not in logged_message + def test_exit_run_with_errors_raises(self, logger: Logging, tmp_path: Path) -> None: """Test that exit_run raises when error_list is not empty.""" os.chdir(tmp_path) diff --git a/tests/logging/test_logging.py b/tests/logging/test_logging.py index d562065..759a4b6 100644 --- a/tests/logging/test_logging.py +++ b/tests/logging/test_logging.py @@ -56,6 +56,25 @@ def test_json_logging(logger: Logging) -> None: assert "value" in result +def test_logging_redacts_sensitive_message_and_json_payloads(logger: Logging) -> None: + """Runtime log messages apply the shared Tier 1 redaction policy.""" + result = logger.logged_statement( + "Request failed with Authorization: Bearer raw_token", + json_data={"password": "hunter2", "nested": {"api_key": "key_123"}}, + labeled_json_data={"Request": {"client_secret": "secret_123"}}, + storage_marker="events", + log_level="info", + ) + + assert result is not None + stored = next(iter(logger.stored_messages["events"])) + for raw_secret in ["raw_token", "hunter2", "key_123", "secret_123"]: + 
assert raw_secret not in result + assert raw_secret not in stored + assert result.count("[REDACTED]") >= 4 + assert stored.count("[REDACTED]") >= 4 + + def test_storage_marker(logger: Logging) -> None: """Test storing messages under specific markers. diff --git a/tests/logging/test_properties.py b/tests/logging/test_properties.py index e226b06..1c667b8 100644 --- a/tests/logging/test_properties.py +++ b/tests/logging/test_properties.py @@ -6,6 +6,7 @@ from hypothesis import strategies as st from extended_data.logging import Logging +from extended_data.primitives import redact_sensitive_text # Strategy for valid log levels @@ -57,7 +58,7 @@ def test_basic_logging_properties(message: str, log_level: str) -> None: result = logger.logged_statement(message, log_level=log_level) # type: ignore[arg-type] assert result is not None - assert message in result + assert redact_sensitive_text(message) in result @given(message=log_messages, marker=marker_names) @@ -75,7 +76,7 @@ def test_context_marker_properties(message: str, marker: str) -> None: ) assert result is not None - assert f"[{marker}]" in result + assert redact_sensitive_text(f"[{marker}]") in result @given(message=log_messages, marker=marker_names) @@ -93,7 +94,7 @@ def test_storage_marker_properties(message: str, marker: str) -> None: ) assert marker in logger.stored_messages - assert message in next(iter(logger.stored_messages[marker])) + assert redact_sensitive_text(message) in next(iter(logger.stored_messages[marker])) @given(message=log_messages, verbosity=verbosity_levels, marker=marker_names) @@ -116,7 +117,7 @@ def test_verbosity_bypass_properties(message: str, verbosity: int, marker: str) ) assert result is not None - assert message in result + assert redact_sensitive_text(message) in result @given(message=log_messages, verbosity=verbosity_levels) @@ -149,6 +150,6 @@ def test_verbosity_control_properties(message: str, verbosity: int) -> None: if verbosity <= logger.verbosity_threshold: assert result is not 
None - assert message in result + assert redact_sensitive_text(message) in result else: assert result is None From 9e0d4dab42cb01978421e0142012af9636859825 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 19:09:51 -0500 Subject: [PATCH 223/287] fix: require meshy task ids in create responses --- src/extended_data/connectors/meshy/animate.py | 2 +- src/extended_data/connectors/meshy/base.py | 10 +++ src/extended_data/connectors/meshy/image3d.py | 10 +-- .../connectors/meshy/retexture.py | 2 +- src/extended_data/connectors/meshy/rigging.py | 2 +- src/extended_data/connectors/meshy/text3d.py | 4 +- tests/connectors/meshy/test_task_ids.py | 62 ++++++++++++++++--- 7 files changed, 70 insertions(+), 22 deletions(-) diff --git a/src/extended_data/connectors/meshy/animate.py b/src/extended_data/connectors/meshy/animate.py index fe6ee56..37298dd 100644 --- a/src/extended_data/connectors/meshy/animate.py +++ b/src/extended_data/connectors/meshy/animate.py @@ -28,7 +28,7 @@ def create(request: AnimationRequest) -> ExtendedString: version="v1", json=request.model_dump(exclude_none=True), ) - return extend_data(response.json().get("result")) + return base.task_id_from_response(response) def get(task_id: str) -> ExtendedDict: diff --git a/src/extended_data/connectors/meshy/base.py b/src/extended_data/connectors/meshy/base.py index 9568b4d..89760ea 100644 --- a/src/extended_data/connectors/meshy/base.py +++ b/src/extended_data/connectors/meshy/base.py @@ -21,6 +21,7 @@ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential +from extended_data.containers import ExtendedString from extended_data.inputs import InputProvider from extended_data.primitives.redaction import redact_sensitive_text @@ -127,6 +128,15 @@ def unexpected_response_message(data: Any) -> str: return f"Unexpected API response: missing 'result' key. 
Response: {redact_sensitive_text(data)}" +def task_id_from_response(response: httpx.Response) -> ExtendedString: + """Extract a non-empty Meshy task id from a create/refine response.""" + data = response.json() + result = data.get("result") if isinstance(data, Mapping) else None + if not isinstance(result, str) or not result.strip(): + raise RuntimeError(unexpected_response_message(data)) + return ExtendedString(result) + + @retry( retry=retry_if_exception_type((RateLimitError, httpx.TimeoutException)), stop=stop_after_attempt(5), diff --git a/src/extended_data/connectors/meshy/image3d.py b/src/extended_data/connectors/meshy/image3d.py index e639b5c..d96322b 100644 --- a/src/extended_data/connectors/meshy/image3d.py +++ b/src/extended_data/connectors/meshy/image3d.py @@ -24,10 +24,7 @@ def create(request: Image3DRequest) -> ExtendedString: version="v2", json=request.model_dump(exclude_none=True), ) - data = response.json() - if "result" not in data: - raise RuntimeError(base.unexpected_response_message(data)) - return extend_data(data["result"]) + return base.task_id_from_response(response) def get(task_id: str) -> ExtendedDict: @@ -45,10 +42,7 @@ def refine(task_id: str) -> ExtendedString: version="v2", json={}, ) - data = response.json() - if "result" not in data: - raise RuntimeError(base.unexpected_response_message(data)) - return extend_data(data["result"]) + return base.task_id_from_response(response) def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> ExtendedDict: diff --git a/src/extended_data/connectors/meshy/retexture.py b/src/extended_data/connectors/meshy/retexture.py index b972b01..992cf65 100644 --- a/src/extended_data/connectors/meshy/retexture.py +++ b/src/extended_data/connectors/meshy/retexture.py @@ -23,7 +23,7 @@ def create(request: RetextureRequest) -> ExtendedString: version="v1", json=request.model_dump(exclude_none=True), ) - return extend_data(response.json().get("result")) + return base.task_id_from_response(response) 
def get(task_id: str) -> ExtendedDict: diff --git a/src/extended_data/connectors/meshy/rigging.py b/src/extended_data/connectors/meshy/rigging.py index 8a6bdc9..952b8f7 100644 --- a/src/extended_data/connectors/meshy/rigging.py +++ b/src/extended_data/connectors/meshy/rigging.py @@ -23,7 +23,7 @@ def create(request: RiggingRequest) -> ExtendedString: version="v1", json=request.model_dump(exclude_none=True), ) - return extend_data(response.json().get("result")) + return base.task_id_from_response(response) def get(task_id: str) -> ExtendedDict: diff --git a/src/extended_data/connectors/meshy/text3d.py b/src/extended_data/connectors/meshy/text3d.py index e142045..044661a 100644 --- a/src/extended_data/connectors/meshy/text3d.py +++ b/src/extended_data/connectors/meshy/text3d.py @@ -24,7 +24,7 @@ def create(request: Text3DRequest) -> ExtendedString: version="v2", json=request.model_dump(exclude_none=True), ) - return extend_data(response.json().get("result")) + return base.task_id_from_response(response) def get(task_id: str) -> ExtendedDict: @@ -42,7 +42,7 @@ def refine(task_id: str) -> ExtendedString: version="v2", json={}, ) - return extend_data(response.json().get("result")) + return base.task_id_from_response(response) def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> ExtendedDict: diff --git a/tests/connectors/meshy/test_task_ids.py b/tests/connectors/meshy/test_task_ids.py index aa0b0c6..15aedec 100644 --- a/tests/connectors/meshy/test_task_ids.py +++ b/tests/connectors/meshy/test_task_ids.py @@ -81,6 +81,53 @@ def test_retexture_task_id_is_extended_string() -> None: assert created == "retexture-task" +@pytest.mark.parametrize( + ("request_path", "call"), + [ + ( + "extended_data.connectors.meshy.text3d.base.request", + lambda: text3d.create(Text3DRequest(prompt="a sword")), + ), + ( + "extended_data.connectors.meshy.text3d.base.request", + lambda: text3d.refine("text-task"), + ), + ( + 
"extended_data.connectors.meshy.image3d.base.request", + lambda: image3d.create(Image3DRequest(image_url="https://example.com/source.png")), + ), + ( + "extended_data.connectors.meshy.image3d.base.request", + lambda: image3d.refine("image-task"), + ), + ( + "extended_data.connectors.meshy.animate.base.request", + lambda: animate.create(AnimationRequest(rig_task_id="rig-task", action_id=42)), + ), + ( + "extended_data.connectors.meshy.rigging.base.request", + lambda: rigging.create(RiggingRequest(input_task_id="model-task")), + ), + ( + "extended_data.connectors.meshy.retexture.base.request", + lambda: retexture.create(RetextureRequest(input_task_id="model-task", text_style_prompt="gold")), + ), + ], +) +def test_meshy_task_id_responses_fail_loudly_without_string_result(request_path: str, call) -> None: + """Task creation/refinement must not convert malformed vendor payloads into None.""" + response = _json_response({"password": "hunter2", "authorization": "Bearer raw_token", "result": None}) + + with patch(request_path, return_value=response): + with pytest.raises(RuntimeError, match="missing 'result' key") as exc_info: + call() + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + + def test_text3d_get_returns_extended_payload() -> None: payload = { "id": "text-task", @@ -186,15 +233,12 @@ def test_meshy_poll_redacts_failed_task_errors(monkeypatch: pytest.MonkeyPatch, assert "[REDACTED]" in message -def test_image3d_create_redacts_unexpected_response() -> None: - """Image3D create diagnostics should not echo secret-bearing response payloads.""" - response = _json_response({"password": "hunter2", "authorization": "Bearer raw_token"}) +@pytest.mark.parametrize("payload", [{"result": ""}, {"result": 123}, ["not", "a", "mapping"]]) +def test_meshy_task_id_response_requires_non_empty_string_result(payload: object) -> None: + """Task ids are string API handles, not arbitrary JSON 
payload values.""" + response = MagicMock() + response.json.return_value = payload with patch("extended_data.connectors.meshy.image3d.base.request", return_value=response): - with pytest.raises(RuntimeError) as exc_info: + with pytest.raises(RuntimeError, match="missing 'result' key"): image3d.create(Image3DRequest(image_url="https://example.com/source.png")) - - message = str(exc_info.value) - assert "hunter2" not in message - assert "raw_token" not in message - assert "[REDACTED]" in message From 8d9b9d3074f7aab63b26567cb19b468eca7d0e7f Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 19:14:11 -0500 Subject: [PATCH 224/287] fix: redact meshy status response validation --- src/extended_data/connectors/meshy/animate.py | 5 ++-- src/extended_data/connectors/meshy/base.py | 15 ++++++++-- src/extended_data/connectors/meshy/image3d.py | 5 ++-- .../connectors/meshy/retexture.py | 5 ++-- src/extended_data/connectors/meshy/rigging.py | 5 ++-- src/extended_data/connectors/meshy/text3d.py | 5 ++-- tests/connectors/meshy/test_task_ids.py | 30 +++++++++++++++++++ 7 files changed, 53 insertions(+), 17 deletions(-) diff --git a/src/extended_data/connectors/meshy/animate.py b/src/extended_data/connectors/meshy/animate.py index 37298dd..8a66db7 100644 --- a/src/extended_data/connectors/meshy/animate.py +++ b/src/extended_data/connectors/meshy/animate.py @@ -17,7 +17,7 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import AnimationRequest, AnimationResult, TaskStatus -from extended_data.containers import ExtendedDict, ExtendedString, extend_data +from extended_data.containers import ExtendedDict, ExtendedString def create(request: AnimationRequest) -> ExtendedString: @@ -34,8 +34,7 @@ def create(request: AnimationRequest) -> ExtendedString: def get(task_id: str) -> ExtendedDict: """Get task status.""" response = base.request("GET", f"animations/{task_id}", version="v1") - result = AnimationResult(**response.json()) - 
return extend_data(result.model_dump(mode="json")) + return base.task_payload_from_response(response, AnimationResult, "animations") def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> ExtendedDict: diff --git a/src/extended_data/connectors/meshy/base.py b/src/extended_data/connectors/meshy/base.py index 89760ea..cb0f99d 100644 --- a/src/extended_data/connectors/meshy/base.py +++ b/src/extended_data/connectors/meshy/base.py @@ -15,13 +15,14 @@ import time from collections.abc import Mapping -from typing import Any +from typing import Any, cast import httpx +from pydantic import BaseModel, ValidationError from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential -from extended_data.containers import ExtendedString +from extended_data.containers import ExtendedDict, ExtendedString, extend_data from extended_data.inputs import InputProvider from extended_data.primitives.redaction import redact_sensitive_text @@ -137,6 +138,16 @@ def task_id_from_response(response: httpx.Response) -> ExtendedString: return ExtendedString(result) +def task_payload_from_response(response: httpx.Response, model_type: type[BaseModel], endpoint: str) -> ExtendedDict: + """Validate a Meshy task payload and return a promoted public mapping.""" + data = response.json() + try: + result = model_type(**data) + except ValidationError: + raise RuntimeError(f"Unexpected API response for {endpoint}: {redact_sensitive_text(data)}") from None + return cast(ExtendedDict, extend_data(result.model_dump(mode="json"))) + + @retry( retry=retry_if_exception_type((RateLimitError, httpx.TimeoutException)), stop=stop_after_attempt(5), diff --git a/src/extended_data/connectors/meshy/image3d.py b/src/extended_data/connectors/meshy/image3d.py index d96322b..a24f384 100644 --- a/src/extended_data/connectors/meshy/image3d.py +++ b/src/extended_data/connectors/meshy/image3d.py @@ -13,7 +13,7 @@ from extended_data.connectors.meshy import base from 
extended_data.connectors.meshy.models import Image3DRequest, Image3DResult, TaskStatus -from extended_data.containers import ExtendedDict, ExtendedString, extend_data +from extended_data.containers import ExtendedDict, ExtendedString def create(request: Image3DRequest) -> ExtendedString: @@ -30,8 +30,7 @@ def create(request: Image3DRequest) -> ExtendedString: def get(task_id: str) -> ExtendedDict: """Get task status.""" response = base.request("GET", f"image-to-3d/{task_id}", version="v2") - result = Image3DResult(**response.json()) - return extend_data(result.model_dump(mode="json")) + return base.task_payload_from_response(response, Image3DResult, "image-to-3d") def refine(task_id: str) -> ExtendedString: diff --git a/src/extended_data/connectors/meshy/retexture.py b/src/extended_data/connectors/meshy/retexture.py index 992cf65..e5685b0 100644 --- a/src/extended_data/connectors/meshy/retexture.py +++ b/src/extended_data/connectors/meshy/retexture.py @@ -12,7 +12,7 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import RetextureRequest, RetextureResult, TaskStatus -from extended_data.containers import ExtendedDict, ExtendedString, extend_data +from extended_data.containers import ExtendedDict, ExtendedString def create(request: RetextureRequest) -> ExtendedString: @@ -29,8 +29,7 @@ def create(request: RetextureRequest) -> ExtendedString: def get(task_id: str) -> ExtendedDict: """Get task status.""" response = base.request("GET", f"retexture/{task_id}", version="v1") - result = RetextureResult(**response.json()) - return extend_data(result.model_dump(mode="json")) + return base.task_payload_from_response(response, RetextureResult, "retexture") def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> ExtendedDict: diff --git a/src/extended_data/connectors/meshy/rigging.py b/src/extended_data/connectors/meshy/rigging.py index 952b8f7..cfa7631 100644 --- a/src/extended_data/connectors/meshy/rigging.py +++ 
b/src/extended_data/connectors/meshy/rigging.py @@ -12,7 +12,7 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import RiggingRequest, RiggingResult, TaskStatus -from extended_data.containers import ExtendedDict, ExtendedString, extend_data +from extended_data.containers import ExtendedDict, ExtendedString def create(request: RiggingRequest) -> ExtendedString: @@ -29,8 +29,7 @@ def create(request: RiggingRequest) -> ExtendedString: def get(task_id: str) -> ExtendedDict: """Get task status.""" response = base.request("GET", f"rigging/{task_id}", version="v1") - result = RiggingResult(**response.json()) - return extend_data(result.model_dump(mode="json")) + return base.task_payload_from_response(response, RiggingResult, "rigging") def poll(task_id: str, interval: float = 5.0, timeout: float = 600.0) -> ExtendedDict: diff --git a/src/extended_data/connectors/meshy/text3d.py b/src/extended_data/connectors/meshy/text3d.py index 044661a..865883b 100644 --- a/src/extended_data/connectors/meshy/text3d.py +++ b/src/extended_data/connectors/meshy/text3d.py @@ -13,7 +13,7 @@ from extended_data.connectors.meshy import base from extended_data.connectors.meshy.models import ArtStyle, TaskStatus, Text3DRequest, Text3DResult -from extended_data.containers import ExtendedDict, ExtendedString, extend_data +from extended_data.containers import ExtendedDict, ExtendedString def create(request: Text3DRequest) -> ExtendedString: @@ -30,8 +30,7 @@ def create(request: Text3DRequest) -> ExtendedString: def get(task_id: str) -> ExtendedDict: """Get task status.""" response = base.request("GET", f"text-to-3d/{task_id}", version="v2") - result = Text3DResult(**response.json()) - return extend_data(result.model_dump(mode="json")) + return base.task_payload_from_response(response, Text3DResult, "text-to-3d") def refine(task_id: str) -> ExtendedString: diff --git a/tests/connectors/meshy/test_task_ids.py b/tests/connectors/meshy/test_task_ids.py index 
15aedec..8c3dc78 100644 --- a/tests/connectors/meshy/test_task_ids.py +++ b/tests/connectors/meshy/test_task_ids.py @@ -209,6 +209,36 @@ def test_retexture_get_returns_extended_payload() -> None: assert result["model_urls"]["glb"] == "https://example.com/retexture.glb" +@pytest.mark.parametrize( + ("request_path", "call"), + [ + ("extended_data.connectors.meshy.text3d.base.request", lambda: text3d.get("text-task")), + ("extended_data.connectors.meshy.image3d.base.request", lambda: image3d.get("image-task")), + ("extended_data.connectors.meshy.animate.base.request", lambda: animate.get("animation-task")), + ("extended_data.connectors.meshy.rigging.base.request", lambda: rigging.get("rig-task")), + ("extended_data.connectors.meshy.retexture.base.request", lambda: retexture.get("retexture-task")), + ], +) +def test_meshy_get_responses_redact_validation_failures(request_path: str, call) -> None: + """Malformed status payloads should not expose raw vendor data through Pydantic errors.""" + response = _json_response({ + "status": "SUCCEEDED", + "created_at": 1700000000, + "password": "hunter2", + "authorization": "Bearer raw_token", + }) + + with patch(request_path, return_value=response): + with pytest.raises(RuntimeError, match="Unexpected API response") as exc_info: + call() + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "ValidationError" not in message + assert "[REDACTED]" in message + + @pytest.mark.parametrize("module", [text3d, image3d, retexture, rigging, animate]) def test_meshy_poll_redacts_failed_task_errors(monkeypatch: pytest.MonkeyPatch, module: object) -> None: """All Meshy polling helpers should redact vendor task failure messages.""" From 6793cccac3683eca2217ad15eca4ca58f26d9693 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 19:19:40 -0500 Subject: [PATCH 225/287] fix: redact anthropic response validation --- .../connectors/anthropic/__init__.py | 74 +++++++++++++-- 
tests/connectors/test_anthropic.py | 90 +++++++++++++++++++ 2 files changed, 156 insertions(+), 8 deletions(-) diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index 89b2d3e..0584e1e 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ b/src/extended_data/connectors/anthropic/__init__.py @@ -35,7 +35,7 @@ import httpx -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, ValidationError from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, extend_data, to_builtin @@ -301,6 +301,43 @@ def _model_payload(model: BaseModel) -> dict[str, Any]: """Serialize an Anthropic model into JSON-compatible API field names.""" return model.model_dump(mode="json") + @staticmethod + def _unexpected_response_error(operation: str, data: Any, *, status_code: int | None = None) -> AnthropicAPIError: + """Build a redacted malformed-response error.""" + return AnthropicAPIError( + f"Unexpected Anthropic response for {operation}: {redact_sensitive_text(data)}", + status_code=status_code, + error_type="unexpected_response", + ) + + def _response_json(self, response: httpx.Response, operation: str) -> Any: + """Parse a response body or raise a redacted malformed-response error.""" + try: + return response.json() + except Exception as exc: + raise self._unexpected_response_error( + operation, + exc, + status_code=response.status_code, + ) from None + + def _parse_model_response( + self, + response: httpx.Response, + model_type: type[BaseModel], + operation: str, + ) -> dict[str, Any]: + """Validate one Anthropic model response and return a JSON payload.""" + data = self._response_json(response, operation) + try: + return self._model_payload(model_type.model_validate(data)) + except ValidationError: + raise self._unexpected_response_error( + operation, + data, + 
status_code=response.status_code, + ) from None + @staticmethod def _message_text(message: Mapping[str, Any]) -> str: """Extract concatenated text blocks from an extended message payload.""" @@ -379,7 +416,7 @@ def create_message( if not response.is_success: self._handle_error(response) - return self.extend_result(self._model_payload(Message.model_validate(response.json()))) + return self.extend_result(self._parse_model_response(response, Message, "create_message")) def count_tokens( self, @@ -419,8 +456,14 @@ def count_tokens( if not response.is_success: self._handle_error(response) - data = response.json() - return data.get("input_tokens", 0) + data = self._response_json(response, "count_tokens") + if not isinstance(data, Mapping) or not isinstance(data.get("input_tokens"), int): + raise self._unexpected_response_error( + "count_tokens", + data, + status_code=response.status_code, + ) + return data["input_tokens"] # ========================================================================= # Model Operations @@ -442,9 +485,24 @@ def list_models(self) -> ExtendedList[ExtendedDict]: if not response.is_success: self._handle_error(response) - data = response.json() - models_data = data.get("data", []) - return self.extend_result([self._model_payload(Model.model_validate(m)) for m in models_data]) + data = self._response_json(response, "list_models") + models_data = data.get("data") if isinstance(data, Mapping) else None + if not isinstance(models_data, list): + raise self._unexpected_response_error( + "list_models", + data, + status_code=response.status_code, + ) + + try: + parsed_models = [self._model_payload(Model.model_validate(model_data)) for model_data in models_data] + except ValidationError: + raise self._unexpected_response_error( + "list_models", + data, + status_code=response.status_code, + ) from None + return self.extend_result(parsed_models) def get_model(self, model_id: str) -> ExtendedDict: """Get information about a specific model. 
@@ -465,7 +523,7 @@ def get_model(self, model_id: str) -> ExtendedDict: if not response.is_success: self._handle_error(response) - return self.extend_result(self._model_payload(Model.model_validate(response.json()))) + return self.extend_result(self._parse_model_response(response, Model, "get_model")) # ========================================================================= # Agent Execution (Sandbox Mode) diff --git a/tests/connectors/test_anthropic.py b/tests/connectors/test_anthropic.py index 3fe697c..83ef968 100644 --- a/tests/connectors/test_anthropic.py +++ b/tests/connectors/test_anthropic.py @@ -10,6 +10,7 @@ from extended_data.connectors.anthropic import ( CLAUDE_MODELS, + AnthropicAPIError, AnthropicAuthError, AnthropicConnector, AnthropicError, @@ -261,6 +262,95 @@ def test_get_model(self): assert isinstance(model["display_name"], ExtendedString) assert model["display_name"] == "Claude Sonnet 4" + def test_count_tokens_returns_vendor_token_count(self): + """count_tokens should return the explicit Anthropic response value.""" + import httpx + + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_success = True + mock_response.json.return_value = {"input_tokens": 42} + mock_client.request.return_value = mock_response + + with patch.object(httpx, "Client", return_value=mock_client): + connector = AnthropicConnector(api_key="test-key") + assert connector.count_tokens(model="claude-sonnet-4-20250514", messages=[]) == 42 + + @pytest.mark.parametrize( + ("method_name", "call", "payload"), + [ + ( + "create_message", + lambda connector: connector.create_message( + model="claude-sonnet-4-20250514", + max_tokens=1024, + messages=[{"role": "user", "content": "Hi"}], + ), + {"role": "assistant", "password": "hunter2", "authorization": "Bearer raw_token"}, + ), + ( + "list_models", + lambda connector: connector.list_models(), + {"data": [{"id": "claude-sonnet-4-20250514", "api_key": "key_123"}]}, + ), + ( + 
"get_model", + lambda connector: connector.get_model("claude-sonnet-4-20250514"), + {"id": "claude-sonnet-4-20250514", "client_secret": "secret_123"}, + ), + ( + "count_tokens", + lambda connector: connector.count_tokens(model="claude-sonnet-4-20250514", messages=[]), + {"password": "hunter2", "authorization": "Bearer raw_token"}, + ), + ], + ) + def test_success_response_validation_errors_are_redacted(self, method_name, call, payload): + """Malformed success payloads should fail loudly without raw Pydantic details.""" + import httpx + + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_success = True + mock_response.json.return_value = payload + mock_client.request.return_value = mock_response + + with patch.object(httpx, "Client", return_value=mock_client): + connector = AnthropicConnector(api_key="test-key") + with pytest.raises(AnthropicAPIError) as exc_info: + call(connector) + + message = str(exc_info.value) + assert exc_info.value.error_type == "unexpected_response" + assert method_name in message + for raw_secret in ["hunter2", "raw_token", "key_123", "secret_123"]: + assert raw_secret not in message + assert "ValidationError" not in message + assert "[REDACTED]" in message + + def test_success_response_json_errors_are_redacted(self): + """Malformed JSON diagnostics should not expose raw parser exception values.""" + import httpx + + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_success = True + mock_response.json.side_effect = ValueError("bad password=hunter2 Authorization: Bearer raw_token") + mock_client.request.return_value = mock_response + + with patch.object(httpx, "Client", return_value=mock_client): + connector = AnthropicConnector(api_key="test-key") + with pytest.raises(AnthropicAPIError) as exc_info: + connector.get_model("claude-sonnet-4-20250514") + + message = str(exc_info.value) + assert "hunter2" not in message + assert 
"raw_token" not in message + assert "[REDACTED]" in message + def test_handle_error_redacts_sensitive_vendor_message(self): """Anthropic errors should preserve status metadata without leaking secrets.""" import httpx From b681ea38677f19ffb160663b77ae605c41efd347 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 19:24:23 -0500 Subject: [PATCH 226/287] fix: redact cursor response validation --- .../connectors/cursor/__init__.py | 78 +++++++++++++++--- tests/connectors/test_cursor.py | 81 +++++++++++++++++++ 2 files changed, 149 insertions(+), 10 deletions(-) diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index 0b45aa5..39e1aaf 100644 --- a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -32,7 +32,7 @@ import httpx -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, ValidationError from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin @@ -440,6 +440,44 @@ def _model_payload(model: BaseModel) -> dict[str, Any]: payload["error"] = sanitize_error(payload["error"]) return payload + @staticmethod + def _unexpected_response_error(operation: str, data: Any, *sensitive_values: Any) -> CursorAPIError: + """Build a redacted malformed-response error.""" + return CursorAPIError( + f"Unexpected Cursor response for {operation}: {_safe_cursor_text(data, *sensitive_values)}" + ) + + def _parse_model_response( + self, + data: Any, + model_type: type[BaseModel], + operation: str, + *sensitive_values: Any, + ) -> dict[str, Any]: + """Validate one Cursor response model and return a JSON payload.""" + try: + return self._model_payload(model_type.model_validate(data)) + except ValidationError: + raise self._unexpected_response_error(operation, data, *sensitive_values) from None + + def _parse_model_list( + 
self, + data: Any, + key: str, + model_type: type[BaseModel], + operation: str, + *sensitive_values: Any, + ) -> list[dict[str, Any]]: + """Validate a Cursor response list and return JSON payloads.""" + items = data.get(key, []) if isinstance(data, Mapping) else None + if not isinstance(items, list): + raise self._unexpected_response_error(operation, data, *sensitive_values) + + try: + return [self._model_payload(model_type.model_validate(item)) for item in items] + except ValidationError: + raise self._unexpected_response_error(operation, data, *sensitive_values) from None + # ========================================================================= # Agent Operations # ========================================================================= @@ -458,8 +496,7 @@ def list_agents(self) -> ExtendedList[ExtendedDict]: if not data: return self.extend_result([]) - agents_data = data.get("agents", []) - return self.extend_result([self._model_payload(Agent.model_validate(a)) for a in agents_data]) + return self.extend_result(self._parse_model_list(data, "agents", Agent, "list_agents")) def get_agent_status(self, agent_id: str) -> ExtendedDict: """Get status of a specific agent. @@ -480,7 +517,7 @@ def get_agent_status(self, agent_id: str) -> ExtendedDict: data = self._request_api(f"/agents/{agent_id}") if not data: raise CursorAPIError(f"Empty response when getting agent status for {_safe_cursor_ref(agent_id)}") - return self.extend_result(self._model_payload(Agent.model_validate(data))) + return self.extend_result(self._parse_model_response(data, Agent, "get_agent_status", agent_id)) def get_agent_conversation(self, agent_id: str) -> ExtendedDict: """Get conversation history for an agent. 
@@ -502,8 +539,16 @@ def get_agent_conversation(self, agent_id: str) -> ExtendedDict: if not data: return self.extend_result(self._model_payload(Conversation(agent_id=agent_id, messages=[]))) - messages = [ConversationMessage.model_validate(m) for m in data.get("messages", [])] - return self.extend_result(self._model_payload(Conversation(agent_id=agent_id, messages=messages))) + message_data = data.get("messages", []) if isinstance(data, Mapping) else None + if not isinstance(message_data, list): + raise self._unexpected_response_error("get_agent_conversation", data, agent_id) + + try: + messages = [ConversationMessage.model_validate(message) for message in message_data] + conversation = Conversation(agent_id=agent_id, messages=messages) + except ValidationError: + raise self._unexpected_response_error("get_agent_conversation", data, agent_id) from None + return self.extend_result(self._model_payload(conversation)) def launch_agent( self, @@ -588,7 +633,18 @@ def launch_agent( if not data: msg = "Empty response when launching agent" raise CursorAPIError(msg) - return self.extend_result(self._model_payload(Agent.model_validate(data))) + return self.extend_result( + self._parse_model_response( + data, + Agent, + "launch_agent", + prompt_text, + repository, + ref, + branch_name, + webhook_url, + ) + ) def add_followup(self, agent_id: str, prompt_text: str) -> None: """Send a follow-up message to an agent. 
@@ -630,8 +686,7 @@ def list_repositories(self) -> ExtendedList[ExtendedDict]: if not data: return self.extend_result([]) - repos_data = data.get("repositories", []) - return self.extend_result([self._model_payload(Repository.model_validate(r)) for r in repos_data]) + return self.extend_result(self._parse_model_list(data, "repositories", Repository, "list_repositories")) # ========================================================================= # Model Operations @@ -651,4 +706,7 @@ def list_models(self) -> ExtendedList[ExtendedString]: if not data: return self.extend_result([]) - return self.extend_result(data.get("models", [])) + models = data.get("models", []) if isinstance(data, Mapping) else None + if not isinstance(models, list) or any(not isinstance(model, str) for model in models): + raise self._unexpected_response_error("list_models", data) + return self.extend_result(models) diff --git a/tests/connectors/test_cursor.py b/tests/connectors/test_cursor.py index 0059b35..2b200eb 100644 --- a/tests/connectors/test_cursor.py +++ b/tests/connectors/test_cursor.py @@ -38,6 +38,17 @@ def _logged_text(logger: MagicMock) -> str: return "\n".join(messages) +def _json_response(payload: object) -> MagicMock: + """Build a JSON Cursor API response mock.""" + response = MagicMock() + response.status_code = 200 + response.is_success = True + response.headers = {"content-type": "application/json"} + response.text = "{}" + response.json.return_value = payload + return response + + class TestValidators: """Tests for input validators.""" @@ -461,3 +472,73 @@ def test_list_models_empty_response_returns_extended_list(self, mock_client_clas assert isinstance(models, ExtendedList) assert models == [] + + @pytest.mark.parametrize( + ("method_name", "call", "payload", "raw_values"), + [ + ( + "list_agents", + lambda connector: connector.list_agents(), + {"agents": [{"state": "running", "password": "hunter2", "authorization": "Bearer raw_token"}]}, + ["hunter2", "raw_token"], + ), + 
( + "get_agent_status", + lambda connector: connector.get_agent_status("secret-agent"), + {"id": "secret-agent", "api_key": "key_123"}, + ["secret-agent", "key_123"], + ), + ( + "get_agent_conversation", + lambda connector: connector.get_agent_conversation("secret-agent"), + {"messages": [{"role": "user", "password": "hunter2", "authorization": "Bearer raw_token"}]}, + ["secret-agent", "hunter2", "raw_token"], + ), + ( + "launch_agent", + lambda connector: connector.launch_agent( + prompt_text="rotate password=hunter2 for customer-prod", + repository="secret-org/private-repo", + ), + {"state": "pending", "task": "rotate password=hunter2 for secret-org/private-repo"}, + ["hunter2", "secret-org/private-repo", "customer-prod"], + ), + ( + "list_repositories", + lambda connector: connector.list_repositories(), + {"repositories": [{"url": "https://github.com/org/repo", "client_secret": "secret_123"}]}, + ["secret_123"], + ), + ( + "list_models", + lambda connector: connector.list_models(), + {"models": ["cursor-small", {"password": "hunter2"}]}, + ["hunter2"], + ), + ], + ) + @patch("extended_data.connectors.cursor.httpx.Client") + def test_success_response_validation_errors_are_redacted( + self, + mock_client_class, + method_name, + call, + payload, + raw_values, + ): + """Malformed success payloads should fail loudly without raw Pydantic details.""" + mock_client = MagicMock() + mock_client_class.return_value = mock_client + mock_client.request.return_value = _json_response(payload) + + connector = CursorConnector(api_key="test-key") + + with pytest.raises(CursorAPIError) as exc_info: + call(connector) + + message = str(exc_info.value) + assert method_name in message + assert "ValidationError" not in message + assert "[REDACTED]" in message + for raw_value in raw_values: + assert raw_value not in message From d2300fb2daeb42f66f950d2f5668193b4c8f52f5 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 19:30:27 -0500 Subject: [PATCH 227/287] fix: validate 
zoom response payloads --- src/extended_data/connectors/zoom/__init__.py | 73 ++++++++- tests/connectors/test_zoom_connector.py | 143 ++++++++++++++++++ 2 files changed, 208 insertions(+), 8 deletions(-) diff --git a/src/extended_data/connectors/zoom/__init__.py b/src/extended_data/connectors/zoom/__init__.py index a934862..200591f 100644 --- a/src/extended_data/connectors/zoom/__init__.py +++ b/src/extended_data/connectors/zoom/__init__.py @@ -44,6 +44,11 @@ def _zoom_error(action: str, exc: BaseException, *sensitive_values: Any) -> str: return f"{action}: {_safe_zoom_text(exc, *sensitive_values)}" +def _zoom_response_error(action: str, data: Any, *sensitive_values: Any) -> RuntimeError: + """Build a redacted malformed-response error.""" + return RuntimeError(f"{action}: {_safe_zoom_text(data, *sensitive_values)}") + + class ZoomConnector(VendorConnectorBase): """Zoom connector for user management.""" @@ -62,6 +67,34 @@ def __init__( self.client_secret = client_secret or self.get_input("ZOOM_CLIENT_SECRET", required=True) self.account_id = account_id or self.get_input("ZOOM_ACCOUNT_ID", required=True) + def _response_json(self, response: Any, action: str, *sensitive_values: Any) -> Any: + """Parse a Zoom JSON response or raise a redacted diagnostic.""" + try: + return response.json() + except Exception as exc: + raise _zoom_response_error(action, exc, *sensitive_values) from None + + def _response_mapping(self, response: Any, action: str, *sensitive_values: Any) -> dict[str, Any]: + """Parse and validate a Zoom object response.""" + data = self._response_json(response, action, *sensitive_values) + if not isinstance(data, Mapping): + raise _zoom_response_error(action, data, *sensitive_values) + return dict(data) + + def _response_list_field( + self, + response: Any, + field_name: str, + action: str, + *sensitive_values: Any, + ) -> list[dict[str, Any]]: + """Parse and validate a Zoom list field containing object payloads.""" + data = 
self._response_mapping(response, action, *sensitive_values) + items = data.get(field_name, []) + if not isinstance(items, list) or any(not isinstance(item, Mapping) for item in items): + raise _zoom_response_error(action, data, *sensitive_values) + return [dict(item) for item in items] + def get_access_token(self) -> str | None: """Get an OAuth access token from Zoom.""" url = "https://zoom.us/oauth/token" @@ -75,7 +108,23 @@ def get_access_token(self) -> str | None: try: response = requests.post(url, headers=headers, data=data, timeout=DEFAULT_REQUEST_TIMEOUT) response.raise_for_status() - return response.json().get("access_token") + token_data = self._response_mapping( + response, + "Unexpected Zoom access token response", + self.client_id, + self.client_secret, + self.account_id, + ) + token = token_data.get("access_token") + if not isinstance(token, str) or not token.strip(): + raise _zoom_response_error( + "Unexpected Zoom access token response", + token_data, + self.client_id, + self.client_secret, + self.account_id, + ) + return token except requests.exceptions.RequestException as exc: msg = _zoom_error( "Failed to get Zoom access token", @@ -114,11 +163,18 @@ def list_users(self) -> ExtendedDict: try: response = requests.get(url, headers=headers, params=params, timeout=DEFAULT_REQUEST_TIMEOUT) response.raise_for_status() - data = response.json() - for user in data.get("users", []): - users[user["email"]] = user + data = self._response_mapping(response, "Unexpected Zoom users response", next_page_token, params) + raw_users = data.get("users", []) + if not isinstance(raw_users, list): + raise _zoom_response_error("Unexpected Zoom users response", data, next_page_token, params) + for user in raw_users: + if not isinstance(user, Mapping) or not isinstance(user.get("email"), str): + raise _zoom_response_error("Unexpected Zoom users response", data, next_page_token, params) + users[user["email"]] = dict(user) next_page_token = data.get("next_page_token") + if 
next_page_token is not None and not isinstance(next_page_token, str): + raise _zoom_response_error("Unexpected Zoom users response", data, next_page_token, params) if not next_page_token: break except requests.exceptions.RequestException as exc: @@ -173,7 +229,7 @@ def get_user(self, user_id: str) -> ExtendedDict: try: response = requests.get(url, headers=headers, timeout=DEFAULT_REQUEST_TIMEOUT) response.raise_for_status() - return self.extend_result(response.json()) + return self.extend_result(self._response_mapping(response, "Unexpected Zoom user response", user_id)) except requests.exceptions.RequestException as exc: raise RuntimeError(_zoom_error("Failed to get Zoom user", exc, user_id)) from None @@ -194,8 +250,9 @@ def list_meetings(self, user_id: str, meeting_type: str = "scheduled") -> Extend try: response = requests.get(url, headers=headers, params=params, timeout=DEFAULT_REQUEST_TIMEOUT) response.raise_for_status() - data = response.json() - return self.extend_result(data.get("meetings", [])) + return self.extend_result( + self._response_list_field(response, "meetings", "Unexpected Zoom meetings response", user_id, params) + ) except requests.exceptions.RequestException as exc: raise RuntimeError(_zoom_error("Failed to list Zoom meetings", exc, user_id, params)) from None @@ -214,7 +271,7 @@ def get_meeting(self, meeting_id: str) -> ExtendedDict: try: response = requests.get(url, headers=headers, timeout=DEFAULT_REQUEST_TIMEOUT) response.raise_for_status() - return self.extend_result(response.json()) + return self.extend_result(self._response_mapping(response, "Unexpected Zoom meeting response", meeting_id)) except requests.exceptions.RequestException as exc: raise RuntimeError(_zoom_error("Failed to get Zoom meeting", exc, meeting_id)) from None diff --git a/tests/connectors/test_zoom_connector.py b/tests/connectors/test_zoom_connector.py index c6de0a0..b0b8784 100644 --- a/tests/connectors/test_zoom_connector.py +++ 
b/tests/connectors/test_zoom_connector.py @@ -16,6 +16,14 @@ def _logged_text(logger: MagicMock) -> str: return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) +def _token_response(token: str = "test-token") -> MagicMock: + """Build a successful Zoom OAuth response mock.""" + response = MagicMock() + response.json.return_value = {"access_token": token} + response.raise_for_status = MagicMock() + return response + + class TestZoomConnector: """Test suite for ZoomConnector.""" @@ -75,6 +83,33 @@ def test_get_access_token_failure(self, mock_post, base_connector_kwargs): assert "[REDACTED]" in message assert exc_info.value.__cause__ is None + @patch("extended_data.connectors.zoom.requests.post") + def test_get_access_token_malformed_response_is_redacted(self, mock_post, base_connector_kwargs): + """Missing token responses should fail loudly without exposing OAuth credentials.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "password": "hunter2", + "authorization": "Bearer raw_token", + "account_id": "test-account-id", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + with pytest.raises(RuntimeError, match="Unexpected Zoom access token response") as exc_info: + connector.get_access_token() + + message = str(exc_info.value) + for raw_value in ["hunter2", "raw_token", "test-client-id", "test-client-secret", "test-account-id"]: + assert raw_value not in message + assert "[REDACTED]" in message + @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") def test_list_users_redacts_request_failure_details(self, mock_post, mock_get, base_connector_kwargs): @@ -103,6 +138,33 @@ def test_list_users_redacts_request_failure_details(self, mock_post, mock_get, b assert 
"[REDACTED]" in message assert exc_info.value.__cause__ is None + @patch("extended_data.connectors.zoom.requests.get") + @patch("extended_data.connectors.zoom.requests.post") + def test_list_users_malformed_response_is_redacted(self, mock_post, mock_get, base_connector_kwargs): + """Malformed user list responses should not return partial or raw payloads.""" + mock_post.return_value = _token_response() + mock_users_response = MagicMock() + mock_users_response.json.return_value = { + "users": [{"password": "hunter2", "authorization": "Bearer raw_token"}] + } + mock_users_response.raise_for_status = MagicMock() + mock_get.return_value = mock_users_response + + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + with pytest.raises(RuntimeError, match="Unexpected Zoom users response") as exc_info: + connector.list_users() + + message = str(exc_info.value) + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") def test_list_users(self, mock_post, mock_get, base_connector_kwargs): @@ -290,6 +352,31 @@ def test_get_user_redacts_identifier_and_secret_details(self, mock_post, mock_ge assert "[REDACTED]" in message assert exc_info.value.__cause__ is None + @patch("extended_data.connectors.zoom.requests.get") + @patch("extended_data.connectors.zoom.requests.post") + def test_get_user_malformed_response_is_redacted(self, mock_post, mock_get, base_connector_kwargs): + """Zoom user lookups should reject non-object payloads without leaking identifiers.""" + mock_post.return_value = _token_response() + mock_user_response = MagicMock() + mock_user_response.json.return_value = ["private-user@example.com", {"password": "hunter2"}] + mock_user_response.raise_for_status = MagicMock() + mock_get.return_value = 
mock_user_response + + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + with pytest.raises(RuntimeError, match="Unexpected Zoom user response") as exc_info: + connector.get_user("private-user@example.com") + + message = str(exc_info.value) + assert "private-user@example.com" not in message + assert "hunter2" not in message + assert "[REDACTED]" in message + @patch("extended_data.connectors.zoom.requests.get") @patch("extended_data.connectors.zoom.requests.post") def test_list_meetings(self, mock_post, mock_get, base_connector_kwargs): @@ -350,6 +437,34 @@ def test_list_meetings_redacts_identifier_and_secret_details(self, mock_post, mo assert "[REDACTED]" in message assert exc_info.value.__cause__ is None + @patch("extended_data.connectors.zoom.requests.get") + @patch("extended_data.connectors.zoom.requests.post") + def test_list_meetings_malformed_response_is_redacted(self, mock_post, mock_get, base_connector_kwargs): + """Zoom meeting list responses should preserve the ExtendedList contract.""" + mock_post.return_value = _token_response() + mock_meetings_response = MagicMock() + mock_meetings_response.json.return_value = { + "meetings": [{"id": "111"}, "password=hunter2 Authorization: Bearer raw_token"] + } + mock_meetings_response.raise_for_status = MagicMock() + mock_get.return_value = mock_meetings_response + + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + with pytest.raises(RuntimeError, match="Unexpected Zoom meetings response") as exc_info: + connector.list_meetings("private-user@example.com") + + message = str(exc_info.value) + assert "private-user@example.com" not in message + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + @patch("extended_data.connectors.zoom.requests.get") 
@patch("extended_data.connectors.zoom.requests.post") def test_get_meeting(self, mock_post, mock_get, base_connector_kwargs): @@ -406,3 +521,31 @@ def test_get_meeting_redacts_identifier_and_secret_details(self, mock_post, mock assert "raw-token" not in message assert "[REDACTED]" in message assert exc_info.value.__cause__ is None + + @patch("extended_data.connectors.zoom.requests.get") + @patch("extended_data.connectors.zoom.requests.post") + def test_get_meeting_json_parse_error_is_redacted(self, mock_post, mock_get, base_connector_kwargs): + """Zoom JSON parse failures should not expose raw meeting IDs or parser text.""" + mock_post.return_value = _token_response() + mock_meeting_response = MagicMock() + mock_meeting_response.json.side_effect = ValueError( + "bad meeting private-meeting password=hunter2 Authorization: Bearer raw_token" + ) + mock_meeting_response.raise_for_status = MagicMock() + mock_get.return_value = mock_meeting_response + + connector = ZoomConnector( + client_id="test-client-id", + client_secret="test-client-secret", + account_id="test-account-id", + **base_connector_kwargs, + ) + + with pytest.raises(RuntimeError, match="Unexpected Zoom meeting response") as exc_info: + connector.get_meeting("private-meeting") + + message = str(exc_info.value) + assert "private-meeting" not in message + assert "hunter2" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message From 4499f6a885541379f2bdc39aaf23678008d4e6d2 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 19:35:33 -0500 Subject: [PATCH 228/287] fix: validate google jules response payloads --- src/extended_data/connectors/google/jules.py | 138 +++++++++++++++---- tests/connectors/test_google_jules.py | 133 +++++++++++++++++- 2 files changed, 239 insertions(+), 32 deletions(-) diff --git a/src/extended_data/connectors/google/jules.py b/src/extended_data/connectors/google/jules.py index 62a3d60..3305688 100644 --- 
a/src/extended_data/connectors/google/jules.py +++ b/src/extended_data/connectors/google/jules.py @@ -26,13 +26,14 @@ from __future__ import annotations +from collections.abc import Mapping from contextlib import suppress from enum import Enum from typing import Any import httpx -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ValidationError from extended_data.connectors.base import VendorConnectorBase from extended_data.connectors.google._diagnostics import safe_google_text @@ -162,24 +163,71 @@ def _build_headers(self) -> dict[str, str]: "Content-Type": "application/json", } - def _handle_response(self, response: httpx.Response) -> dict[str, Any]: - """Handle API response, raising on errors.""" + def _handle_response(self, response: httpx.Response, operation: str, *sensitive_values: Any) -> dict[str, Any]: + """Handle API response, raising redacted errors for API or payload failures.""" + diagnostic_values = self._response_diagnostic_values(response, *sensitive_values) if not response.is_success: - diagnostic_values = self._response_diagnostic_values(response) - try: - error = response.json().get("error", {}) - raise JulesError( - safe_google_text(error.get("message", response.text), diagnostic_values), - error.get("code", response.status_code), - redact_sensitive_data(error.get("details"), values=diagnostic_values), - ) - except (ValueError, KeyError): - raise JulesError(safe_google_text(response.text, diagnostic_values), response.status_code) from None - return response.json() - - def _response_diagnostic_values(self, response: httpx.Response) -> list[str]: + self._raise_api_error(response, operation, diagnostic_values) + + data = self._response_json(response, operation, diagnostic_values) + if not isinstance(data, Mapping): + raise self._unexpected_response_error(operation, data, response.status_code, diagnostic_values) + return dict(data) + + def _raise_api_error( + self, + response: httpx.Response, + operation: str, + 
diagnostic_values: list[Any], + ) -> None: + """Raise a Jules API error with all details redacted.""" + try: + error_data = self._response_json(response, operation, diagnostic_values) + except JulesError: + raise JulesError(safe_google_text(response.text, diagnostic_values), response.status_code) from None + + raw_error = error_data.get("error", {}) if isinstance(error_data, Mapping) else {} + error = raw_error if isinstance(raw_error, Mapping) else {} + error_code = error.get("code", response.status_code) + if not isinstance(error_code, int): + error_code = response.status_code + + raise JulesError( + safe_google_text(error.get("message", response.text), diagnostic_values), + error_code, + redact_sensitive_data(error.get("details"), values=diagnostic_values), + ) + + def _response_json(self, response: httpx.Response, operation: str, diagnostic_values: list[Any]) -> Any: + """Parse JSON response content or raise a redacted malformed-response error.""" + if not response.content: + return {} + try: + return response.json() + except Exception: + raise self._unexpected_response_error( + operation, + response.text, + response.status_code, + diagnostic_values, + ) from None + + @staticmethod + def _unexpected_response_error( + operation: str, + data: Any, + status_code: int, + diagnostic_values: list[Any], + ) -> JulesError: + """Build a redacted malformed-response error.""" + return JulesError( + f"Unexpected Jules response for {operation}: {safe_google_text(data, diagnostic_values)}", + status_code, + ) + + def _response_diagnostic_values(self, response: httpx.Response, *sensitive_values: Any) -> list[Any]: """Collect caller-controlled response identifiers for diagnostics redaction.""" - values = [self._base_url] + values: list[Any] = [self._base_url, self._api_key, *sensitive_values] with suppress(RuntimeError): values.append(str(response.request.url)) return values @@ -193,6 +241,42 @@ def _model_payload(model: BaseModel) -> dict[str, Any]: """Serialize a Jules model 
using API field aliases.""" return model.model_dump(by_alias=True) + def _parse_model_response( + self, + data: Any, + model_type: type[BaseModel], + operation: str, + *sensitive_values: Any, + ) -> dict[str, Any]: + """Validate one Jules response model and return a JSON payload.""" + try: + return self._model_payload(model_type.model_validate(data)) + except ValidationError: + raise self._unexpected_response_error( + operation, + data, + 200, + list(sensitive_values), + ) from None + + def _parse_model_list( + self, + data: Mapping[str, Any], + field_name: str, + model_type: type[BaseModel], + operation: str, + *sensitive_values: Any, + ) -> list[dict[str, Any]]: + """Validate a Jules response list and return JSON payloads.""" + items = data.get(field_name) + if not isinstance(items, list): + raise self._unexpected_response_error(operation, data, 200, list(sensitive_values)) + + try: + return [self._model_payload(model_type.model_validate(item)) for item in items] + except ValidationError: + raise self._unexpected_response_error(operation, data, 200, list(sensitive_values)) from None + def list_sources(self, page_size: int = 100, page_token: str = "") -> ExtendedList[ExtendedDict]: """List available sources (connected GitHub repos). 
@@ -208,9 +292,9 @@ def list_sources(self, page_size: int = 100, page_token: str = "") -> ExtendedLi params["pageToken"] = page_token response = self.get("/sources", params=params) - data = self._handle_response(response) + data = self._handle_response(response, "list_sources", params) - return self.extend_result([self._model_payload(Source(**s)) for s in data.get("sources", [])]) + return self.extend_result(self._parse_model_list(data, "sources", Source, "list_sources", params)) # ========================================================================= # Sessions @@ -255,9 +339,9 @@ def create_session( body["requirePlanApproval"] = True response = self.post("/sessions", json=body) - data = self._handle_response(response) + data = self._handle_response(response, "create_session", body) - return self.extend_result(self._model_payload(Session(**data))) + return self.extend_result(self._parse_model_response(data, Session, "create_session", body)) def get_session(self, session_name: str) -> ExtendedDict: """Get a session by name. @@ -273,9 +357,9 @@ def get_session(self, session_name: str) -> ExtendedDict: session_name = f"sessions/{session_name}" response = self.get(f"/{session_name}") - data = self._handle_response(response) + data = self._handle_response(response, "get_session", session_name) - return self.extend_result(self._model_payload(Session(**data))) + return self.extend_result(self._parse_model_response(data, Session, "get_session", session_name)) def list_sessions(self, page_size: int = 20, page_token: str = "") -> ExtendedList[ExtendedDict]: """List sessions. 
@@ -292,9 +376,9 @@ def list_sessions(self, page_size: int = 20, page_token: str = "") -> ExtendedLi params["pageToken"] = page_token response = self.get("/sessions", params=params) - data = self._handle_response(response) + data = self._handle_response(response, "list_sessions", params) - return self.extend_result([self._model_payload(Session(**s)) for s in data.get("sessions", [])]) + return self.extend_result(self._parse_model_list(data, "sessions", Session, "list_sessions", params)) def approve_plan(self, session_name: str) -> ExtendedDict: """Approve the plan for a session that requires approval. @@ -309,7 +393,7 @@ def approve_plan(self, session_name: str) -> ExtendedDict: session_name = f"sessions/{session_name}" response = self.post(f"/{session_name}:approvePlan") - self._handle_response(response) + self._handle_response(response, "approve_plan", session_name) # API returns empty on success, fetch updated session return self.get_session(session_name) @@ -332,7 +416,7 @@ def add_user_response(self, session_name: str, message: str = "") -> ExtendedDic # The API uses sendMessage, not addUserResponse response = self.post(f"/{session_name}:sendMessage", json={}) - self._handle_response(response) + self._handle_response(response, "add_user_response", session_name, message) # API returns empty on success, fetch updated session return self.get_session(session_name) diff --git a/tests/connectors/test_google_jules.py b/tests/connectors/test_google_jules.py index b7159f8..9604e6d 100644 --- a/tests/connectors/test_google_jules.py +++ b/tests/connectors/test_google_jules.py @@ -11,7 +11,7 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString -def _response(payload: dict, status_code: int = 200) -> httpx.Response: +def _response(payload: object, status_code: int = 200) -> httpx.Response: return httpx.Response( status_code, json=payload, @@ -162,7 +162,7 @@ def test_handle_response_raises_jules_error() -> None: response = _response({"error": 
{"message": "denied", "code": 403, "details": [{"reason": "forbidden"}]}}, 403) with pytest.raises(JulesError) as exc_info: - connector._handle_response(response) + connector._handle_response(response, "test_operation") assert exc_info.value.code == 403 assert exc_info.value.details == [{"reason": "forbidden"}] @@ -183,7 +183,7 @@ def test_handle_response_redacts_sensitive_jules_error_details() -> None: ) with pytest.raises(JulesError) as exc_info: - connector._handle_response(response) + connector._handle_response(response, "test_operation") message = str(exc_info.value) assert "hunter2" not in message @@ -208,7 +208,7 @@ def test_handle_response_redacts_request_url_in_jules_error() -> None: ) with pytest.raises(JulesError) as exc_info: - connector._handle_response(response) + connector._handle_response(response, "get_session", "sessions/private-session") error = exc_info.value assert request_url not in str(error) @@ -227,7 +227,7 @@ def test_handle_response_malformed_error_has_sanitized_message_without_cause() - ) with pytest.raises(JulesError) as exc_info: - connector._handle_response(response) + connector._handle_response(response, "get_session", "sessions/private-session") error = exc_info.value message = str(error) @@ -235,3 +235,126 @@ def test_handle_response_malformed_error_has_sanitized_message_without_cause() - assert error.__cause__ is None assert request_url not in message assert "hunter2" not in message + + +def test_handle_response_rejects_non_object_success_response() -> None: + """Successful Jules responses must still be JSON objects.""" + connector = JulesConnector(api_key="test-key") + request_url = "https://jules.googleapis.com/v1alpha/sessions/private-session?api_key=raw_key" + response = httpx.Response( + 200, + json=["sessions/private-session", {"password": "hunter2"}], + request=httpx.Request("GET", request_url), + ) + + with pytest.raises(JulesError, match="Unexpected Jules response for get_session") as exc_info: + 
connector._handle_response(response, "get_session", "sessions/private-session") + + message = str(exc_info.value) + assert exc_info.value.code == 200 + assert "sessions/private-session" not in message + assert "hunter2" not in message + assert "raw_key" not in message + assert "[REDACTED]" in message + assert exc_info.value.__cause__ is None + + +def test_handle_response_redacts_non_object_error_response() -> None: + """Non-object Jules error JSON should still become a sanitized JulesError.""" + connector = JulesConnector(api_key="test-key") + request_url = "https://jules.googleapis.com/v1alpha/sessions/private-session?api_key=raw_key" + response = httpx.Response( + 500, + json=["sessions/private-session", "password=hunter2 Authorization: Bearer raw_token"], + request=httpx.Request("GET", request_url), + ) + + with pytest.raises(JulesError) as exc_info: + connector._handle_response(response, "get_session", "sessions/private-session") + + message = str(exc_info.value) + assert exc_info.value.code == 500 + assert "sessions/private-session" not in message + assert "hunter2" not in message + assert "raw_token" not in message + assert "raw_key" not in message + assert "[REDACTED]" in message + assert exc_info.value.__cause__ is None + + +def test_create_session_malformed_response_is_redacted() -> None: + """Created session payloads should validate without exposing prompts or sources.""" + connector = JulesConnector(api_key="test-key") + connector.post = MagicMock( + return_value=_response( + { + "id": "123", + "debug": "Fix private prompt", + "source": "sources/github/private-org/private-repo", + "password": "hunter2", + } + ) + ) + + with pytest.raises(JulesError, match="Unexpected Jules response for create_session") as exc_info: + connector.create_session( + prompt="Fix private prompt", + source="sources/github/private-org/private-repo", + title="private-title", + ) + + message = str(exc_info.value) + assert "Fix private prompt" not in message + assert 
"sources/github/private-org/private-repo" not in message + assert "private-title" not in message + assert "hunter2" not in message + assert "[REDACTED]" in message + + +def test_list_sources_malformed_response_is_redacted() -> None: + """Source list payloads must contain a list of valid Source objects.""" + connector = JulesConnector(api_key="test-key") + connector.get = MagicMock( + return_value=_response( + { + "sources": [ + { + "name": "sources/github/org/repo", + "debug": "private-page-token", + "authorization": "Bearer raw_token", + } + ] + } + ) + ) + + with pytest.raises(JulesError, match="Unexpected Jules response for list_sources") as exc_info: + connector.list_sources(page_token="private-page-token") + + message = str(exc_info.value) + assert "private-page-token" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + + +def test_list_sessions_requires_sessions_list() -> None: + """Session list payloads should fail loudly when the list contract changes.""" + connector = JulesConnector(api_key="test-key") + connector.get = MagicMock( + return_value=_response( + { + "sessions": { + "name": "sessions/private-session", + "password": "hunter2", + } + } + ) + ) + + with pytest.raises(JulesError, match="Unexpected Jules response for list_sessions") as exc_info: + connector.list_sessions(page_token="private-page-token") + + message = str(exc_info.value) + assert "private-page-token" not in message + assert "hunter2" not in message + assert "[REDACTED]" in message From 2120051df073a6593459df4b82fec90bbfab483d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 19:37:54 -0500 Subject: [PATCH 229/287] fix: send google jules prompt messages --- src/extended_data/connectors/google/jules.py | 24 +++++++++------ tests/connectors/test_google_jules.py | 32 ++++++++++++++++++++ 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/extended_data/connectors/google/jules.py b/src/extended_data/connectors/google/jules.py 
index 3305688..c2005e0 100644 --- a/src/extended_data/connectors/google/jules.py +++ b/src/extended_data/connectors/google/jules.py @@ -398,11 +398,12 @@ def approve_plan(self, session_name: str) -> ExtendedDict: # API returns empty on success, fetch updated session return self.get_session(session_name) - def add_user_response(self, session_name: str, message: str = "") -> ExtendedDict: + def add_user_response(self, session_name: str, message: str) -> ExtendedDict: """Add a follow-up message to a session or resume it. - Note: The Jules API uses :sendMessage endpoint. An empty body - resumes a paused session. A message can be included in certain states. + Note: The Jules API uses the :sendMessage endpoint with a required + prompt body. The response body is empty on success, so this method + fetches and returns the updated session. Args: session_name: Full resource name. @@ -411,23 +412,28 @@ def add_user_response(self, session_name: str, message: str = "") -> ExtendedDic Returns: Updated Session object. """ + if not isinstance(message, str) or not message.strip(): + msg = "Jules sendMessage requires a non-empty prompt" + raise ValueError(msg) + if not session_name.startswith("sessions/"): session_name = f"sessions/{session_name}" - # The API uses sendMessage, not addUserResponse - response = self.post(f"/{session_name}:sendMessage", json={}) - self._handle_response(response, "add_user_response", session_name, message) + body = {"prompt": message} + response = self.post(f"/{session_name}:sendMessage", json=body) + self._handle_response(response, "add_user_response", session_name, body) # API returns empty on success, fetch updated session return self.get_session(session_name) - def resume_session(self, session_name: str) -> ExtendedDict: - """Resume a paused or awaiting session. + def resume_session(self, session_name: str, message: str) -> ExtendedDict: + """Resume a paused or awaiting session by sending a follow-up prompt. Args: session_name: Full resource name. 
+ message: User prompt to send to the session. Returns: Updated Session object. """ - return self.add_user_response(session_name) + return self.add_user_response(session_name, message) diff --git a/tests/connectors/test_google_jules.py b/tests/connectors/test_google_jules.py index 9604e6d..87d1bbb 100644 --- a/tests/connectors/test_google_jules.py +++ b/tests/connectors/test_google_jules.py @@ -156,6 +156,38 @@ def test_approve_plan_returns_updated_extended_session() -> None: connector.get_session.assert_called_once_with("sessions/123") +def test_add_user_response_sends_prompt_and_returns_updated_session() -> None: + """Jules follow-up messages are sent through the required sendMessage prompt body.""" + connector = JulesConnector(api_key="test-key") + connector.post = MagicMock(return_value=_response({})) + connector.get_session = MagicMock(return_value=ExtendedDict({"name": "sessions/123", "state": "RUNNING"})) + + result = connector.add_user_response("123", "Please continue with the tests") + + assert isinstance(result, ExtendedDict) + connector.post.assert_called_once_with("/sessions/123:sendMessage", json={"prompt": "Please continue with the tests"}) + connector.get_session.assert_called_once_with("sessions/123") + + +def test_add_user_response_requires_non_empty_prompt() -> None: + """The Jules sendMessage API requires a prompt, so empty local calls should fail.""" + connector = JulesConnector(api_key="test-key") + + with pytest.raises(ValueError, match="non-empty prompt"): + connector.add_user_response("123", "") + + +def test_resume_session_sends_prompt_via_add_user_response() -> None: + """The resume helper should keep the same required prompt contract.""" + connector = JulesConnector(api_key="test-key") + connector.add_user_response = MagicMock(return_value=ExtendedDict({"name": "sessions/123", "state": "RUNNING"})) + + result = connector.resume_session("123", "Resume with the approved plan") + + assert result["name"] == "sessions/123" + 
connector.add_user_response.assert_called_once_with("123", "Resume with the approved plan") + + def test_handle_response_raises_jules_error() -> None: """Jules API errors preserve vendor message and status details.""" connector = JulesConnector(api_key="test-key") From a4c93c61461debb6f0b6e10499a7f7dc4deb3e16 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 19:55:48 -0500 Subject: [PATCH 230/287] fix: redact connector bridge argument context --- README.md | 8 ++-- docs/package-surface.md | 7 ++-- src/extended_data/connectors/cli.py | 2 +- src/extended_data/connectors/mcp.py | 6 +-- src/extended_data/primitives/redaction.py | 49 +++++++++++++++++++++-- tests/connectors/test_cli.py | 37 +++++++++++++++++ tests/connectors/test_mcp.py | 20 +++++++++ tests/core/test_redaction.py | 15 +++++++ 8 files changed, 130 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 0c04c11..db1c110 100644 --- a/README.md +++ b/README.md @@ -150,9 +150,11 @@ objects. The generic CLI `call` command and MCP bridge expose only methods that advertise Extended Data payload returns. Serialized CLI/MCP boundaries and connector API error messages reuse the Tier 1 -redaction primitives for common secret-bearing keys and token-shaped strings, -including connector-provided `values=[...]` for context-sensitive resource -identifiers, so connector data methods can return structured vendor payloads +redaction primitives for common secret-bearing keys and token-shaped strings. +CLI and MCP connector calls pass method arguments through `values=[...]` as +context-sensitive diagnostic data, and connectors can add their own +operation-specific values for resource IDs, paths, URLs, emails, prompt text, or +vendor handles. Connector data methods can return structured vendor payloads without making stdout, tool responses, logs, or raised transport errors a secret leak by default. 
Raw SDK/client objects and raw transport responses remain available from the methods that explicitly return them. diff --git a/docs/package-surface.md b/docs/package-surface.md index ca10c31..6c674e5 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -306,9 +306,10 @@ same redaction policy before exceptions are raised. Common secret-bearing keys such as `password`, `api_key`, `access_token`, `authorization`, and `client_secret`, plus token-like strings in error text, are replaced with `[REDACTED]` before CLI stdout/stderr, MCP tool responses, or raised transport -errors expose them. Connectors can also pass context-specific `values=[...]` -for resource IDs, paths, URLs, emails, prompt text, or vendor payload handles -that are sensitive only in that operation. +errors expose them. CLI and MCP connector calls pass method arguments through +`values=[...]` as context-specific diagnostic data, and connectors can add their +own operation-specific values for resource IDs, paths, URLs, emails, prompt +text, or vendor payload handles that are sensitive only in that operation. LangChain, CrewAI, Strands, and auto-detection factory functions still return plain framework tool object lists. 
diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index 2c2aebb..bbd832d 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -168,7 +168,7 @@ def cmd_call(args: argparse.Namespace) -> int: return 0 except Exception as e: - _write_stderr(str(e)) + _write_stderr(redact_sensitive_text(e, values=kwargs.values())) return 1 diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index a56a110..d337002 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -110,9 +110,9 @@ def _jsonable_tool_result(result: Any) -> Any: return redact_sensitive_data(result) -def _tool_error_text(error: Exception) -> str: +def _tool_error_text(error: Exception, values: Iterable[Any] | None = None) -> str: """Return an MCP-safe error string without raw secret values.""" - return f"Error: {type(error).__name__}: {redact_sensitive_text(error)}" + return f"Error: {type(error).__name__}: {redact_sensitive_text(error, values=values)}" def _unknown_tool_text(name: str) -> str: @@ -202,7 +202,7 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]: return [TextContent(type="text", text=json.dumps(_jsonable_tool_result(result), indent=2, default=str))] except Exception as e: - return [TextContent(type="text", text=_tool_error_text(e))] + return [TextContent(type="text", text=_tool_error_text(e, arguments.values()))] return server diff --git a/src/extended_data/primitives/redaction.py b/src/extended_data/primitives/redaction.py index 8daee81..1f2eeb7 100644 --- a/src/extended_data/primitives/redaction.py +++ b/src/extended_data/primitives/redaction.py @@ -39,17 +39,58 @@ def _redacted_field(match: re.Match[str]) -> str: return f"{prefix}{_redacted_value(value)}" +def _iter_known_values(values: Iterable[Any]) -> Iterable[Any]: + """Yield scalar known-sensitive values from nested caller context.""" + for value in values: + if 
value is None: + continue + if isinstance(value, Mapping): + yield from _iter_known_values(value.values()) + elif isinstance(value, (str, bytes, bytearray)): + yield value + elif isinstance(value, Iterable): + yield from _iter_known_values(value) + else: + yield value + + +def _slash_encoding_variants(value: str) -> set[str]: + """Return common variants where any slash positions are URL encoded.""" + slash_count = value.count("/") + if slash_count == 0 or slash_count > 8: + return set() + + variants: set[str] = set() + for mask in range(1, 1 << slash_count): + slash_index = 0 + parts: list[str] = [] + for char in value: + if char == "/": + parts.append("%2F" if mask & (1 << slash_index) else "/") + slash_index += 1 + else: + parts.append(char) + variants.add("".join(parts)) + return variants + + def _redact_known_values(text: str, values: Iterable[Any] | None) -> str: """Redact explicitly provided values and URL-encoded variants.""" if values is None: return text - for value in values: - if value is None: - continue + for value in _iter_known_values(values): raw_value = str(value) if not raw_value: continue - for candidate in {raw_value, quote(raw_value, safe="")}: + slash_encoded = raw_value.replace("/", "%2F") + candidates = { + raw_value, + quote(raw_value, safe=""), + quote(raw_value, safe="/"), + slash_encoded, + } + candidates.update(_slash_encoding_variants(raw_value)) + for candidate in set(candidates) | {candidate.replace("%2F", "%2f") for candidate in candidates}: text = text.replace(candidate, REDACTED) return text diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index de66af5..441a34d 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -240,6 +240,43 @@ def test_cli_call_redacts_sensitive_error_output() -> None: assert "Authorization: [REDACTED]" in output +def test_cli_call_redacts_explicit_argument_values_from_errors() -> None: + """Call command should redact caller-provided resource context in 
stderr.""" + args = argparse.Namespace( + connector="example", + method="fetch", + extra=[ + "--email", + "private-user@example.com", + "--metadata", + '{"path": "/tmp/private/path", "prompt": "Fix login"}', + ], + json=False, + ) + connector = MagicMock() + connector.fetch.side_effect = RuntimeError( + "failed for private-user@example.com at /tmp/private%2Fpath while handling Fix login" + ) + + with ( + patch("extended_data.connectors.cli.get_connector_class", return_value=ExampleConnector), + patch("extended_data.connectors.cli.get_connector", return_value=connector), + patch("sys.stderr.write") as mock_write, + ): + exit_code = cmd_call(args) + + assert exit_code == 1 + connector.fetch.assert_called_once_with( + email="private-user@example.com", + metadata={"path": "/tmp/private/path", "prompt": "Fix login"}, + ) + output = mock_write.call_args.args[0] + assert "private-user@example.com" not in output + assert "/tmp/private%2Fpath" not in output + assert "Fix login" not in output + assert output.count("[REDACTED]") >= 3 + + def test_cli_main_help() -> None: """Test main CLI entry point with help.""" with patch("sys.argv", ["extended-data", "--help"]): diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index 9d9881e..2b44982 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -87,6 +87,26 @@ def test_tool_error_text_redacts_sensitive_exception_values() -> None: assert "[REDACTED]" in text +def test_tool_error_text_redacts_explicit_argument_values() -> None: + """Generic MCP errors should redact caller-provided resource context.""" + error = RuntimeError("failed for private-user@example.com at /tmp/private%2Fpath while handling Fix login") + + text = _tool_error_text( + error, + values=[ + { + "email": "private-user@example.com", + "metadata": {"path": "/tmp/private/path", "prompt": "Fix login"}, + } + ], + ) + + assert "private-user@example.com" not in text + assert "/tmp/private%2Fpath" not in text + assert "Fix 
login" not in text + assert text.count("[REDACTED]") >= 3 + + def test_unknown_tool_text_redacts_sensitive_tool_names() -> None: """Generic MCP unknown-tool diagnostics should redact user-controlled names.""" text = _unknown_tool_text("password=hunter2 Authorization: Bearer raw_token") diff --git a/tests/core/test_redaction.py b/tests/core/test_redaction.py index e50e676..e29b40a 100644 --- a/tests/core/test_redaction.py +++ b/tests/core/test_redaction.py @@ -31,6 +31,21 @@ def test_redact_sensitive_text_accepts_known_diagnostic_values() -> None: assert redacted.count("[REDACTED]") == 3 +def test_redact_sensitive_text_flattens_nested_known_values() -> None: + """Caller-provided diagnostic context can be nested like CLI or MCP arguments.""" + message = "failed for user@example.com at /tmp/private%2Fpath using prompt Fix login" + + redacted = redact_sensitive_text( + message, + values=[{"email": "user@example.com", "paths": ["/tmp/private/path"], "prompt": "Fix login"}], + ) + + assert "user@example.com" not in redacted + assert "/tmp/private%2Fpath" not in redacted + assert "Fix login" not in redacted + assert redacted.count("[REDACTED]") == 3 + + def test_redact_sensitive_data_recurses_through_json_like_payloads() -> None: """Structured redaction should handle nested JSON-like data.""" payload = { From 1e7d8bc0d792daf8bb29dc5be182dd92aba08cb7 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 20:01:30 -0500 Subject: [PATCH 231/287] fix: separate google connector cache by scopes --- src/extended_data/connectors/connectors.py | 4 ++++ tests/connectors/test_connectors.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index 17507be..95c75fc 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -344,9 +344,12 @@ def get_google_client( # For caching, use a hash to avoid exposing sensitive 
data cache_sa = hashlib.sha256(str(service_account_info).encode()).hexdigest()[:16] if service_account_info else None + cache_scopes = tuple(scopes) if scopes else None + cached = self._get_cached_client( "google", service_account=cache_sa, + scopes=cache_scopes, subject=subject, ) if cached: @@ -363,6 +366,7 @@ def get_google_client( "google", connector, service_account=cache_sa, + scopes=cache_scopes, subject=subject, ) return connector diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index cf3c0d2..25c3f54 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -229,6 +229,27 @@ def test_get_google_client(self, mock_google): assert result == mock_connector + @requires_google + @patch("extended_data.connectors.google.GoogleConnector") + def test_get_google_client_cache_separates_scopes(self, mock_google): + """Google connector cache keys include requested OAuth scopes.""" + vc = ConnectorFabric( + inputs={"GOOGLE_SERVICE_ACCOUNT": '{"type": "service_account"}'}, + from_environment=False, + ) + first_connector = MagicMock() + second_connector = MagicMock() + mock_google.side_effect = [first_connector, second_connector] + + first = vc.get_google_client(scopes=["scope-a"]) + second = vc.get_google_client(scopes=["scope-b"]) + third = vc.get_google_client(scopes=["scope-a"]) + + assert first is first_connector + assert second is second_connector + assert third is first_connector + assert mock_google.call_count == 2 + @requires_github @patch("extended_data.connectors.github.GitHubConnector") def test_get_github_client(self, mock_github): From bb18f4af9fda623a193edebcada15a354f425669 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 20:05:31 -0500 Subject: [PATCH 232/287] fix: list only usable connectors --- README.md | 4 +++- docs/package-surface.md | 5 +++- src/extended_data/connectors/registry.py | 12 +++++++--- tests/connectors/test_connectors.py | 29 ++++++++++++++++++++++++ 
4 files changed, 45 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index db1c110..6888032 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,9 @@ nested or sorted default mappings instead of importing the internal helper class Vendor connectors are first-class adapters in the data fabric. `ConnectorFabric` uses the registry to resolve connectors by name, injects shared input/logging context, caches connector instances, and lets specialized helpers coexist with -generic vendor lookup. +generic vendor lookup. `list_connectors()` returns registered connectors whose +runtime requirements are installed; use `list_connector_info()` for the full +catalog, including known connectors that need an `extended-data[...]` extra. `AWSConnector` and `GoogleConnector` are unified first-class classes: S3, Organizations, SSO, Workspace, Cloud Resource Manager, Billing, and services operations live on those connectors directly rather than on separate diff --git a/docs/package-surface.md b/docs/package-surface.md index 6c674e5..83a810f 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -282,7 +282,10 @@ Every built-in connector class registered by name is also exported from `extended_data` and `extended_data.connectors`. Those exports are real classes, not `None` sentinels. Vendor SDKs load when connector instances need them, so package import remains lightweight while missing optional extras still fail at -the operation boundary with install guidance. +the operation boundary with install guidance. `list_connectors()` reports the +registered connectors whose runtime requirements are installed; use +`list_connector_info()` when tooling needs the complete catalog plus missing +dependency and install guidance. 
Connectors that inherit `VendorConnectorBase` can keep raw transport access with `request()` or use `request_data()`, `get_data()`, `post_data()`, and the other diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index bfd17ae..44385c2 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -193,12 +193,18 @@ def _list_connector_classes() -> dict[str, builtins.type[VendorConnectorBase]]: def list_connectors() -> ExtendedList[ExtendedString]: - """List available connector names. + """List registered connector names whose runtime requirements are installed. Returns: - ExtendedList of connector registry names. + ExtendedList of usable connector registry names. """ - return extend_data(sorted(_discover_connectors())) + return extend_data( + sorted( + name + for name in _discover_connectors() + if not get_missing_connector_requirements(name) + ), + ) def get_connector_class(name: str) -> builtins.type[VendorConnectorBase]: diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index 25c3f54..1efa31c 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -454,3 +454,32 @@ def test_available_only_catalog_filters_missing_builtins(self): assert isinstance(info, ExtendedList) assert all(connector["available"] for connector in info) + + def test_list_connectors_filters_registered_connectors_with_missing_requirements(self, monkeypatch): + """Connector name lists only include registered connectors that can be used.""" + + class CursorConnector: + pass + + class GitHubConnector: + pass + + monkeypatch.setattr( + registry, + "_connector_cache", + { + "cursor": CursorConnector, + "github": GitHubConnector, + }, + ) + monkeypatch.setattr(registry, "_missing_builtin_connectors", {}) + monkeypatch.setattr( + registry, + "get_missing_connector_requirements", + lambda name: ExtendedList(["github"]) if name == "github" else 
ExtendedList(), + ) + + names = registry.list_connectors() + + assert isinstance(names, ExtendedList) + assert names == ["cursor"] From 7224b4ab10903a9dec85438f42f90324712408bd Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 20:08:11 -0500 Subject: [PATCH 233/287] test: align connector list assertions with extras --- tests/connectors/test_connectors.py | 2 +- tests/core/test_package_surface.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index 1efa31c..7555ab5 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -150,8 +150,8 @@ def test_connector_fabric_exposes_catalog_info(self): assert github_info["name"] == "github" assert isinstance(connector_names, ExtendedList) assert isinstance(connector_names[0], ExtendedString) - assert "github" in connector_names assert "cursor" in connector_names + assert ("github" in connector_names) is github_info["available"] @requires_boto3 @patch("extended_data.connectors.aws.AWSConnector") diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 39704f6..b35ec07 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -164,7 +164,7 @@ def test_root_exports_first_class_integrated_surfaces() -> None: assert isinstance(connector_names[0], ExtendedString) assert get_type_hints(connectors.list_connectors)["return"] == ExtendedList[ExtendedString] assert get_type_hints(ConnectorFabric.list_connectors)["return"] == ExtendedList[ExtendedString] - assert "github" in connector_names + assert "cursor" in connector_names def test_logging_exposes_stored_messages_as_detached_tier2_data() -> None: @@ -265,7 +265,7 @@ def test_first_class_connectors_keep_operation_mixins_without_optional_extras() def test_google_registry_uses_single_first_class_connector() -> None: """Google Workspace, Cloud, and Billing operations should 
not be split into connector aliases.""" - connector_names = set(connectors.list_connectors()) + connector_names = {connector["name"] for connector in connectors.list_connector_info()} assert "google" in connector_names assert "google_cloud" not in connector_names From 27c3fe684dbf3baa6b8f77da422dba2a69c6af67 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 20:11:30 -0500 Subject: [PATCH 234/287] docs: remove connector bootstrap markers --- src/extended_data/connectors/__init__.py | 4 ++-- tests/core/test_release_hygiene.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/extended_data/connectors/__init__.py b/src/extended_data/connectors/__init__.py index ff2abed..eecbd0b 100644 --- a/src/extended_data/connectors/__init__.py +++ b/src/extended_data/connectors/__init__.py @@ -1,9 +1,9 @@ """Extended Data Connectors - shared connectors for cloud, SaaS, and AI platforms. This package provides modular connectors for various cloud providers and services: -- Anthropic: Claude AI API and Agent SDK (NEW) +- Anthropic: Claude AI API and Agent SDK - AWS: Organizations, SSO/Identity Center, S3, Secrets Manager -- Cursor: Background Agent API for AI coding agents (NEW) +- Cursor: Background Agent API for AI coding agents - Google Cloud: Workspace, Cloud Platform, Billing, Services (GKE, Compute, etc.) 
- GitHub: Repository operations, PR management - Meshy: 3D asset generation diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index b8fb6c9..b7d1e12 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -31,6 +31,7 @@ ) REMOVED_PUBLIC_KEYWORDS = ("prefer_native", "unhump_results") FUTURE_API_PROMISES = ("will be available", "coming soon") +BOOTSTRAP_TEXT_MARKERS = ("(NEW)",) SECRETSSYNC_PROJECT_PATTERNS = ( re.compile(r"\bsecretssync\s+(?:Go\s+)?(?:project|library|repo|repository|CLI|connector|bindings?)\b", re.IGNORECASE), re.compile(r"\b(?:project|library|repo|repository|CLI|connector|bindings?)\s+secretssync\b", re.IGNORECASE), @@ -354,6 +355,24 @@ def test_public_text_does_not_promise_future_api_surfaces() -> None: assert offenders == [] +def test_public_text_does_not_keep_bootstrap_markers() -> None: + """Extracted package docs should not keep launch-era status markers.""" + offenders: list[str] = [] + paths = [REPO_ROOT / "README.md"] + paths.extend(path for root in (REPO_ROOT / "docs", REPO_ROOT / "examples", REPO_ROOT / "src") for path in root.rglob("*")) + + for path in sorted(path for path in paths if path.is_file()): + if path.suffix in {".pyc", ".png"}: + continue + relative_path = path.relative_to(REPO_ROOT) + for line_number, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1): + for marker in BOOTSTRAP_TEXT_MARKERS: + if marker in line: + offenders.append(f"{relative_path}:{line_number}: {marker}") + + assert offenders == [] + + def test_public_guidance_names_secrets_sync_roles_precisely() -> None: """Use SecretSync for the product and reserve exact names for CLI modules.""" offenders: list[str] = [] From ae85078eeaa25cf3d2348157aa87d52071d30a5f Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 20:23:52 -0500 Subject: [PATCH 235/287] fix: hash sensitive connector cache keys --- README.md | 2 ++ docs/package-surface.md | 5 ++- 
src/extended_data/connectors/connectors.py | 29 ++++++++++++++++- tests/connectors/test_connectors.py | 37 ++++++++++++++++++++++ 4 files changed, 71 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6888032..9449715 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,8 @@ context, caches connector instances, and lets specialized helpers coexist with generic vendor lookup. `list_connectors()` returns registered connectors whose runtime requirements are installed; use `list_connector_info()` for the full catalog, including known connectors that need an `extended-data[...]` extra. +Secret-like cache key fields such as `token`, `api_key`, `password`, and +`client_secret` are hashed before they are stored in the fabric cache. `AWSConnector` and `GoogleConnector` are unified first-class classes: S3, Organizations, SSO, Workspace, Cloud Resource Manager, Billing, and services operations live on those connectors directly rather than on separate diff --git a/docs/package-surface.md b/docs/package-surface.md index 83a810f..8d96391 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -285,7 +285,10 @@ package import remains lightweight while missing optional extras still fail at the operation boundary with install guidance. `list_connectors()` reports the registered connectors whose runtime requirements are installed; use `list_connector_info()` when tooling needs the complete catalog plus missing -dependency and install guidance. +dependency and install guidance. `ConnectorFabric` hashes secret-like cache-key +fields such as `token`, `api_key`, `password`, and `client_secret` before +storing cache entries, so cache inspection and debug output do not expose raw +credential material. 
Connectors that inherit `VendorConnectorBase` can keep raw transport access with `request()` or use `request_data()`, `get_data()`, `post_data()`, and the other diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index 95c75fc..49407c4 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -26,6 +26,33 @@ from extended_data.primitives import get_default_dict, get_unique_signature, make_hashable +_SENSITIVE_CACHE_KEY_PARTS = ( + "api_key", + "authorization", + "client_secret", + "credential", + "password", + "secret", + "token", +) + + +def _is_sensitive_cache_field(name: str) -> bool: + """Return whether a cache-key field name usually carries secret material.""" + normalized = name.lower().replace("-", "_") + return any(part in normalized for part in _SENSITIVE_CACHE_KEY_PARTS) + + +def _cache_safe_value(name: str, value: Any) -> Any: + """Return cache-key material without storing raw secret values.""" + hashable_value = make_hashable(value) + if value is None or not _is_sensitive_cache_field(name): + return hashable_value + + digest = hashlib.sha256(repr(hashable_value).encode()).hexdigest() + return ("sha256", digest) + + # Optional connectors - imported lazily when methods are called # This allows the package to be imported without all vendor SDKs installed @@ -77,7 +104,7 @@ def __init__( def _get_cache_key(self, **kwargs: Any) -> frozenset[tuple[str, Any]]: """Generate a hashable cache key from kwargs.""" - hashable_kwargs = {k: make_hashable(v) for k, v in kwargs.items()} + hashable_kwargs = {k: _cache_safe_value(k, v) for k, v in kwargs.items()} return frozenset(hashable_kwargs.items()) def _get_cached_client(self, client_type: str, **kwargs: Any) -> Any | None: diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index 7555ab5..20e815c 100644 --- a/tests/connectors/test_connectors.py +++ 
b/tests/connectors/test_connectors.py @@ -57,6 +57,22 @@ def test_get_cache_key(self): assert key1 == key2 assert key1 != key3 + def test_get_cache_key_hashes_sensitive_values(self): + """Sensitive cache-key fields should not expose raw credentials.""" + vc = ConnectorFabric(from_environment=False) + + key1 = vc._get_cache_key(github_token="ghp_raw_token", client_secret="zoom-secret", normal="public") + key2 = vc._get_cache_key(github_token="ghp_raw_token", client_secret="zoom-secret", normal="public") + key3 = vc._get_cache_key(github_token="ghp_other_token", client_secret="zoom-secret", normal="public") + + assert key1 == key2 + assert key1 != key3 + rendered = repr(key1) + assert "ghp_raw_token" not in rendered + assert "zoom-secret" not in rendered + assert "sha256" in rendered + assert "public" in rendered + def test_cache_client(self): """Test caching and retrieving clients.""" vc = ConnectorFabric() @@ -132,6 +148,27 @@ def __init__(self, **kwargs): assert third is not first assert mock_get_connector_class.call_count == 2 + @patch("extended_data.connectors.connectors.get_connector_class") + def test_get_connector_cache_does_not_store_raw_sensitive_kwargs(self, mock_get_connector_class): + """Generic connector caching hashes secret-like constructor arguments.""" + + class DummyConnector: + def __init__(self, **kwargs): + self.kwargs = kwargs + + vc = ConnectorFabric(from_environment=False) + mock_get_connector_class.return_value = DummyConnector + + first = vc.get_connector("dummy", api_key="key_123", password="hunter2") + second = vc.get_connector("dummy", api_key="key_123", password="hunter2") + + assert first is second + assert mock_get_connector_class.call_count == 1 + rendered_cache = repr(vc._client_cache) + assert "key_123" not in rendered_cache + assert "hunter2" not in rendered_cache + assert "sha256" in rendered_cache + def test_connector_fabric_exposes_catalog_info(self): """ConnectorFabric exposes registry-backed catalog metadata.""" vc = 
ConnectorFabric(from_environment=False) From 63f6656e0251a69fff5187bfbe44a2d13a9b550e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 20:35:02 -0500 Subject: [PATCH 236/287] feat: add data file artifact surface --- README.md | 18 ++-- docs/package-surface.md | 11 ++- src/extended_data/__init__.py | 2 + src/extended_data/io/__init__.py | 2 + src/extended_data/io/files.py | 147 +++++++++++++++++++++++++++++ tests/core/test_file_data_type.py | 58 ++++++++++++ tests/core/test_package_surface.py | 1 + 7 files changed, 231 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 9449715..6ff456e 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ CrewAI releases pull vulnerable `chromadb` versions transitively. ## Usage ```python -from extended_data import ConnectorFabric, DataWorkflow, ExtendedDict, InputProvider, Logging, decode_file +from extended_data import ConnectorFabric, DataFile, DataWorkflow, ExtendedDict, InputProvider, Logging, decode_file from extended_data.primitives import decode_json, encode_yaml, number_to_words, redact_sensitive_text logger = Logging(logger_name="example") @@ -46,6 +46,7 @@ connectors = ConnectorFabric(inputs=inputs.inputs, logger=logger) data = decode_json('{"status": "ok"}') payload = ExtendedDict(data).deep_merge({"source": "example"}) decoded_file = decode_file('{"service": {"name": "api"}}', suffix="json") +artifact = DataFile.decode('{"service": {"name": "api"}}', suffix="json") workflow = DataWorkflow.from_value(payload).then(("normalize", lambda data: data.unhump())).result() print(encode_yaml(payload)) @@ -53,6 +54,7 @@ print(decoded_file["service"]["name"].upper_first()) print(number_to_words(42)) print(redact_sensitive_text("Authorization: Bearer raw_token")) print(redact_sensitive_text("failed for user@example.com", values=["user@example.com"])) +print(artifact.metadata["encoding"]) print(workflow.as_builtin()) ``` @@ -229,12 +231,14 @@ Format encoders lower extended containers, including 
extended mapping keys, at the serialization boundary. `read_data_file()` is the direct file boundary for one-step read plus decode workflows; it raises for missing files and promotes structured data into Tier 2 -containers by default. `DataWorkflow` makes those compositions first-class: -read or decode data, apply named transformations, write an output artifact, and -keep the step trail in a `WorkflowResult`. Completed workflow results expose -detached promoted views with `as_extended()` plus direct `to_export_safe()` and -`wrap_for_export()` helpers. Missing workflow inputs and empty writes fail -loudly. +containers by default. `DataFile` makes one decoded file or URL artifact +first-class with promoted data, promoted source metadata, detached +`as_extended()` views, and direct write/export helpers. `DataWorkflow` makes +multi-step compositions first-class: read or decode data, apply named +transformations, write an output artifact, and keep the step trail in a +`WorkflowResult`. Completed workflow results expose detached promoted views +with `as_extended()` plus direct `to_export_safe()` and `wrap_for_export()` +helpers. Missing file inputs and empty writes fail loudly. `InputProvider` stores its active, frozen, and merged input snapshots as `ExtendedDict` values, so direct input-data access can use Tier 2 container methods. `snapshot_inputs()` returns detached active or frozen snapshots, and diff --git a/docs/package-surface.md b/docs/package-surface.md index 8d96391..7e02f4d 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -12,6 +12,7 @@ preserved in this major version. 
from extended_data import ( ConnectorFabric, DataDecodeError, + DataFile, DataWorkflow, ExtendedDict, ExtendedList, @@ -147,12 +148,14 @@ Tier 3 file and decode surfaces promote decoded values into Tier 2 containers by default: ```python -from extended_data import decode_file, read_data_file +from extended_data import DataFile, decode_file, read_data_file payload = decode_file('{"service": {"name": "api"}}', suffix="json") file_payload = read_data_file("config/service.json") +artifact = DataFile.decode('{"service": {"name": "api"}}', suffix="json") assert payload["service"]["name"].upper_first() == "Api" assert file_payload["service"]["name"].upper_first() == "Api" +assert artifact.metadata["encoding"].upper_first() == "Json" ``` Pass `as_extended=False` when a decode boundary should return standard Python @@ -165,6 +168,12 @@ Format encoders lower Tier 2 containers the same way before serializing JSON, YAML, TOML, and HCL output, including extended mapping keys that must become plain strings before JSON handoff. +`DataFile` is the Tier 3 artifact surface for one decoded file, URL, or +in-memory payload. It keeps `source`, `encoding`, and source metadata promoted, +returns decoded `data` as Tier 2 containers by default, exposes detached +`as_extended()` views, and writes output artifacts through the same export +boundary as `write_file()`. + `DataWorkflow` is the Tier 3 composition surface for higher-order data processing. 
It reads or decodes structured data through the file and format processors, promotes values into Tier 2 containers by default, applies named diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 6565934..38d64ac 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -26,6 +26,7 @@ wrap_raw_data_for_export, ) from extended_data.io.files import ( + DataFile, FilePath, clone_repository_to_temp, decode_file, @@ -125,6 +126,7 @@ def __getattr__(name: str) -> Any: "ConnectorInfo", "CursorConnector", "DataDecodeError", + "DataFile", "DataWorkflow", "ExitRunError", "ExtendedDict", diff --git a/src/extended_data/io/__init__.py b/src/extended_data/io/__init__.py index fd3e402..ad64010 100644 --- a/src/extended_data/io/__init__.py +++ b/src/extended_data/io/__init__.py @@ -3,6 +3,7 @@ from extended_data.io.base64 import base64_decode, base64_encode from extended_data.io.exporters import make_raw_data_export_safe, wrap_raw_data_for_export from extended_data.io.files import ( + DataFile, FilePath, clone_repository_to_temp, decode_file, @@ -24,6 +25,7 @@ __all__ = [ + "DataFile", "FilePath", "base64_decode", "base64_encode", diff --git a/src/extended_data/io/files.py b/src/extended_data/io/files.py index 3c22c33..aaaf489 100644 --- a/src/extended_data/io/files.py +++ b/src/extended_data/io/files.py @@ -8,6 +8,8 @@ from base64 import b64encode from collections.abc import Mapping +from copy import deepcopy +from dataclasses import dataclass, field from pathlib import Path from typing import Any, TypeAlias, cast @@ -15,6 +17,8 @@ from git import GitCommandError, InvalidGitRepositoryError, NoSuchPathError, Repo +from extended_data.containers import ExtendedDict, ExtendedString, extend_data, to_builtin +from extended_data.io.exporters import make_raw_data_export_safe, wrap_raw_data_for_export from extended_data.primitives.serialization import normalize_data_encoding @@ -22,6 +26,149 @@ """Type alias for file paths that can be 
represented as strings or os.PathLike objects.""" +@dataclass(frozen=True, slots=True) +class DataFile: + """Decoded file or URL data with source metadata and export helpers.""" + + source: ExtendedString + data: Any + encoding: ExtendedString + path: Path | None = None + metadata: ExtendedDict = field(default_factory=ExtendedDict) + + @classmethod + def decode( + cls, + file_data: str | memoryview | bytes | bytearray, + *, + file_path: FilePath | None = None, + suffix: str | None = None, + as_extended: bool = True, + ) -> DataFile: + """Decode in-memory data into a first-class data file artifact.""" + encoding = _resolve_data_file_encoding(file_path=file_path, suffix=suffix) + decoded = decode_file(file_data, file_path=file_path, suffix=encoding, as_extended=as_extended) + source = str(file_path) if file_path is not None else "memory" + return cls( + source=ExtendedString(source), + data=decoded, + encoding=ExtendedString(encoding), + metadata=_data_file_metadata(source=source, encoding=encoding, path=None, data=decoded), + ) + + @classmethod + def read( + cls, + file_path: FilePath, + *, + suffix: str | None = None, + as_extended: bool = True, + charset: str = "utf-8", + errors: str = "strict", + headers: Mapping[str, str] | None = None, + tld: Path | None = None, + ) -> DataFile: + """Read and decode a local file or URL into a first-class data artifact.""" + file_data = read_file( + file_path, + charset=charset, + errors=errors, + headers=headers, + tld=tld, + ) + if file_data is None: + raise FileNotFoundError(str(file_path)) + + source = str(file_path) + encoding = _resolve_data_file_encoding(file_path=file_path, suffix=suffix) + decoded = decode_file( + cast(str | memoryview | bytes | bytearray, file_data), + file_path=file_path, + suffix=encoding, + as_extended=as_extended, + ) + path = None if is_url(source) else resolve_local_path(file_path, tld=tld) + return cls( + source=ExtendedString(source), + data=decoded, + encoding=ExtendedString(encoding), + 
path=path, + metadata=_data_file_metadata(source=source, encoding=encoding, path=path, data=decoded), + ) + + def as_builtin(self) -> Any: + """Return the artifact data lowered to built-in Python values.""" + return to_builtin(self.data) + + def as_extended(self) -> Any: + """Return a detached copy of artifact data promoted to Extended Data containers.""" + return extend_data(deepcopy(to_builtin(self.data))) + + def to_export_safe(self, *, export_to_yaml: bool = False) -> Any: + """Return the artifact data converted to export-safe primitive values.""" + return make_raw_data_export_safe(self.data, export_to_yaml=export_to_yaml) + + def wrap_for_export(self, allow_encoding: bool | str = True, **format_opts: Any) -> str: + """Return the artifact data wrapped as an encoded export string.""" + return wrap_raw_data_for_export(self.data, allow_encoding=allow_encoding, **format_opts) + + def write( + self, + file_path: FilePath | None = None, + *, + encoding: str | None = None, + charset: str = "utf-8", + allow_empty: bool = False, + tld: Path | None = None, + ) -> DataFile: + """Write artifact data and return a new artifact for the output path.""" + target = file_path if file_path is not None else self.path + if target is None: + raise ValueError("DataFile has no local path; pass file_path to write it") + + output_path = write_file( + target, + self.data, + encoding=encoding, + charset=charset, + allow_empty=allow_empty, + tld=tld, + ) + if output_path is None: + raise ValueError("DataFile data was empty; pass allow_empty=True to write it") + + output_encoding = _resolve_data_file_encoding(file_path=output_path, suffix=encoding) + return DataFile( + source=ExtendedString(str(target)), + data=self.data, + encoding=ExtendedString(output_encoding), + path=output_path, + metadata=_data_file_metadata(source=str(target), encoding=output_encoding, path=output_path, data=self.data), + ) + + +def _resolve_data_file_encoding(*, file_path: FilePath | None = None, suffix: str | None 
= None) -> str: + """Return the normalized encoding used by a DataFile artifact.""" + if suffix is not None: + return normalize_data_encoding(suffix) or "raw" + if file_path is not None: + return get_encoding_for_file_path(file_path) + return "raw" + + +def _data_file_metadata(*, source: str, encoding: str, path: Path | None, data: Any) -> ExtendedDict: + """Return promoted artifact metadata for workflow and connector handoff.""" + return ExtendedDict( + { + "source": source, + "encoding": encoding, + "path": str(path) if path is not None else None, + "is_url": is_url(source), + "data_type": type(data).__name__, + } + ) + + def _github_auth_header_env(github_token: str) -> dict[str, str]: """Return Git environment config for GitHub token auth without URL credentials.""" env = os.environ.copy() diff --git a/tests/core/test_file_data_type.py b/tests/core/test_file_data_type.py index 2241fa4..ff4a7a5 100644 --- a/tests/core/test_file_data_type.py +++ b/tests/core/test_file_data_type.py @@ -27,6 +27,7 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.io.files import ( + DataFile, FilePath, clone_repository_to_temp, decode_file, @@ -575,6 +576,63 @@ def test_read_data_file_raises_for_missing_file(tmp_path: Path) -> None: read_data_file("missing.json", tld=tmp_path) +def test_data_file_read_promotes_data_and_metadata(tmp_path: Path) -> None: + """DataFile reads keep file data and source metadata in the promoted surface.""" + test_file = tmp_path / "service.json" + test_file.write_text('{"service": {"name": "api"}, "ports": [8080]}') + + artifact = DataFile.read("service.json", tld=tmp_path) + + assert artifact.source == "service.json" + assert artifact.encoding == "json" + assert artifact.path == test_file.resolve() + assert isinstance(artifact.data, ExtendedDict) + assert isinstance(artifact.data["service"], ExtendedDict) + assert isinstance(artifact.data["service"]["name"], ExtendedString) + assert 
isinstance(artifact.metadata, ExtendedDict) + assert artifact.metadata["encoding"].upper_first() == "Json" + assert artifact.metadata["path"] == str(test_file.resolve()) + assert artifact.metadata["is_url"] is False + assert artifact.as_builtin() == {"service": {"name": "api"}, "ports": [8080]} + + +def test_data_file_extended_view_is_detached() -> None: + """DataFile promoted views should not share mutable state with artifact data.""" + artifact = DataFile.decode('{"service": {"name": "api"}}', suffix="json") + + promoted = artifact.as_extended() + promoted["service"]["name"] = "worker" + + assert isinstance(promoted, ExtendedDict) + assert artifact.data["service"]["name"] == "api" + assert artifact.as_extended()["service"]["name"].upper_first() == "Api" + + +def test_data_file_decode_and_write_round_trip(tmp_path: Path) -> None: + """DataFile composes decode, export, write, and readback as a Tier 3 artifact.""" + artifact = DataFile.decode('{"service": {"name": "api"}}', suffix="json") + + assert isinstance(artifact.data, ExtendedDict) + assert artifact.source == "memory" + assert artifact.encoding == "json" + assert artifact.wrap_for_export(allow_encoding="json").strip().startswith("{") + + output = artifact.write("build/service.yaml", tld=tmp_path) + + assert output.path == tmp_path / "build" / "service.yaml" + assert output.encoding == "yaml" + assert isinstance(output.metadata["source"], ExtendedString) + assert read_data_file(output.path) == {"service": {"name": "api"}} + + +def test_data_file_write_without_local_target_fails_loudly() -> None: + """In-memory DataFile artifacts require an explicit output path.""" + artifact = DataFile.decode("plain text", suffix="raw") + + with pytest.raises(ValueError, match="pass file_path"): + artifact.write() + + def test_write_file_json(tmp_path: Path) -> None: """Tests writing data as JSON. 
diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index b35ec07..702146e 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -143,6 +143,7 @@ def test_old_monorepo_import_namespaces_are_not_preserved() -> None: def test_root_exports_first_class_integrated_surfaces() -> None: """Inputs, logging, and connector fabric are available from the root package.""" assert extended_data.DataDecodeError.__name__ == "DataDecodeError" + assert extended_data.DataFile.__name__ == "DataFile" assert extended_data.DataWorkflow.__name__ == "DataWorkflow" assert extended_data.InputProvider is InputProvider assert extended_data.Logging is Logging From 1f845de60fa9ce4433db83d9c3ba0670ff5d2138 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 20:40:59 -0500 Subject: [PATCH 237/287] feat: expose connector response artifacts --- README.md | 3 + docs/package-surface.md | 5 +- src/extended_data/connectors/base.py | 105 ++++++++++++++++++ src/extended_data/io/files.py | 17 ++- tests/connectors/test_base.py | 64 +++++++++++ .../test_connector_payload_contracts.py | 4 + 6 files changed, 194 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6ff456e..22b827d 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,9 @@ preserved. Connector data payloads are promoted into Tier 2 containers at the boundary, so decoded files, HTTP response data, GraphQL responses, and SDK-shaped maps can use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods immediately. +Use `request_data_file()` when a connector workflow needs API response data and +non-secret provenance such as source URL, HTTP status, content type, method, +and endpoint in one `DataFile` artifact. Data-returning AI tool wrappers expose the same `ExtendedDict`/`ExtendedList` payload contract; framework factory functions still return framework tool objects. 
diff --git a/docs/package-surface.md b/docs/package-surface.md index 7e02f4d..c4bdca0 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -302,7 +302,10 @@ credential material. Connectors that inherit `VendorConnectorBase` can keep raw transport access with `request()` or use `request_data()`, `get_data()`, `post_data()`, and the other verb-specific helpers to decode HTTP JSON, YAML, TOML, HCL, or text responses -through the same Tier 2 container bridge used by file and input decoding. +through the same Tier 2 container bridge used by file and input decoding. Use +`request_data_file()` when an API workflow needs the decoded data plus +non-secret response provenance such as source URL, HTTP status, content type, +method, and endpoint in a `DataFile` artifact. Connector methods that return vendor data payloads should call `extend_result()` at the return boundary, making SDK-shaped dictionaries, lists, decoded repository files, GraphQL results, and workflow-builder output diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index a30fc25..4675b3a 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -33,6 +33,8 @@ def my_operation(self) -> ExtendedDict: import time from abc import ABC +from collections.abc import Mapping +from contextlib import suppress from typing import TYPE_CHECKING, Any, ClassVar import httpx @@ -62,6 +64,7 @@ def my_operation(self) -> ExtendedDict: from pydantic import BaseModel from extended_data.containers import ExtendedDict, ExtendedList + from extended_data.io import DataFile class RateLimitError(Exception): @@ -362,6 +365,88 @@ def decode_response( return decode_file(response.content, suffix=resolved_suffix, as_extended=as_extended) + @staticmethod + def _response_source(response: httpx.Response, fallback: str | None = None) -> str: + """Return a stable source label for a response artifact.""" + if fallback: + return fallback + try: + return 
str(response.request.url) + except RuntimeError: + return "response" + + @staticmethod + def _response_metadata(response: httpx.Response, metadata: Mapping[str, Any] | None = None) -> dict[str, Any]: + """Return non-secret response provenance for a DataFile artifact.""" + response_metadata: dict[str, Any] = { + "status_code": response.status_code, + "content_type": response.headers.get("content-type", ""), + } + with suppress(RuntimeError): + response_metadata["method"] = response.request.method + if metadata: + response_metadata.update(metadata) + return response_metadata + + def decode_response_file( + self, + response: httpx.Response, + *, + source: str | None = None, + suffix: str | None = None, + as_extended: bool = True, + metadata: Mapping[str, Any] | None = None, + ) -> DataFile: + """Decode an HTTP response body into a DataFile artifact with provenance.""" + from extended_data.containers import ExtendedDict, ExtendedString + from extended_data.io import DataFile + + resolved_suffix = suffix or self._suffix_from_content_type(response.headers.get("content-type")) + artifact_source = self._response_source(response, fallback=source) + artifact_metadata = self._response_metadata(response, metadata=metadata) + + if not response.content: + return DataFile( + source=ExtendedString(artifact_source), + data=None, + encoding=ExtendedString(resolved_suffix or "raw"), + metadata=ExtendedDict( + { + "source": artifact_source, + "encoding": resolved_suffix or "raw", + "path": None, + "is_url": artifact_source.startswith(("http://", "https://")), + "data_type": "NoneType", + **artifact_metadata, + } + ), + ) + + if resolved_suffix is None: + return DataFile( + source=ExtendedString(artifact_source), + data=response.content, + encoding=ExtendedString("raw"), + metadata=ExtendedDict( + { + "source": artifact_source, + "encoding": "raw", + "path": None, + "is_url": artifact_source.startswith(("http://", "https://")), + "data_type": type(response.content).__name__, + 
**artifact_metadata, + } + ), + ) + + return DataFile.decode( + response.content, + file_path=artifact_source, + suffix=resolved_suffix, + as_extended=as_extended, + metadata=artifact_metadata, + ) + def extend_result(self, value: Any) -> Any: """Promote connector data payloads into Tier 2 containers.""" from extended_data.containers import extend_data @@ -382,6 +467,26 @@ def request_data( response = self.request(method, endpoint, headers=headers, **kwargs) return self.decode_response(response, suffix=suffix, as_extended=as_extended) + def request_data_file( + self, + method: str, + endpoint: str, + *, + headers: dict[str, str] | None = None, + suffix: str | None = None, + as_extended: bool = True, + **kwargs: Any, + ) -> DataFile: + """Make an HTTP request and return a decoded DataFile response artifact.""" + response = self.request(method, endpoint, headers=headers, **kwargs) + return self.decode_response_file( + response, + source=self._build_url(endpoint), + suffix=suffix, + as_extended=as_extended, + metadata={"method": method.upper(), "endpoint": endpoint}, + ) + def get(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP GET request.""" return self.request("GET", endpoint, **kwargs) diff --git a/src/extended_data/io/files.py b/src/extended_data/io/files.py index aaaf489..0dcc076 100644 --- a/src/extended_data/io/files.py +++ b/src/extended_data/io/files.py @@ -44,6 +44,7 @@ def decode( file_path: FilePath | None = None, suffix: str | None = None, as_extended: bool = True, + metadata: Mapping[str, Any] | None = None, ) -> DataFile: """Decode in-memory data into a first-class data file artifact.""" encoding = _resolve_data_file_encoding(file_path=file_path, suffix=suffix) @@ -53,7 +54,7 @@ def decode( source=ExtendedString(source), data=decoded, encoding=ExtendedString(encoding), - metadata=_data_file_metadata(source=source, encoding=encoding, path=None, data=decoded), + metadata=_data_file_metadata(source=source, encoding=encoding, path=None, 
data=decoded, extra=metadata), ) @classmethod @@ -156,9 +157,16 @@ def _resolve_data_file_encoding(*, file_path: FilePath | None = None, suffix: st return "raw" -def _data_file_metadata(*, source: str, encoding: str, path: Path | None, data: Any) -> ExtendedDict: +def _data_file_metadata( + *, + source: str, + encoding: str, + path: Path | None, + data: Any, + extra: Mapping[str, Any] | None = None, +) -> ExtendedDict: """Return promoted artifact metadata for workflow and connector handoff.""" - return ExtendedDict( + metadata = ExtendedDict( { "source": source, "encoding": encoding, @@ -167,6 +175,9 @@ def _data_file_metadata(*, source: str, encoding: str, path: Path | None, data: "data_type": type(data).__name__, } ) + if extra: + metadata.update(extra) + return metadata def _github_auth_header_env(github_token: str) -> dict[str, str]: diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py index b754f89..4366725 100644 --- a/tests/connectors/test_base.py +++ b/tests/connectors/test_base.py @@ -13,6 +13,7 @@ from extended_data.connectors.base import ConnectorAPIError, RateLimitError, VendorConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString +from extended_data.io import DataFile from extended_data.logging import Logging @@ -117,6 +118,69 @@ def test_request_data_decodes_response_body() -> None: assert mock_client.request.call_args.args[1] == "https://api.example.com/status" +def test_decode_response_file_returns_artifact_with_metadata() -> None: + """HTTP response artifacts retain decoded data and non-secret provenance.""" + connector = _connector() + response = httpx.Response( + 200, + content=b'{"service":{"name":"api"}}', + headers={"content-type": "application/json"}, + request=httpx.Request("GET", "https://api.example.com/status"), + ) + + artifact = connector.decode_response_file(response) + + assert isinstance(artifact, DataFile) + assert artifact.source == "https://api.example.com/status" + 
assert artifact.encoding == "json" + assert isinstance(artifact.data, ExtendedDict) + assert artifact.data["service"]["name"].upper_first() == "Api" + assert artifact.metadata["status_code"] == 200 + assert artifact.metadata["content_type"] == "application/json" + assert artifact.metadata["method"] == "GET" + + +def test_decode_response_file_preserves_unknown_binary_payload() -> None: + """Unknown binary API responses remain bytes inside the DataFile artifact.""" + connector = _connector() + response = httpx.Response( + 200, + content=b"\x00\x01\x02", + headers={"content-type": "application/octet-stream"}, + ) + + artifact = connector.decode_response_file(response, source="https://api.example.com/blob") + + assert isinstance(artifact, DataFile) + assert artifact.source == "https://api.example.com/blob" + assert artifact.encoding == "raw" + assert artifact.data == b"\x00\x01\x02" + assert artifact.metadata["data_type"] == "bytes" + assert artifact.metadata["status_code"] == 200 + + +def test_request_data_file_adds_request_provenance() -> None: + """request_data_file combines request, decoding, and artifact provenance.""" + connector = _connector() + mock_client = MagicMock() + mock_client.request.return_value = httpx.Response( + 200, + content=b'{"ok":true}', + headers={"content-type": "application/json"}, + ) + connector._client = mock_client + + artifact = connector.request_data_file("GET", "/status") + + assert isinstance(artifact, DataFile) + assert artifact.source == "https://api.example.com/status" + assert artifact.data == {"ok": True} + assert isinstance(artifact.data, ExtendedDict) + assert artifact.metadata["method"] == "GET" + assert artifact.metadata["endpoint"] == "/status" + mock_client.request.assert_called_once() + + def test_extend_result_promotes_connector_payloads() -> None: """Connector data payloads cross into the Tier 2 container layer explicitly.""" connector = _connector() diff --git a/tests/connectors/test_connector_payload_contracts.py 
b/tests/connectors/test_connector_payload_contracts.py index 69a1d56..0a4f73d 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -213,6 +213,7 @@ RAW_DATA_SURFACE_METHOD_NAMES = { "close", + "decode_response_file", "delete", "delete_data", "download", @@ -234,11 +235,13 @@ "replace_inputs", "request", "request_data", + "request_data_file", "snapshot_inputs", } RAW_DATA_SURFACE_METHODS = ( VendorConnectorBase.close, + VendorConnectorBase.decode_response_file, VendorConnectorBase.delete, VendorConnectorBase.delete_data, VendorConnectorBase.download, @@ -256,6 +259,7 @@ VendorConnectorBase.put_data, VendorConnectorBase.request, VendorConnectorBase.request_data, + VendorConnectorBase.request_data_file, InputProvider.freeze_inputs, InputProvider.get_input, InputProvider.merge_inputs, From 5d79c84d31a59b9abaa9a50dfcf6f8c2855bea07 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 20:44:52 -0500 Subject: [PATCH 238/287] feat: compose data files into workflows --- README.md | 11 ++++++----- docs/package-surface.md | 13 +++++++------ src/extended_data/io/files.py | 12 +++++++++++- src/extended_data/workflows/__init__.py | 8 +++++++- tests/core/test_workflows.py | 25 +++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 22b827d..e544e62 100644 --- a/README.md +++ b/README.md @@ -236,12 +236,13 @@ the serialization boundary. workflows; it raises for missing files and promotes structured data into Tier 2 containers by default. `DataFile` makes one decoded file or URL artifact first-class with promoted data, promoted source metadata, detached -`as_extended()` views, and direct write/export helpers. `DataWorkflow` makes -multi-step compositions first-class: read or decode data, apply named +`as_extended()` views, direct write/export helpers, and a `workflow()` bridge +for artifact-first processing. 
`DataWorkflow` makes multi-step compositions +first-class: read, decode, or accept a `DataFile` artifact, apply named transformations, write an output artifact, and keep the step trail in a -`WorkflowResult`. Completed workflow results expose detached promoted views -with `as_extended()` plus direct `to_export_safe()` and `wrap_for_export()` -helpers. Missing file inputs and empty writes fail loudly. +`WorkflowResult`. Completed workflow results expose detached promoted views with +`as_extended()` plus direct `to_export_safe()` and `wrap_for_export()` helpers. +Missing file inputs and empty writes fail loudly. `InputProvider` stores its active, frozen, and merged input snapshots as `ExtendedDict` values, so direct input-data access can use Tier 2 container methods. `snapshot_inputs()` returns detached active or frozen snapshots, and diff --git a/docs/package-surface.md b/docs/package-surface.md index c4bdca0..de2348f 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -171,15 +171,16 @@ plain strings before JSON handoff. `DataFile` is the Tier 3 artifact surface for one decoded file, URL, or in-memory payload. It keeps `source`, `encoding`, and source metadata promoted, returns decoded `data` as Tier 2 containers by default, exposes detached -`as_extended()` views, and writes output artifacts through the same export -boundary as `write_file()`. +`as_extended()` views, writes output artifacts through the same export boundary +as `write_file()`, and starts artifact-first processing with `workflow()`. `DataWorkflow` is the Tier 3 composition surface for higher-order data processing. It reads or decodes structured data through the file and format -processors, promotes values into Tier 2 containers by default, applies named -transformation steps, writes output artifacts, and returns a `WorkflowResult` -with the completed value, output path, and step trail. 
`WorkflowResult.as_extended()` -returns a detached promoted view of the completed value, and result-level +processors, accepts `DataFile` artifacts with `from_data_file()`, promotes +values into Tier 2 containers by default, applies named transformation steps, +writes output artifacts, and returns a `WorkflowResult` with the completed +value, output path, and step trail. `WorkflowResult.as_extended()` returns a +detached promoted view of the completed value, and result-level `to_export_safe()` / `wrap_for_export()` expose the same export boundary used by Tier 2 containers. diff --git a/src/extended_data/io/files.py b/src/extended_data/io/files.py index 0dcc076..236883a 100644 --- a/src/extended_data/io/files.py +++ b/src/extended_data/io/files.py @@ -11,7 +11,7 @@ from copy import deepcopy from dataclasses import dataclass, field from pathlib import Path -from typing import Any, TypeAlias, cast +from typing import TYPE_CHECKING, Any, TypeAlias, cast import validators @@ -22,6 +22,10 @@ from extended_data.primitives.serialization import normalize_data_encoding +if TYPE_CHECKING: + from extended_data.workflows import DataWorkflow + + FilePath: TypeAlias = str | os.PathLike[str] """Type alias for file paths that can be represented as strings or os.PathLike objects.""" @@ -113,6 +117,12 @@ def wrap_for_export(self, allow_encoding: bool | str = True, **format_opts: Any) """Return the artifact data wrapped as an encoded export string.""" return wrap_raw_data_for_export(self.data, allow_encoding=allow_encoding, **format_opts) + def workflow(self, *, as_extended: bool = True) -> DataWorkflow: + """Start a DataWorkflow from this artifact's decoded data.""" + from extended_data.workflows import DataWorkflow + + return DataWorkflow.from_data_file(self, as_extended=as_extended) + def write( self, file_path: FilePath | None = None, diff --git a/src/extended_data/workflows/__init__.py b/src/extended_data/workflows/__init__.py index ac9c82c..3237d40 100644 --- 
a/src/extended_data/workflows/__init__.py +++ b/src/extended_data/workflows/__init__.py @@ -10,7 +10,7 @@ from extended_data.containers import extend_data, to_builtin from extended_data.io.exporters import make_raw_data_export_safe, wrap_raw_data_for_export -from extended_data.io.files import FilePath, decode_file, read_data_file, write_file +from extended_data.io.files import DataFile, FilePath, decode_file, read_data_file, write_file WorkflowAction: TypeAlias = Callable[[Any], Any] @@ -84,6 +84,12 @@ def from_value(cls, value: Any, *, as_extended: bool = True) -> DataWorkflow: """Start a workflow from an in-memory value.""" return cls(value, steps=("value",), as_extended=as_extended) + @classmethod + def from_data_file(cls, artifact: DataFile, *, as_extended: bool = True) -> DataWorkflow: + """Start a workflow from a decoded DataFile artifact.""" + value = artifact.as_extended() if as_extended else artifact.as_builtin() + return cls(value, steps=(f"data_file:{artifact.source}",), as_extended=as_extended) + @classmethod def decode( cls, diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index dbc452c..4418bf6 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -10,6 +10,7 @@ import pytest from extended_data import ( + DataFile, DataWorkflow, ExtendedDict, ExtendedList, @@ -87,6 +88,30 @@ def select_services(data: ExtendedDict) -> ExtendedDict: } +def test_data_workflow_starts_from_data_file_artifact() -> None: + """DataFile artifacts can start named workflows without manual .data plumbing.""" + artifact = DataFile.decode('{"service": {"name": "api"}}', suffix="json", metadata={"status_code": 200}) + + workflow = artifact.workflow().then(("project-name", lambda data: {"name": data["service"]["name"]})) + result = workflow.result() + + assert workflow.steps == ("data_file:memory", "project-name") + assert isinstance(workflow.value, ExtendedDict) + assert result.value["name"].upper_first() == "Api" + assert 
artifact.metadata["status_code"] == 200 + + +def test_data_workflow_from_data_file_can_return_builtin_state() -> None: + """DataFile-to-workflow composition can explicitly lower to plain Python values.""" + artifact = DataFile.decode('{"service": {"name": "api"}}', suffix="json") + + workflow = DataWorkflow.from_data_file(artifact, as_extended=False) + + assert workflow.steps == ("data_file:memory",) + assert isinstance(workflow.value, dict) + assert not isinstance(workflow.value, ExtendedDict) + + def test_data_workflow_preserves_extended_policy_after_file_decode(tmp_path: Path) -> None: """Decoded workflows keep promoting plain transform outputs by default.""" write_file("config/service.json", {"service": {"name": "api"}}, tld=tmp_path) From b862dbb4c61de148e799b4ff3b6a3db226ebdca8 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 20:50:53 -0500 Subject: [PATCH 239/287] feat: preserve workflow metadata --- README.md | 8 +- docs/package-surface.md | 11 ++- src/extended_data/workflows/__init__.py | 100 ++++++++++++++++++++---- tests/core/test_workflows.py | 38 ++++++++- 4 files changed, 132 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index e544e62..5d61edd 100644 --- a/README.md +++ b/README.md @@ -240,9 +240,11 @@ first-class with promoted data, promoted source metadata, detached for artifact-first processing. `DataWorkflow` makes multi-step compositions first-class: read, decode, or accept a `DataFile` artifact, apply named transformations, write an output artifact, and keep the step trail in a -`WorkflowResult`. Completed workflow results expose detached promoted views with -`as_extended()` plus direct `to_export_safe()` and `wrap_for_export()` helpers. -Missing file inputs and empty writes fail loudly. +`WorkflowResult`. Workflow metadata is promoted and preserved across +transformations, lowering/promoting, and writes, so file and API provenance can +stay with the result. 
Completed workflow results expose detached promoted views +with `as_extended()` plus direct `to_export_safe()` and `wrap_for_export()` +helpers. Missing file inputs and empty writes fail loudly. `InputProvider` stores its active, frozen, and merged input snapshots as `ExtendedDict` values, so direct input-data access can use Tier 2 container methods. `snapshot_inputs()` returns detached active or frozen snapshots, and diff --git a/docs/package-surface.md b/docs/package-surface.md index de2348f..d95cd64 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -179,10 +179,12 @@ processing. It reads or decodes structured data through the file and format processors, accepts `DataFile` artifacts with `from_data_file()`, promotes values into Tier 2 containers by default, applies named transformation steps, writes output artifacts, and returns a `WorkflowResult` with the completed -value, output path, and step trail. `WorkflowResult.as_extended()` returns a -detached promoted view of the completed value, and result-level -`to_export_safe()` / `wrap_for_export()` expose the same export boundary used by -Tier 2 containers. +value, output path, step trail, and promoted metadata. Workflow metadata is +preserved across `then()`, `run()`, `as_builtin()`, `as_extended()`, and +`write()`, so file and API provenance from `DataFile` artifacts remains attached +to the result. `WorkflowResult.as_extended()` returns a detached promoted view +of the completed value, and result-level `to_export_safe()` / +`wrap_for_export()` expose the same export boundary used by Tier 2 containers. 
```python from extended_data import DataWorkflow @@ -195,6 +197,7 @@ result = ( ) assert result.steps == ("read:config/base.yaml", "merge-env", "write:build/config.yaml") +assert result.metadata["source"] == "config/base.yaml" assert result.as_extended()["service"]["name"].upper_first() == "Api" assert result.to_export_safe()["service"]["name"] == "api" ``` diff --git a/src/extended_data/workflows/__init__.py b/src/extended_data/workflows/__init__.py index 3237d40..c37a979 100644 --- a/src/extended_data/workflows/__init__.py +++ b/src/extended_data/workflows/__init__.py @@ -2,15 +2,15 @@ from __future__ import annotations -from collections.abc import Callable, Iterable +from collections.abc import Callable, Iterable, Mapping from copy import deepcopy -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Any, TypeAlias -from extended_data.containers import extend_data, to_builtin +from extended_data.containers import ExtendedDict, extend_data, to_builtin from extended_data.io.exporters import make_raw_data_export_safe, wrap_raw_data_for_export -from extended_data.io.files import DataFile, FilePath, decode_file, read_data_file, write_file +from extended_data.io.files import DataFile, FilePath, write_file WorkflowAction: TypeAlias = Callable[[Any], Any] @@ -29,13 +29,32 @@ def __call__(self, value: Any) -> Any: return self.action(value) -@dataclass(frozen=True, slots=True) +@dataclass(frozen=True, slots=True, init=False) class WorkflowResult: """The completed value and audit trail for a data workflow.""" value: Any - steps: tuple[str, ...] = () - output_path: Path | None = None + steps: tuple[str, ...] 
+ output_path: Path | None + _metadata: ExtendedDict = field(repr=False) + + def __init__( + self, + value: Any, + steps: Iterable[str] = (), + output_path: Path | None = None, + metadata: Mapping[str, Any] | None = None, + ) -> None: + """Store workflow metadata as promoted detached data.""" + object.__setattr__(self, "value", value) + object.__setattr__(self, "steps", tuple(steps)) + object.__setattr__(self, "output_path", output_path) + object.__setattr__(self, "_metadata", ExtendedDict(metadata or {})) + + @property + def metadata(self) -> ExtendedDict: + """Return a detached promoted copy of workflow metadata.""" + return ExtendedDict(to_builtin(self._metadata)) def as_builtin(self) -> Any: """Return the workflow value lowered to built-in Python containers.""" @@ -63,11 +82,13 @@ def __init__( *, steps: Iterable[str] = (), as_extended: bool = True, + metadata: Mapping[str, Any] | None = None, ) -> None: """Create a workflow from an existing value.""" self._value = extend_data(value) if as_extended else value self._steps = tuple(steps) self._as_extended = as_extended + self._metadata = ExtendedDict(metadata or {}) @property def value(self) -> Any: @@ -79,16 +100,32 @@ def steps(self) -> tuple[str, ...]: """Return the names of executed workflow steps.""" return self._steps + @property + def metadata(self) -> ExtendedDict: + """Return a detached promoted copy of workflow metadata.""" + return ExtendedDict(to_builtin(self._metadata)) + @classmethod - def from_value(cls, value: Any, *, as_extended: bool = True) -> DataWorkflow: + def from_value( + cls, + value: Any, + *, + as_extended: bool = True, + metadata: Mapping[str, Any] | None = None, + ) -> DataWorkflow: """Start a workflow from an in-memory value.""" - return cls(value, steps=("value",), as_extended=as_extended) + return cls(value, steps=("value",), as_extended=as_extended, metadata=metadata) @classmethod def from_data_file(cls, artifact: DataFile, *, as_extended: bool = True) -> DataWorkflow: """Start a 
workflow from a decoded DataFile artifact.""" value = artifact.as_extended() if as_extended else artifact.as_builtin() - return cls(value, steps=(f"data_file:{artifact.source}",), as_extended=as_extended) + return cls( + value, + steps=(f"data_file:{artifact.source}",), + as_extended=as_extended, + metadata=artifact.metadata, + ) @classmethod def decode( @@ -98,10 +135,22 @@ def decode( file_path: FilePath | None = None, suffix: str | None = None, as_extended: bool = True, + metadata: Mapping[str, Any] | None = None, ) -> DataWorkflow: """Start a workflow by decoding structured text or bytes.""" - decoded = decode_file(file_data, file_path=file_path, suffix=suffix, as_extended=as_extended) - return cls(decoded, steps=(_decode_step_name(file_path=file_path, suffix=suffix),), as_extended=as_extended) + artifact = DataFile.decode( + file_data, + file_path=file_path, + suffix=suffix, + as_extended=as_extended, + metadata=metadata, + ) + return cls( + artifact.data, + steps=(_decode_step_name(file_path=file_path, suffix=suffix),), + as_extended=as_extended, + metadata=artifact.metadata, + ) @classmethod def from_file( @@ -115,7 +164,7 @@ def from_file( tld: Path | None = None, ) -> DataWorkflow: """Read and decode a local file or URL into a workflow.""" - decoded = read_data_file( + artifact = DataFile.read( file_path, suffix=suffix, as_extended=as_extended, @@ -123,7 +172,12 @@ def from_file( errors=errors, tld=tld, ) - return cls(decoded, steps=(f"read:{file_path}",), as_extended=as_extended) + return cls( + artifact.data, + steps=(f"read:{file_path}",), + as_extended=as_extended, + metadata=artifact.metadata, + ) def then( self, @@ -142,6 +196,7 @@ def then( next_value, steps=(*self._steps, workflow_step.name), as_extended=should_extend, + metadata=self._metadata, ) def run(self, *steps: StepLike, as_extended: bool | None = None) -> DataWorkflow: @@ -153,15 +208,25 @@ def run(self, *steps: StepLike, as_extended: bool | None = None) -> DataWorkflow def 
as_builtin(self) -> DataWorkflow: """Return the next workflow state with built-in Python containers.""" - return DataWorkflow(to_builtin(self._value), steps=(*self._steps, "to_builtin"), as_extended=False) + return DataWorkflow( + to_builtin(self._value), + steps=(*self._steps, "to_builtin"), + as_extended=False, + metadata=self._metadata, + ) def as_extended(self) -> DataWorkflow: """Return the next workflow state with Extended Data containers.""" - return DataWorkflow(extend_data(self._value), steps=(*self._steps, "as_extended"), as_extended=True) + return DataWorkflow( + extend_data(self._value), + steps=(*self._steps, "as_extended"), + as_extended=True, + metadata=self._metadata, + ) def result(self) -> WorkflowResult: """Return a completed workflow result without writing an output artifact.""" - return WorkflowResult(value=self._value, steps=self._steps) + return WorkflowResult(value=self._value, steps=self._steps, metadata=self.metadata) def write( self, @@ -190,6 +255,7 @@ def write( value=self._value, steps=(*self._steps, f"write:{file_path}"), output_path=output_path, + metadata=self.metadata, ) diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index 4418bf6..e39f45f 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -97,8 +97,10 @@ def test_data_workflow_starts_from_data_file_artifact() -> None: assert workflow.steps == ("data_file:memory", "project-name") assert isinstance(workflow.value, ExtendedDict) + assert workflow.metadata["status_code"] == 200 assert result.value["name"].upper_first() == "Api" - assert artifact.metadata["status_code"] == 200 + assert result.metadata["status_code"] == 200 + assert result.metadata["source"] == "memory" def test_data_workflow_from_data_file_can_return_builtin_state() -> None: @@ -110,6 +112,40 @@ def test_data_workflow_from_data_file_can_return_builtin_state() -> None: assert workflow.steps == ("data_file:memory",) assert isinstance(workflow.value, dict) assert not 
isinstance(workflow.value, ExtendedDict) + assert workflow.metadata["encoding"] == "json" + + +def test_data_workflow_metadata_survives_state_transitions(tmp_path: Path) -> None: + """Workflow metadata should stay promoted through transforms, lowering, and writes.""" + write_file("config/service.json", {"service": {"name": "api"}}, tld=tmp_path) + + workflow = ( + DataWorkflow.from_file("config/service.json", tld=tmp_path) + .then(("project", lambda data: {"name": data["service"]["name"]})) + .as_builtin() + .as_extended() + ) + result = workflow.write("build/service.json", tld=tmp_path) + + assert workflow.metadata["source"] == "config/service.json" + assert workflow.metadata["encoding"] == "json" + assert workflow.metadata["path"] == str((tmp_path / "config" / "service.json").resolve()) + assert result.metadata == workflow.metadata + assert result.output_path == tmp_path / "build" / "service.json" + + +def test_workflow_metadata_views_are_detached() -> None: + """Workflow and result metadata accessors should not expose mutable internals.""" + workflow = DataWorkflow.from_value({"service": "api"}, metadata={"source": {"name": "payload"}}) + result = workflow.result() + + workflow_metadata = workflow.metadata + result_metadata = result.metadata + workflow_metadata["source"]["name"] = "mutated" + result_metadata["source"]["name"] = "also-mutated" + + assert workflow.metadata["source"]["name"] == "payload" + assert result.metadata["source"]["name"] == "payload" def test_data_workflow_preserves_extended_policy_after_file_decode(tmp_path: Path) -> None: From 40f9bca98e2d7e429103d98d36caafa2cd4a8e51 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 20:54:31 -0500 Subject: [PATCH 240/287] fix: redact data file provenance --- README.md | 6 ++++-- docs/package-surface.md | 3 +++ src/extended_data/io/files.py | 21 ++++++++++++-------- src/extended_data/primitives/redaction.py | 2 +- tests/core/test_file_data_type.py | 24 +++++++++++++++++++++++ 
tests/core/test_redaction.py | 11 +++++++++++ 6 files changed, 56 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 5d61edd..22e578f 100644 --- a/README.md +++ b/README.md @@ -237,8 +237,10 @@ workflows; it raises for missing files and promotes structured data into Tier 2 containers by default. `DataFile` makes one decoded file or URL artifact first-class with promoted data, promoted source metadata, detached `as_extended()` views, direct write/export helpers, and a `workflow()` bridge -for artifact-first processing. `DataWorkflow` makes multi-step compositions -first-class: read, decode, or accept a `DataFile` artifact, apply named +for artifact-first processing. DataFile source labels and metadata use the +shared Tier 1 redaction policy before they enter workflow steps or result +metadata. `DataWorkflow` makes multi-step compositions first-class: read, +decode, or accept a `DataFile` artifact, apply named transformations, write an output artifact, and keep the step trail in a `WorkflowResult`. Workflow metadata is promoted and preserved across transformations, lowering/promoting, and writes, so file and API provenance can diff --git a/docs/package-surface.md b/docs/package-surface.md index d95cd64..7032762 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -173,6 +173,9 @@ in-memory payload. It keeps `source`, `encoding`, and source metadata promoted, returns decoded `data` as Tier 2 containers by default, exposes detached `as_extended()` views, writes output artifacts through the same export boundary as `write_file()`, and starts artifact-first processing with `workflow()`. +Source labels and metadata are redacted with the Tier 1 redaction policy before +they enter workflow step names or `WorkflowResult.metadata`; caller-supplied +metadata cannot override the sanitized core `source` and `path` fields. `DataWorkflow` is the Tier 3 composition surface for higher-order data processing. 
It reads or decodes structured data through the file and format diff --git a/src/extended_data/io/files.py b/src/extended_data/io/files.py index 236883a..723684f 100644 --- a/src/extended_data/io/files.py +++ b/src/extended_data/io/files.py @@ -19,6 +19,7 @@ from extended_data.containers import ExtendedDict, ExtendedString, extend_data, to_builtin from extended_data.io.exporters import make_raw_data_export_safe, wrap_raw_data_for_export +from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text from extended_data.primitives.serialization import normalize_data_encoding @@ -55,7 +56,7 @@ def decode( decoded = decode_file(file_data, file_path=file_path, suffix=encoding, as_extended=as_extended) source = str(file_path) if file_path is not None else "memory" return cls( - source=ExtendedString(source), + source=ExtendedString(_safe_data_file_source(source)), data=decoded, encoding=ExtendedString(encoding), metadata=_data_file_metadata(source=source, encoding=encoding, path=None, data=decoded, extra=metadata), @@ -94,7 +95,7 @@ def read( ) path = None if is_url(source) else resolve_local_path(file_path, tld=tld) return cls( - source=ExtendedString(source), + source=ExtendedString(_safe_data_file_source(source)), data=decoded, encoding=ExtendedString(encoding), path=path, @@ -150,7 +151,7 @@ def write( output_encoding = _resolve_data_file_encoding(file_path=output_path, suffix=encoding) return DataFile( - source=ExtendedString(str(target)), + source=ExtendedString(_safe_data_file_source(str(target))), data=self.data, encoding=ExtendedString(output_encoding), path=output_path, @@ -167,6 +168,11 @@ def _resolve_data_file_encoding(*, file_path: FilePath | None = None, suffix: st return "raw" +def _safe_data_file_source(source: str) -> str: + """Return a source label safe for metadata and workflow steps.""" + return redact_sensitive_text(source) + + def _data_file_metadata( *, source: str, @@ -176,17 +182,16 @@ def _data_file_metadata( extra: 
Mapping[str, Any] | None = None, ) -> ExtendedDict: """Return promoted artifact metadata for workflow and connector handoff.""" - metadata = ExtendedDict( + metadata = ExtendedDict(redact_sensitive_data(extra or {})) + metadata.update( { - "source": source, + "source": _safe_data_file_source(source), "encoding": encoding, - "path": str(path) if path is not None else None, + "path": redact_sensitive_text(path) if path is not None else None, "is_url": is_url(source), "data_type": type(data).__name__, } ) - if extra: - metadata.update(extra) return metadata diff --git a/src/extended_data/primitives/redaction.py b/src/extended_data/primitives/redaction.py index 1f2eeb7..5187776 100644 --- a/src/extended_data/primitives/redaction.py +++ b/src/extended_data/primitives/redaction.py @@ -18,7 +18,7 @@ rf"(?i)([\"']?(?:{SENSITIVE_KEY_PATTERN})[\"']?\s*:\s*)" rf"([\"'][^\"']*[\"']|Bearer\s+[^\s,;}}\]]+|[^,\s}}\]]+)" ) -KEY_VALUE_SECRET_RE = re.compile(rf"(?i)(\b(?:{SENSITIVE_KEY_PATTERN})\b\s*=\s*)([^\s,;]+)") +KEY_VALUE_SECRET_RE = re.compile(rf"(?i)(\b(?:{SENSITIVE_KEY_PATTERN})\b\s*=\s*)([^\s,;&]+)") CLI_SECRET_RE = re.compile(rf"(?i)(--(?:{SENSITIVE_KEY_PATTERN})(?:=|\s+))(\S+)") BEARER_SECRET_RE = re.compile(r"(?i)(\bBearer\s+)[A-Za-z0-9._~+/=-]+") REDACTED = "[REDACTED]" diff --git a/tests/core/test_file_data_type.py b/tests/core/test_file_data_type.py index ff4a7a5..4dc17cc 100644 --- a/tests/core/test_file_data_type.py +++ b/tests/core/test_file_data_type.py @@ -625,6 +625,30 @@ def test_data_file_decode_and_write_round_trip(tmp_path: Path) -> None: assert read_data_file(output.path) == {"service": {"name": "api"}} +def test_data_file_redacts_secret_bearing_source_and_metadata() -> None: + """DataFile provenance should be safe to carry into workflow metadata.""" + artifact = DataFile.decode( + '{"service": {"name": "api"}}', + file_path="https://example.com/config.json?api_key=key_123&region=us-east-1", + suffix="json", + metadata={ + "authorization": "Bearer raw_token",
+ "nested": {"client_secret": "secret_456"}, + "source": "password=hunter2", + }, + ) + workflow = artifact.workflow() + + assert "key_123" not in artifact.source + assert "region=us-east-1" in artifact.source + assert artifact.metadata["source"] == artifact.source + assert artifact.metadata["authorization"] == "[REDACTED]" + assert artifact.metadata["nested"]["client_secret"] == "[REDACTED]" + assert "hunter2" not in artifact.metadata["source"] + assert workflow.metadata["source"] == artifact.source + assert "key_123" not in workflow.steps[0] + + def test_data_file_write_without_local_target_fails_loudly() -> None: """In-memory DataFile artifacts require an explicit output path.""" artifact = DataFile.decode("plain text", suffix="raw") diff --git a/tests/core/test_redaction.py b/tests/core/test_redaction.py index e29b40a..e287a52 100644 --- a/tests/core/test_redaction.py +++ b/tests/core/test_redaction.py @@ -31,6 +31,17 @@ def test_redact_sensitive_text_accepts_known_diagnostic_values() -> None: assert redacted.count("[REDACTED]") == 3 +def test_redact_sensitive_text_preserves_non_secret_url_query_values() -> None: + """Key/value redaction should not consume unrelated URL query parameters.""" + message = "https://example.com/config.json?api_key=key_123&region=us-east-1" + + redacted = redact_sensitive_text(message) + + assert "key_123" not in redacted + assert "api_key=[REDACTED]" in redacted + assert "region=us-east-1" in redacted + + def test_redact_sensitive_text_flattens_nested_known_values() -> None: """Caller-provided diagnostic context can be nested like CLI or MCP arguments.""" message = "failed for user@example.com at /tmp/private%2Fpath using prompt Fix login" From 00f4bb21e2bcf6e7273b222f1fb6e6021ee34e91 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 21:00:55 -0500 Subject: [PATCH 241/287] fix: decode cursor responses through data boundary --- .../connectors/cursor/__init__.py | 21 +++++++--- tests/connectors/test_cursor.py | 40
++++++++++++++++++- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index 39e1aaf..5e23d5a 100644 --- a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -393,7 +393,7 @@ def _request_api( endpoint: str, method: str = "GET", json_body: dict[str, Any] | None = None, - ) -> dict[str, Any] | None: + ) -> ExtendedDict | None: """Make an HTTP request to the Cursor API. Args: @@ -423,7 +423,12 @@ def _request_api( if not text or not text.strip(): return None - return response.json() + decoded = self.decode_response(response, suffix="json", as_extended=True) + if decoded is None: + return None + if not isinstance(decoded, Mapping): + raise self._unexpected_response_error("_request_api", decoded, endpoint, json_body) + return ExtendedDict(decoded) except httpx.TimeoutException: raise CursorAPIError(f"Request timeout after {self._timeout}s") from None @@ -455,8 +460,9 @@ def _parse_model_response( *sensitive_values: Any, ) -> dict[str, Any]: """Validate one Cursor response model and return a JSON payload.""" + model_data = to_builtin(data) try: - return self._model_payload(model_type.model_validate(data)) + return self._model_payload(model_type.model_validate(model_data)) except ValidationError: raise self._unexpected_response_error(operation, data, *sensitive_values) from None @@ -469,7 +475,8 @@ def _parse_model_list( *sensitive_values: Any, ) -> list[dict[str, Any]]: """Validate a Cursor response list and return JSON payloads.""" - items = data.get(key, []) if isinstance(data, Mapping) else None + model_data = to_builtin(data) + items = model_data.get(key, []) if isinstance(model_data, Mapping) else None if not isinstance(items, list): raise self._unexpected_response_error(operation, data, *sensitive_values) @@ -539,7 +546,8 @@ def get_agent_conversation(self, agent_id: str) -> ExtendedDict: if not data: 
return self.extend_result(self._model_payload(Conversation(agent_id=agent_id, messages=[]))) - message_data = data.get("messages", []) if isinstance(data, Mapping) else None + plain_data = to_builtin(data) + message_data = plain_data.get("messages", []) if isinstance(plain_data, Mapping) else None if not isinstance(message_data, list): raise self._unexpected_response_error("get_agent_conversation", data, agent_id) @@ -706,7 +714,8 @@ def list_models(self) -> ExtendedList[ExtendedString]: if not data: return self.extend_result([]) - models = data.get("models", []) if isinstance(data, Mapping) else None + plain_data = to_builtin(data) + models = plain_data.get("models", []) if isinstance(plain_data, Mapping) else None if not isinstance(models, list) or any(not isinstance(model, str) for model in models): raise self._unexpected_response_error("list_models", data) return self.extend_result(models) diff --git a/tests/connectors/test_cursor.py b/tests/connectors/test_cursor.py index 2b200eb..ddd6d85 100644 --- a/tests/connectors/test_cursor.py +++ b/tests/connectors/test_cursor.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import os from unittest.mock import MagicMock, patch @@ -44,7 +45,8 @@ def _json_response(payload: object) -> MagicMock: response.status_code = 200 response.is_success = True response.headers = {"content-type": "application/json"} - response.text = "{}" + response.text = json.dumps(payload) + response.content = response.text.encode() response.json.return_value = payload return response @@ -218,6 +220,33 @@ def test_conversation_model(self): assert len(conv.messages) == 2 +class TestTransport: + """Tests for Cursor HTTP transport integration with Extended Data.""" + + @patch("extended_data.connectors.cursor.httpx.Client") + def test_request_api_decodes_response_through_extended_data_boundary(self, mock_client_class): + """Private transport should decode JSON bytes into ExtendedDict payloads.""" + mock_client = MagicMock() + 
mock_client_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_success = True + mock_response.headers = {"content-type": "application/json"} + mock_response.text = '{"service": {"name": "api"}}' + mock_response.content = mock_response.text.encode() + mock_response.json.side_effect = AssertionError("raw response.json() should not be used") + mock_client.request.return_value = mock_response + + connector = CursorConnector(api_key="test-key") + payload = connector._request_api("/status") + + assert isinstance(payload, ExtendedDict) + assert isinstance(payload["service"], ExtendedDict) + assert isinstance(payload["service"]["name"], ExtendedString) + assert payload["service"]["name"].upper_first() == "Api" + + class TestCursorConnector: """Tests for CursorConnector.""" @@ -254,6 +283,7 @@ def test_list_agents(self, mock_client_class): mock_response.is_success = True mock_response.headers = {"content-type": "application/json"} mock_response.text = '{"agents": [{"id": "agent-1", "state": "running"}]}' + mock_response.content = mock_response.text.encode() mock_response.json.return_value = {"agents": [{"id": "agent-1", "state": "running"}]} mock_client.request.return_value = mock_response @@ -278,6 +308,7 @@ def test_get_agent_status_returns_extended_dict(self, mock_client_class): mock_response.is_success = True mock_response.headers = {"content-type": "application/json"} mock_response.text = '{"id": "agent-1", "state": "finished", "pr_url": "https://github.com/org/repo/pull/1"}' + mock_response.content = mock_response.text.encode() mock_response.json.return_value = { "id": "agent-1", "state": "finished", @@ -329,6 +360,7 @@ def test_get_agent_conversation_returns_extended_dict(self, mock_client_class): mock_response.is_success = True mock_response.headers = {"content-type": "application/json"} mock_response.text = '{"messages": [{"role": "user", "content": "hello"}]}' + mock_response.content = 
mock_response.text.encode() mock_response.json.return_value = {"messages": [{"role": "user", "content": "hello"}]} mock_client.request.return_value = mock_response @@ -350,6 +382,8 @@ def test_launch_agent(self, mock_client_class): mock_response.status_code = 200 mock_response.is_success = True mock_response.headers = {"content-type": "application/json"} + mock_response.text = '{"id": "new-agent", "state": "pending"}' + mock_response.content = mock_response.text.encode() mock_response.json.return_value = {"id": "new-agent", "state": "pending"} mock_client.request.return_value = mock_response @@ -385,6 +419,7 @@ def test_launch_agent_redacts_repository_diagnostics_but_preserves_payload(self, mock_response.is_success = True mock_response.headers = {"content-type": "application/json"} mock_response.text = '{"id": "new-agent", "state": "pending"}' + mock_response.content = mock_response.text.encode() mock_response.json.return_value = {"id": "new-agent", "state": "pending"} mock_client.request.return_value = mock_response @@ -422,6 +457,7 @@ def test_list_repositories_returns_extended_list(self, mock_client_class): mock_response.is_success = True mock_response.headers = {"content-type": "application/json"} mock_response.text = '{"repositories": [{"name": "org/repo", "default_branch": "main"}]}' + mock_response.content = mock_response.text.encode() mock_response.json.return_value = {"repositories": [{"name": "org/repo", "default_branch": "main"}]} mock_client.request.return_value = mock_response @@ -443,6 +479,7 @@ def test_list_models_returns_extended_list(self, mock_client_class): mock_response.is_success = True mock_response.headers = {"content-type": "application/json"} mock_response.text = '{"models": ["cursor-small", "cursor-large"]}' + mock_response.content = mock_response.text.encode() mock_response.json.return_value = {"models": ["cursor-small", "cursor-large"]} mock_client.request.return_value = mock_response @@ -464,6 +501,7 @@ def 
test_list_models_empty_response_returns_extended_list(self, mock_client_clas mock_response.is_success = True mock_response.headers = {"content-type": "application/json"} mock_response.text = "{}" + mock_response.content = mock_response.text.encode() mock_response.json.return_value = {} mock_client.request.return_value = mock_response From 6b2e8201d774cd6193e91c272b1d6bfdf5c04406 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 21:13:10 -0500 Subject: [PATCH 242/287] fix: decode connector responses through data boundary --- README.md | 3 + docs/package-surface.md | 5 +- .../connectors/anthropic/__init__.py | 20 +-- src/extended_data/connectors/google/jules.py | 12 +- src/extended_data/connectors/meshy/base.py | 20 ++- src/extended_data/connectors/zoom/__init__.py | 14 +- tests/connectors/meshy/test_task_ids.py | 17 +-- tests/connectors/test_anthropic.py | 56 ++++---- tests/connectors/test_google_jules.py | 8 +- tests/connectors/test_zoom_connector.py | 132 +++++++----------- 10 files changed, 137 insertions(+), 150 deletions(-) diff --git a/README.md b/README.md index 22e578f..2e8258b 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,9 @@ preserved. Connector data payloads are promoted into Tier 2 containers at the boundary, so decoded files, HTTP response data, GraphQL responses, and SDK-shaped maps can use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods immediately. +Built-in HTTP connectors decode response bytes through the same file/data +decoding primitives instead of bypassing the boundary with transport-specific +JSON helpers. Use `request_data_file()` when a connector workflow needs API response data and non-secret provenance such as source URL, HTTP status, content type, method, and endpoint in one `DataFile` artifact. diff --git a/docs/package-surface.md b/docs/package-surface.md index 7032762..8997cb6 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -309,7 +309,10 @@ credential material. 
Connectors that inherit `VendorConnectorBase` can keep raw transport access with `request()` or use `request_data()`, `get_data()`, `post_data()`, and the other verb-specific helpers to decode HTTP JSON, YAML, TOML, HCL, or text responses -through the same Tier 2 container bridge used by file and input decoding. Use +through the same Tier 2 container bridge used by file and input decoding. +Built-in connectors that parse HTTP JSON responses should decode response bytes +through these shared data primitives and lower to built-in values only at model +validation or redaction boundaries. Use `request_data_file()` when an API workflow needs the decoded data plus non-secret response provenance such as source URL, HTTP status, content type, method, and endpoint in a `DataFile` artifact. diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index 0584e1e..cb037f6 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ b/src/extended_data/connectors/anthropic/__init__.py @@ -282,9 +282,11 @@ def _handle_error(self, response: httpx.Response) -> None: """ status_code = response.status_code try: - error_data = response.json() - error_type = error_data.get("error", {}).get("type", "unknown") - message = error_data.get("error", {}).get("message", response.text) + error_data = self.decode_response(response, suffix="json", as_extended=True) + raw_error = error_data.get("error", {}) if isinstance(error_data, Mapping) else {} + error = raw_error if isinstance(raw_error, Mapping) else {} + error_type = error.get("type", "unknown") + message = error.get("message", response.text) except Exception: error_type = "unknown" message = response.text @@ -313,11 +315,11 @@ def _unexpected_response_error(operation: str, data: Any, *, status_code: int | def _response_json(self, response: httpx.Response, operation: str) -> Any: """Parse a response body or raise a redacted malformed-response error.""" try: - return 
response.json() - except Exception as exc: + return self.decode_response(response, suffix="json", as_extended=True) + except Exception: raise self._unexpected_response_error( operation, - exc, + response.text, status_code=response.status_code, ) from None @@ -330,7 +332,7 @@ def _parse_model_response( """Validate one Anthropic model response and return a JSON payload.""" data = self._response_json(response, operation) try: - return self._model_payload(model_type.model_validate(data)) + return self._model_payload(model_type.model_validate(to_builtin(data))) except ValidationError: raise self._unexpected_response_error( operation, @@ -487,7 +489,7 @@ def list_models(self) -> ExtendedList[ExtendedDict]: data = self._response_json(response, "list_models") models_data = data.get("data") if isinstance(data, Mapping) else None - if not isinstance(models_data, list): + if not isinstance(models_data, (list, ExtendedList)): raise self._unexpected_response_error( "list_models", data, @@ -495,7 +497,7 @@ def list_models(self) -> ExtendedList[ExtendedDict]: ) try: - parsed_models = [self._model_payload(Model.model_validate(model_data)) for model_data in models_data] + parsed_models = [self._model_payload(Model.model_validate(to_builtin(model_data))) for model_data in models_data] except ValidationError: raise self._unexpected_response_error( "list_models", diff --git a/src/extended_data/connectors/google/jules.py b/src/extended_data/connectors/google/jules.py index c2005e0..ec6dce4 100644 --- a/src/extended_data/connectors/google/jules.py +++ b/src/extended_data/connectors/google/jules.py @@ -37,7 +37,7 @@ from extended_data.connectors.base import VendorConnectorBase from extended_data.connectors.google._diagnostics import safe_google_text -from extended_data.containers import ExtendedDict, ExtendedList +from extended_data.containers import ExtendedDict, ExtendedList, to_builtin from extended_data.primitives.redaction import redact_sensitive_data @@ -172,7 +172,7 @@ def 
_handle_response(self, response: httpx.Response, operation: str, *sensitive_ data = self._response_json(response, operation, diagnostic_values) if not isinstance(data, Mapping): raise self._unexpected_response_error(operation, data, response.status_code, diagnostic_values) - return dict(data) + return to_builtin(data) def _raise_api_error( self, @@ -195,7 +195,7 @@ def _raise_api_error( raise JulesError( safe_google_text(error.get("message", response.text), diagnostic_values), error_code, - redact_sensitive_data(error.get("details"), values=diagnostic_values), + redact_sensitive_data(to_builtin(error.get("details")), values=diagnostic_values), ) def _response_json(self, response: httpx.Response, operation: str, diagnostic_values: list[Any]) -> Any: @@ -203,7 +203,7 @@ def _response_json(self, response: httpx.Response, operation: str, diagnostic_va if not response.content: return {} try: - return response.json() + return self.decode_response(response, suffix="json", as_extended=True) except Exception: raise self._unexpected_response_error( operation, @@ -250,7 +250,7 @@ def _parse_model_response( ) -> dict[str, Any]: """Validate one Jules response model and return a JSON payload.""" try: - return self._model_payload(model_type.model_validate(data)) + return self._model_payload(model_type.model_validate(to_builtin(data))) except ValidationError: raise self._unexpected_response_error( operation, @@ -273,7 +273,7 @@ def _parse_model_list( raise self._unexpected_response_error(operation, data, 200, list(sensitive_values)) try: - return [self._model_payload(model_type.model_validate(item)) for item in items] + return [self._model_payload(model_type.model_validate(to_builtin(item))) for item in items] except ValidationError: raise self._unexpected_response_error(operation, data, 200, list(sensitive_values)) from None diff --git a/src/extended_data/connectors/meshy/base.py b/src/extended_data/connectors/meshy/base.py index cb0f99d..4390c94 100644 --- 
a/src/extended_data/connectors/meshy/base.py +++ b/src/extended_data/connectors/meshy/base.py @@ -22,8 +22,9 @@ from pydantic import BaseModel, ValidationError from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential -from extended_data.containers import ExtendedDict, ExtendedString, extend_data +from extended_data.containers import ExtendedDict, ExtendedString, extend_data, to_builtin from extended_data.inputs import InputProvider +from extended_data.io.files import decode_file from extended_data.primitives.redaction import redact_sensitive_text @@ -129,20 +130,27 @@ def unexpected_response_message(data: Any) -> str: return f"Unexpected API response: missing 'result' key. Response: {redact_sensitive_text(data)}" +def _decode_response_json(response: httpx.Response) -> Any: + """Decode a Meshy JSON response through the shared data boundary.""" + if not response.content: + return None + return decode_file(response.content, suffix="json", as_extended=True) + + def task_id_from_response(response: httpx.Response) -> ExtendedString: """Extract a non-empty Meshy task id from a create/refine response.""" - data = response.json() + data = _decode_response_json(response) result = data.get("result") if isinstance(data, Mapping) else None - if not isinstance(result, str) or not result.strip(): + if not isinstance(result, (str, ExtendedString)) or not str(result).strip(): raise RuntimeError(unexpected_response_message(data)) - return ExtendedString(result) + return ExtendedString(str(result)) def task_payload_from_response(response: httpx.Response, model_type: type[BaseModel], endpoint: str) -> ExtendedDict: """Validate a Meshy task payload and return a promoted public mapping.""" - data = response.json() + data = _decode_response_json(response) try: - result = model_type(**data) + result = model_type.model_validate(to_builtin(data)) except ValidationError: raise RuntimeError(f"Unexpected API response for {endpoint}: 
{redact_sensitive_text(data)}") from None return cast(ExtendedDict, extend_data(result.model_dump(mode="json"))) diff --git a/src/extended_data/connectors/zoom/__init__.py b/src/extended_data/connectors/zoom/__init__.py index 200591f..26a7030 100644 --- a/src/extended_data/connectors/zoom/__init__.py +++ b/src/extended_data/connectors/zoom/__init__.py @@ -10,7 +10,8 @@ import requests from extended_data.connectors.base import VendorConnectorBase -from extended_data.containers import ExtendedDict, ExtendedList +from extended_data.containers import ExtendedDict, ExtendedList, to_builtin +from extended_data.io.files import decode_file from extended_data.logging import Logging from extended_data.primitives.redaction import redact_sensitive_text @@ -69,17 +70,20 @@ def __init__( def _response_json(self, response: Any, action: str, *sensitive_values: Any) -> Any: """Parse a Zoom JSON response or raise a redacted diagnostic.""" + content = getattr(response, "content", b"") + if not content: + return {} try: - return response.json() - except Exception as exc: - raise _zoom_response_error(action, exc, *sensitive_values) from None + return decode_file(content, suffix="json", as_extended=True) + except Exception: + raise _zoom_response_error(action, getattr(response, "text", content), *sensitive_values) from None def _response_mapping(self, response: Any, action: str, *sensitive_values: Any) -> dict[str, Any]: """Parse and validate a Zoom object response.""" data = self._response_json(response, action, *sensitive_values) if not isinstance(data, Mapping): raise _zoom_response_error(action, data, *sensitive_values) - return dict(data) + return to_builtin(data) def _response_list_field( self, diff --git a/tests/connectors/meshy/test_task_ids.py b/tests/connectors/meshy/test_task_ids.py index 8c3dc78..0572a44 100644 --- a/tests/connectors/meshy/test_task_ids.py +++ b/tests/connectors/meshy/test_task_ids.py @@ -2,8 +2,11 @@ from __future__ import annotations +import json + from 
unittest.mock import MagicMock, patch +import httpx import pytest from extended_data.connectors.meshy import animate, image3d, retexture, rigging, text3d @@ -18,14 +21,13 @@ def _task_response(task_id: str) -> MagicMock: - response = MagicMock() - response.json.return_value = {"result": task_id} - return response + return _json_response({"result": task_id}) -def _json_response(payload: dict[str, object]) -> MagicMock: - response = MagicMock() - response.json.return_value = payload +def _json_response(payload: object) -> MagicMock: + response = MagicMock(spec=httpx.Response) + response.content = json.dumps(payload).encode() + response.json.side_effect = AssertionError("Meshy responses must be decoded from response content") return response @@ -266,8 +268,7 @@ def test_meshy_poll_redacts_failed_task_errors(monkeypatch: pytest.MonkeyPatch, @pytest.mark.parametrize("payload", [{"result": ""}, {"result": 123}, ["not", "a", "mapping"]]) def test_meshy_task_id_response_requires_non_empty_string_result(payload: object) -> None: """Task ids are string API handles, not arbitrary JSON payload values.""" - response = MagicMock() - response.json.return_value = payload + response = _json_response(payload) with patch("extended_data.connectors.meshy.image3d.base.request", return_value=response): with pytest.raises(RuntimeError, match="missing 'result' key"): diff --git a/tests/connectors/test_anthropic.py b/tests/connectors/test_anthropic.py index 83ef968..ac63aba 100644 --- a/tests/connectors/test_anthropic.py +++ b/tests/connectors/test_anthropic.py @@ -6,6 +6,7 @@ from unittest.mock import MagicMock, patch +import httpx import pytest from extended_data.connectors.anthropic import ( @@ -23,6 +24,20 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data +def _json_response(payload: object, status_code: int = 200) -> httpx.Response: + """Build an HTTPX response whose JSON must be read from content bytes.""" + response = 
httpx.Response(status_code, json=payload) + response.json = MagicMock(side_effect=AssertionError("Anthropic responses must be decoded from content bytes")) + return response + + +def _text_response(text: str, status_code: int = 200) -> httpx.Response: + """Build an HTTPX response with invalid/non-JSON body text.""" + response = httpx.Response(status_code, content=text.encode()) + response.json = MagicMock(side_effect=AssertionError("Anthropic responses must be decoded from content bytes")) + return response + + def _logged_text(logger: MagicMock) -> str: """Return concatenated mock logger messages.""" return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) @@ -143,10 +158,7 @@ def test_create_message(self): mock_client = MagicMock() - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.is_success = True - mock_response.json.return_value = { + mock_response = _json_response({ "id": "msg_123", "type": "message", "role": "assistant", @@ -154,7 +166,7 @@ def test_create_message(self): "model": "claude-sonnet-4-20250514", "stop_reason": "end_turn", "usage": {"input_tokens": 10, "output_tokens": 5}, - } + }) mock_client.request.return_value = mock_response with patch.object(httpx, "Client", return_value=mock_client): @@ -190,17 +202,14 @@ def test_create_message_with_system(self): mock_client = MagicMock() - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.is_success = True - mock_response.json.return_value = { + mock_response = _json_response({ "id": "msg_123", "type": "message", "role": "assistant", "content": [{"type": "text", "text": "Hello!"}], "model": "claude-sonnet-4-20250514", "usage": {"input_tokens": 10, "output_tokens": 5}, - } + }) mock_client.request.return_value = mock_response with patch.object(httpx, "Client", return_value=mock_client): @@ -221,15 +230,12 @@ def test_list_models(self): mock_client = MagicMock() - mock_response = MagicMock() - mock_response.status_code = 200 - 
mock_response.is_success = True - mock_response.json.return_value = { + mock_response = _json_response({ "data": [ {"id": "claude-sonnet-4-20250514", "display_name": "Claude Sonnet 4"}, {"id": "claude-opus-4-20250514", "display_name": "Claude Opus 4"}, ] - } + }) mock_client.request.return_value = mock_response with patch.object(httpx, "Client", return_value=mock_client): @@ -248,10 +254,7 @@ def test_get_model(self): mock_client = MagicMock() - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.is_success = True - mock_response.json.return_value = {"id": "claude-sonnet-4-20250514", "display_name": "Claude Sonnet 4"} + mock_response = _json_response({"id": "claude-sonnet-4-20250514", "display_name": "Claude Sonnet 4"}) mock_client.request.return_value = mock_response with patch.object(httpx, "Client", return_value=mock_client): @@ -267,10 +270,7 @@ def test_count_tokens_returns_vendor_token_count(self): import httpx mock_client = MagicMock() - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.is_success = True - mock_response.json.return_value = {"input_tokens": 42} + mock_response = _json_response({"input_tokens": 42}) mock_client.request.return_value = mock_response with patch.object(httpx, "Client", return_value=mock_client): @@ -311,10 +311,7 @@ def test_success_response_validation_errors_are_redacted(self, method_name, call import httpx mock_client = MagicMock() - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.is_success = True - mock_response.json.return_value = payload + mock_response = _json_response(payload) mock_client.request.return_value = mock_response with patch.object(httpx, "Client", return_value=mock_client): @@ -335,10 +332,7 @@ def test_success_response_json_errors_are_redacted(self): import httpx mock_client = MagicMock() - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.is_success = True - mock_response.json.side_effect = 
ValueError("bad password=hunter2 Authorization: Bearer raw_token") + mock_response = _text_response("bad password=hunter2 Authorization: Bearer raw_token") mock_client.request.return_value = mock_response with patch.object(httpx, "Client", return_value=mock_client): diff --git a/tests/connectors/test_google_jules.py b/tests/connectors/test_google_jules.py index 87d1bbb..346b8df 100644 --- a/tests/connectors/test_google_jules.py +++ b/tests/connectors/test_google_jules.py @@ -12,19 +12,23 @@ def _response(payload: object, status_code: int = 200) -> httpx.Response: - return httpx.Response( + response = httpx.Response( status_code, json=payload, request=httpx.Request("GET", "https://jules.googleapis.com/v1alpha/test"), ) + response.json = MagicMock(side_effect=AssertionError("Jules responses must be decoded from content bytes")) + return response def _text_response(text: str, status_code: int = 500, url: str = "https://jules.googleapis.com/v1alpha/test") -> httpx.Response: - return httpx.Response( + response = httpx.Response( status_code, text=text, request=httpx.Request("GET", url), ) + response.json = MagicMock(side_effect=AssertionError("Jules responses must be decoded from content bytes")) + return response def test_session_pull_request_model_property() -> None: diff --git a/tests/connectors/test_zoom_connector.py b/tests/connectors/test_zoom_connector.py index b0b8784..3c2f990 100644 --- a/tests/connectors/test_zoom_connector.py +++ b/tests/connectors/test_zoom_connector.py @@ -2,6 +2,8 @@ from __future__ import annotations +import json + from unittest.mock import MagicMock, patch import pytest @@ -16,14 +18,31 @@ def _logged_text(logger: MagicMock) -> str: return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) -def _token_response(token: str = "test-token") -> MagicMock: - """Build a successful Zoom OAuth response mock.""" +def _json_response(payload: object) -> MagicMock: + """Build a requests-like response whose JSON must be decoded 
from content.""" response = MagicMock() - response.json.return_value = {"access_token": token} + response.content = json.dumps(payload).encode() + response.text = response.content.decode() + response.json.side_effect = AssertionError("Zoom responses must be decoded from content bytes") response.raise_for_status = MagicMock() return response +def _text_response(text: str) -> MagicMock: + """Build a requests-like response with invalid/non-JSON body text.""" + response = MagicMock() + response.content = text.encode() + response.text = text + response.json.side_effect = AssertionError("Zoom responses must be decoded from content bytes") + response.raise_for_status = MagicMock() + return response + + +def _token_response(token: str = "test-token") -> MagicMock: + """Build a successful Zoom OAuth response mock.""" + return _json_response({"access_token": token}) + + class TestZoomConnector: """Test suite for ZoomConnector.""" @@ -43,9 +62,7 @@ def test_init(self, base_connector_kwargs): @patch("extended_data.connectors.zoom.requests.post") def test_get_access_token_success(self, mock_post, base_connector_kwargs): """Test successful access token retrieval.""" - mock_response = MagicMock() - mock_response.json.return_value = {"access_token": "test-access-token"} - mock_response.raise_for_status = MagicMock() + mock_response = _json_response({"access_token": "test-access-token"}) mock_post.return_value = mock_response connector = ZoomConnector( @@ -86,13 +103,11 @@ def test_get_access_token_failure(self, mock_post, base_connector_kwargs): @patch("extended_data.connectors.zoom.requests.post") def test_get_access_token_malformed_response_is_redacted(self, mock_post, base_connector_kwargs): """Missing token responses should fail loudly without exposing OAuth credentials.""" - mock_response = MagicMock() - mock_response.json.return_value = { + mock_response = _json_response({ "password": "hunter2", "authorization": "Bearer raw_token", "account_id": "test-account-id", - } - 
mock_response.raise_for_status = MagicMock() + }) mock_post.return_value = mock_response connector = ZoomConnector( @@ -114,10 +129,7 @@ def test_get_access_token_malformed_response_is_redacted(self, mock_post, base_c @patch("extended_data.connectors.zoom.requests.post") def test_list_users_redacts_request_failure_details(self, mock_post, mock_get, base_connector_kwargs): """Zoom list failures should not expose raw secret-bearing exception text.""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - mock_token_response.raise_for_status = MagicMock() - mock_post.return_value = mock_token_response + mock_post.return_value = _token_response() mock_get.side_effect = requests.exceptions.RequestException( "status=401 password=hunter2 Authorization: Bearer raw_token" ) @@ -143,11 +155,9 @@ def test_list_users_redacts_request_failure_details(self, mock_post, mock_get, b def test_list_users_malformed_response_is_redacted(self, mock_post, mock_get, base_connector_kwargs): """Malformed user list responses should not return partial or raw payloads.""" mock_post.return_value = _token_response() - mock_users_response = MagicMock() - mock_users_response.json.return_value = { + mock_users_response = _json_response({ "users": [{"password": "hunter2", "authorization": "Bearer raw_token"}] - } - mock_users_response.raise_for_status = MagicMock() + }) mock_get.return_value = mock_users_response connector = ZoomConnector( @@ -169,20 +179,15 @@ def test_list_users_malformed_response_is_redacted(self, mock_post, mock_get, ba @patch("extended_data.connectors.zoom.requests.post") def test_list_users(self, mock_post, mock_get, base_connector_kwargs): """Test listing Zoom users.""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - mock_token_response.raise_for_status = MagicMock() - mock_post.return_value = mock_token_response + mock_post.return_value = _token_response() - 
mock_users_response = MagicMock() - mock_users_response.json.return_value = { + mock_users_response = _json_response({ "users": [ {"email": "user1@example.com", "id": "123", "first_name": "User", "last_name": "One"}, {"email": "user2@example.com", "id": "456", "first_name": "User", "last_name": "Two"}, ], "next_page_token": None, - } - mock_users_response.raise_for_status = MagicMock() + }) mock_get.return_value = mock_users_response connector = ZoomConnector( @@ -214,9 +219,7 @@ def test_get_zoom_users_alias_is_not_preserved(self, base_connector_kwargs): @patch("extended_data.connectors.zoom.requests.post") def test_create_zoom_user(self, mock_post, base_connector_kwargs): """Test creating a Zoom user.""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - mock_token_response.raise_for_status = MagicMock() + mock_token_response = _token_response() mock_create_response = MagicMock() mock_create_response.raise_for_status = MagicMock() @@ -238,10 +241,7 @@ def test_create_zoom_user(self, mock_post, base_connector_kwargs): @patch("extended_data.connectors.zoom.requests.post") def test_remove_zoom_user_redacts_error_state_and_logs(self, mock_post, mock_delete, base_connector_kwargs): """Zoom mutation failures should redact user IDs and exception secrets.""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - mock_token_response.raise_for_status = MagicMock() - mock_post.return_value = mock_token_response + mock_post.return_value = _token_response() mock_delete.side_effect = requests.exceptions.RequestException( "failed for private-user@example.com?access_token=raw_token" ) @@ -265,9 +265,7 @@ def test_remove_zoom_user_redacts_error_state_and_logs(self, mock_post, mock_del @patch("extended_data.connectors.zoom.requests.post") def test_create_zoom_user_redacts_error_state_and_logs(self, mock_post, base_connector_kwargs): """Zoom create failures should 
redact user PII and avoid traceback logs.""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - mock_token_response.raise_for_status = MagicMock() + mock_token_response = _token_response() mock_post.side_effect = [ mock_token_response, requests.exceptions.RequestException("failed Jane SecretUser newuser@example.com token=raw-token"), @@ -295,19 +293,14 @@ def test_create_zoom_user_redacts_error_state_and_logs(self, mock_post, base_con @patch("extended_data.connectors.zoom.requests.post") def test_get_user(self, mock_post, mock_get, base_connector_kwargs): """Test getting a specific user.""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - mock_token_response.raise_for_status = MagicMock() - mock_post.return_value = mock_token_response + mock_post.return_value = _token_response() - mock_user_response = MagicMock() - mock_user_response.json.return_value = { + mock_user_response = _json_response({ "id": "123", "email": "user1@example.com", "first_name": "User", "last_name": "One", - } - mock_user_response.raise_for_status = MagicMock() + }) mock_get.return_value = mock_user_response connector = ZoomConnector( @@ -327,10 +320,7 @@ def test_get_user(self, mock_post, mock_get, base_connector_kwargs): @patch("extended_data.connectors.zoom.requests.post") def test_get_user_redacts_identifier_and_secret_details(self, mock_post, mock_get, base_connector_kwargs): """Zoom lookup failures should not echo user identifiers or secrets.""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - mock_token_response.raise_for_status = MagicMock() - mock_post.return_value = mock_token_response + mock_post.return_value = _token_response() mock_get.side_effect = requests.exceptions.RequestException( "404 for user1@example.com and user1%40example.com client_secret=s3cr3t" ) @@ -357,9 +347,7 @@ def 
test_get_user_redacts_identifier_and_secret_details(self, mock_post, mock_ge def test_get_user_malformed_response_is_redacted(self, mock_post, mock_get, base_connector_kwargs): """Zoom user lookups should reject non-object payloads without leaking identifiers.""" mock_post.return_value = _token_response() - mock_user_response = MagicMock() - mock_user_response.json.return_value = ["private-user@example.com", {"password": "hunter2"}] - mock_user_response.raise_for_status = MagicMock() + mock_user_response = _json_response(["private-user@example.com", {"password": "hunter2"}]) mock_get.return_value = mock_user_response connector = ZoomConnector( @@ -381,19 +369,14 @@ def test_get_user_malformed_response_is_redacted(self, mock_post, mock_get, base @patch("extended_data.connectors.zoom.requests.post") def test_list_meetings(self, mock_post, mock_get, base_connector_kwargs): """Test listing meetings for a user.""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - mock_token_response.raise_for_status = MagicMock() - mock_post.return_value = mock_token_response + mock_post.return_value = _token_response() - mock_meetings_response = MagicMock() - mock_meetings_response.json.return_value = { + mock_meetings_response = _json_response({ "meetings": [ {"id": "111", "topic": "Team Meeting"}, {"id": "222", "topic": "Client Call"}, ] - } - mock_meetings_response.raise_for_status = MagicMock() + }) mock_get.return_value = mock_meetings_response connector = ZoomConnector( @@ -413,10 +396,7 @@ def test_list_meetings(self, mock_post, mock_get, base_connector_kwargs): @patch("extended_data.connectors.zoom.requests.post") def test_list_meetings_redacts_identifier_and_secret_details(self, mock_post, mock_get, base_connector_kwargs): """Zoom meeting list failures should not chain raw user identifiers.""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - 
mock_token_response.raise_for_status = MagicMock() - mock_post.return_value = mock_token_response + mock_post.return_value = _token_response() mock_get.side_effect = requests.exceptions.RequestException( "failed for private-user@example.com type=scheduled token=raw-token" ) @@ -442,11 +422,9 @@ def test_list_meetings_redacts_identifier_and_secret_details(self, mock_post, mo def test_list_meetings_malformed_response_is_redacted(self, mock_post, mock_get, base_connector_kwargs): """Zoom meeting list responses should preserve the ExtendedList contract.""" mock_post.return_value = _token_response() - mock_meetings_response = MagicMock() - mock_meetings_response.json.return_value = { + mock_meetings_response = _json_response({ "meetings": [{"id": "111"}, "password=hunter2 Authorization: Bearer raw_token"] - } - mock_meetings_response.raise_for_status = MagicMock() + }) mock_get.return_value = mock_meetings_response connector = ZoomConnector( @@ -469,18 +447,13 @@ def test_list_meetings_malformed_response_is_redacted(self, mock_post, mock_get, @patch("extended_data.connectors.zoom.requests.post") def test_get_meeting(self, mock_post, mock_get, base_connector_kwargs): """Test getting a specific meeting.""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - mock_token_response.raise_for_status = MagicMock() - mock_post.return_value = mock_token_response + mock_post.return_value = _token_response() - mock_meeting_response = MagicMock() - mock_meeting_response.json.return_value = { + mock_meeting_response = _json_response({ "id": "111", "topic": "Team Meeting", "start_time": "2024-01-15T10:00:00Z", - } - mock_meeting_response.raise_for_status = MagicMock() + }) mock_get.return_value = mock_meeting_response connector = ZoomConnector( @@ -500,10 +473,7 @@ def test_get_meeting(self, mock_post, mock_get, base_connector_kwargs): @patch("extended_data.connectors.zoom.requests.post") def 
test_get_meeting_redacts_identifier_and_secret_details(self, mock_post, mock_get, base_connector_kwargs): """Zoom meeting lookup failures should not chain raw meeting identifiers.""" - mock_token_response = MagicMock() - mock_token_response.json.return_value = {"access_token": "test-token"} - mock_token_response.raise_for_status = MagicMock() - mock_post.return_value = mock_token_response + mock_post.return_value = _token_response() mock_get.side_effect = requests.exceptions.RequestException("meeting private-meeting token=raw-token") connector = ZoomConnector( @@ -527,11 +497,9 @@ def test_get_meeting_redacts_identifier_and_secret_details(self, mock_post, mock def test_get_meeting_json_parse_error_is_redacted(self, mock_post, mock_get, base_connector_kwargs): """Zoom JSON parse failures should not expose raw meeting IDs or parser text.""" mock_post.return_value = _token_response() - mock_meeting_response = MagicMock() - mock_meeting_response.json.side_effect = ValueError( + mock_meeting_response = _text_response( "bad meeting private-meeting password=hunter2 Authorization: Bearer raw_token" ) - mock_meeting_response.raise_for_status = MagicMock() mock_get.return_value = mock_meeting_response connector = ZoomConnector( From 514078d5597da1e09a63588c4adefd5622056f1d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 21:17:17 -0500 Subject: [PATCH 243/287] fix: decode s3 json objects through data boundary --- src/extended_data/connectors/aws/s3.py | 8 +++++--- tests/connectors/test_aws_s3.py | 27 +++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/extended_data/connectors/aws/s3.py b/src/extended_data/connectors/aws/s3.py index 066d27c..c8ba201 100644 --- a/src/extended_data/connectors/aws/s3.py +++ b/src/extended_data/connectors/aws/s3.py @@ -8,10 +8,11 @@ import json from collections.abc import Mapping, Sequence -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from 
extended_data.connectors.aws._diagnostics import safe_aws_ref, safe_aws_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin +from extended_data.io.files import decode_file from extended_data.primitives import unhump_map @@ -187,14 +188,15 @@ def get_json_object( content = self.get_object( bucket=bucket, key=key, - decode=True, + decode=False, execution_role_arn=execution_role_arn, ) if content is None: return None - return self.extend_result(json.loads(content if isinstance(content, bytes) else str(content))) + file_data = str(content) if isinstance(content, ExtendedString) else content + return cast(ExtendedDict | ExtendedList[Any], decode_file(file_data, suffix="json", as_extended=True)) def put_object( self, diff --git a/tests/connectors/test_aws_s3.py b/tests/connectors/test_aws_s3.py index 5270a36..8a9c2f5 100644 --- a/tests/connectors/test_aws_s3.py +++ b/tests/connectors/test_aws_s3.py @@ -15,8 +15,9 @@ from botocore.exceptions import ClientError -from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data from extended_data.connectors.aws import AWSConnector +from extended_data.connectors.aws import s3 as s3_module +from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data def _logged_text(logger: MagicMock) -> str: @@ -229,6 +230,30 @@ def test_get_json_object(self, aws_connector): assert isinstance(result["key"], ExtendedString) assert result == test_data + def test_get_json_object_decodes_through_data_boundary(self, monkeypatch, aws_connector): + """S3 JSON reads should use the shared file decoder, not local json.loads.""" + + class NoLocalJsonLoads: + dumps = staticmethod(json.dumps) + + @staticmethod + def loads(*args, **kwargs): + raise AssertionError("S3 JSON objects must be decoded through decode_file") + + mock_s3 = MagicMock() + mock_body = MagicMock() + mock_body.read.return_value = b'{"items":[{"name":"one"}]}' + 
mock_s3.get_object.return_value = {"Body": mock_body} + aws_connector.get_aws_client = MagicMock(return_value=mock_s3) + monkeypatch.setattr(s3_module, "json", NoLocalJsonLoads) + + result = aws_connector.get_json_object("bucket", "data.json") + + assert isinstance(result, ExtendedDict) + assert isinstance(result["items"], ExtendedList) + assert isinstance(result["items"][0], ExtendedDict) + assert isinstance(result["items"][0]["name"], ExtendedString) + def test_get_json_object_not_found(self, aws_connector): """Test getting a non-existent JSON object.""" mock_s3 = MagicMock() From 6070eba0320ba5583c6233acf47fab9f3b935c8d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 21:22:42 -0500 Subject: [PATCH 244/287] fix: decode secretsync output through data boundary --- README.md | 2 ++ docs/package-surface.md | 3 +- .../connectors/secrets/__init__.py | 10 +++--- tests/connectors/test_secrets.py | 33 +++++++++++++++---- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 2e8258b..b47d33b 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,8 @@ The `secrets` connector integrates with the standalone SecretSync project (`jbcom/secrets-sync`) through the `secretsync` CLI. It expects `secretsync pipeline --output json` to return the stable pipeline result envelope used by this package. +That JSON envelope is decoded through the same file/data primitives as other +structured connector payloads before being lowered into the `SyncResult` model. ```python from extended_data import SecretsConnector, SyncOptions diff --git a/docs/package-surface.md b/docs/package-surface.md index 8997cb6..e92c5e4 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -349,7 +349,8 @@ assert payload["service"]["name"].upper_first() == "Api" The `secrets` adapter is the Python-facing bridge to the standalone SecretSync project (`jbcom/secrets-sync`). 
It uses the `secretsync` CLI, which must emit the stable `secretsync pipeline --output json` result envelope for both dry-run -and apply runs. +and apply runs. The connector decodes that envelope through the shared file/data +primitives before lowering it into the `SyncResult` model. Secrets tool factories are exported from `extended_data.secrets`; the duplicate `extended_data.secrets.tools` module path is intentionally not preserved. diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index d1422e5..51f51cb 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -40,8 +40,10 @@ from typing import Any from extended_data.connectors.base import VendorConnectorBase -from extended_data.containers import ExtendedDict, extend_data +from extended_data.containers import ExtendedDict, extend_data, to_builtin +from extended_data.io.files import decode_file from extended_data.logging import Logging +from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text @@ -338,8 +340,8 @@ def _cli_run_pipeline( stdout = result.stdout.strip() if stdout: try: - output = json.loads(stdout) - except json.JSONDecodeError as e: + output = to_builtin(decode_file(stdout, suffix="json", as_extended=True)) + except DataDecodeError as e: if result.returncode == 0: return SyncResult( success=False, @@ -376,7 +378,7 @@ def _cli_run_pipeline( success=False, error_message="Pipeline execution timed out", ) - except json.JSONDecodeError as e: + except DataDecodeError as e: return SyncResult( success=False, error_message=f"Failed to parse output: {redact_sensitive_text(e)}", diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index ffe8e24..dcb5c9d 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -25,6 +25,7 @@ 
validate_config, ) from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data +from extended_data.primitives.formats.errors import DataDecodeError @pytest.fixture @@ -225,6 +226,27 @@ def test_cli_run_pipeline_parses_result_envelope(mock_run: MagicMock, connector: assert result["diff_output"] == '{"summary":{"added":1}}' +@patch("extended_data.connectors.secrets.json.loads") +@patch("subprocess.run") +def test_cli_run_pipeline_decodes_result_envelope_through_data_boundary( + mock_run: MagicMock, + mock_json_loads: MagicMock, + connector: SecretsConnector, +) -> None: + """SecretSync JSON envelopes should use shared data decoding, not local json.loads.""" + mock_run.return_value = MagicMock( + returncode=0, + stdout=json.dumps({"success": True, "results": [{"target": "prod"}]}), + stderr="", + ) + mock_json_loads.side_effect = AssertionError("SecretSync CLI output must be decoded through decode_file") + + result = connector.run_pipeline("config.yaml") + + assert result["success"] is True + assert '"target": "prod"' in result["results_json"] + + @patch("subprocess.run") def test_cli_run_pipeline_rejects_legacy_raw_diff_json(mock_run: MagicMock, connector: SecretsConnector) -> None: mock_run.return_value = MagicMock( @@ -344,11 +366,11 @@ def test_cli_run_pipeline_success_without_json_is_error(mock_run: MagicMock, con assert "produced no JSON output" in result["error_message"] -@patch("json.loads") +@patch("extended_data.connectors.secrets.decode_file") @patch("subprocess.run") def test_cli_run_pipeline_success_parse_error_is_redacted( mock_run: MagicMock, - mock_json_loads: MagicMock, + mock_decode_file: MagicMock, connector: SecretsConnector, ) -> None: mock_run.return_value = MagicMock( @@ -356,10 +378,9 @@ def test_cli_run_pipeline_success_parse_error_is_redacted( stdout="not json", stderr="", ) - mock_json_loads.side_effect = json.JSONDecodeError( - "invalid password=hunter2 Authorization: Bearer raw_token", - "", - 0, + 
mock_decode_file.side_effect = DataDecodeError( + "JSON", + reason="invalid password=hunter2 Authorization: Bearer raw_token", ) result = connector.run_pipeline("config.yaml") From 208cb17fde0ee7a3e22d464ed17774e7cdb7a023 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 21:26:23 -0500 Subject: [PATCH 245/287] fix: read secrets config through data file boundary --- README.md | 2 ++ docs/package-surface.md | 3 +- .../connectors/secrets/__init__.py | 35 +++++++++++-------- tests/connectors/test_secrets.py | 22 ++++++++++++ 4 files changed, 47 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index b47d33b..ed5566a 100644 --- a/README.md +++ b/README.md @@ -177,6 +177,8 @@ The `secrets` connector integrates with the standalone SecretSync project envelope used by this package. That JSON envelope is decoded through the same file/data primitives as other structured connector payloads before being lowered into the `SyncResult` model. +Configuration inspection uses the same decoded file artifact path for YAML +pipeline configs. ```python from extended_data import SecretsConnector, SyncOptions diff --git a/docs/package-surface.md b/docs/package-surface.md index e92c5e4..b02f661 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -350,7 +350,8 @@ The `secrets` adapter is the Python-facing bridge to the standalone SecretSync project (`jbcom/secrets-sync`). It uses the `secretsync` CLI, which must emit the stable `secretsync pipeline --output json` result envelope for both dry-run and apply runs. The connector decodes that envelope through the shared file/data -primitives before lowering it into the `SyncResult` model. +primitives before lowering it into the `SyncResult` model. Configuration +inspection reads YAML configs through the same decoded `DataFile` artifact path. Secrets tool factories are exported from `extended_data.secrets`; the duplicate `extended_data.secrets.tools` module path is intentionally not preserved. 
diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 51f51cb..231c727 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -41,6 +41,7 @@ from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, extend_data, to_builtin +from extended_data.io import DataFile from extended_data.io.files import decode_file from extended_data.logging import Logging from extended_data.primitives.formats.errors import DataDecodeError @@ -245,31 +246,37 @@ def get_config_info(self, config_path: str) -> ExtendedDict: def _cli_get_config_info(self, config_path: str) -> ConfigInfo: """Get config info via CLI.""" try: - import yaml - except ImportError: - return ConfigInfo(error_message="pyyaml is required for CLI mode but not installed.") - - try: - with open(config_path) as f: - cfg = yaml.safe_load(f) + cfg = to_builtin(DataFile.read(config_path, suffix="yaml", as_extended=True).data) if not isinstance(cfg, dict): # Handles empty file (cfg=None) or non-dict root cfg = {} + sources = cfg.get("sources", {}) + if not isinstance(sources, dict): + sources = {} + targets = cfg.get("targets", {}) + if not isinstance(targets, dict): + targets = {} + vault = cfg.get("vault", {}) + if not isinstance(vault, dict): + vault = {} + aws = cfg.get("aws", {}) + if not isinstance(aws, dict): + aws = {} return ConfigInfo( valid=True, - source_count=len(cfg.get("sources", {})), - target_count=len(cfg.get("targets", {})), - sources=list(cfg.get("sources", {}).keys()), - targets=list(cfg.get("targets", {}).keys()), + source_count=len(sources), + target_count=len(targets), + sources=list(sources.keys()), + targets=list(targets.keys()), has_merge_store="merge_store" in cfg, - vault_address=cfg.get("vault", {}).get("address", ""), - aws_region=cfg.get("aws", {}).get("region", ""), + vault_address=vault.get("address", ""), + 
aws_region=aws.get("region", ""), ) except FileNotFoundError: return ConfigInfo(error_message=f"Configuration file not found: {redact_sensitive_text(config_path)}") - except yaml.YAMLError as e: + except DataDecodeError as e: return ConfigInfo(error_message=f"Error parsing YAML file: {redact_sensitive_text(e)}") def run_pipeline( diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index dcb5c9d..bed9cbf 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -25,6 +25,7 @@ validate_config, ) from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data +from extended_data.io import DataFile from extended_data.primitives.formats.errors import DataDecodeError @@ -74,6 +75,27 @@ def test_cli_get_config_info_valid(connector: SecretsConnector, tmp_path: Path) assert info["aws_region"] == "us-east-1" +@patch("extended_data.connectors.secrets.DataFile.read") +def test_cli_get_config_info_reads_through_data_file( + mock_read: MagicMock, + connector: SecretsConnector, +) -> None: + mock_read.return_value = DataFile.decode( + "sources:\n src1: {}\ntargets:\n tgt1: {}\n", + file_path="config.yaml", + suffix="yaml", + ) + + info = connector.get_config_info("config.yaml") + + mock_read.assert_called_once_with("config.yaml", suffix="yaml", as_extended=True) + assert info["valid"] is True + assert info["source_count"] == 1 + assert info["target_count"] == 1 + assert info["sources"] == ["src1"] + assert info["targets"] == ["tgt1"] + + def test_cli_get_config_info_not_found(connector: SecretsConnector) -> None: info = connector.get_config_info("/non/existent/path.yaml") assert isinstance(info, ExtendedDict) From b1480e70732983358947a357b386e540a450cbf7 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 21:31:50 -0500 Subject: [PATCH 246/287] fix: decode inputs through data boundary --- README.md | 4 ++- docs/package-surface.md | 4 ++- src/extended_data/inputs/__main__.py | 18 
++++++---- tests/inputs/test_main.py | 49 ++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ed5566a..70047f2 100644 --- a/README.md +++ b/README.md @@ -262,7 +262,9 @@ state by default. `get_input()` remains the scalar coercion boundary for booleans, numbers, paths, datetimes, and credential strings; pass `as_extended=True` when an injected raw or fallback input value should stay in Tier 2 form and keep using container methods such as `reconstruct_special_types()` -and `to_export_safe()`. +and `to_export_safe()`. Stdin JSON plus JSON/YAML `decode_input()` paths use +the same file/data decoding boundary as structured files and connector +payloads. `Logging` stores marked log message collections as `ExtendedDict` and `ExtendedSet` values while keeping Python logger and handler objects plain. Use `get_stored_messages()` or `snapshot_stored_messages()` when downstream diff --git a/docs/package-surface.md b/docs/package-surface.md index b02f661..dc2ae1e 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -209,7 +209,9 @@ Missing workflow input files raise `FileNotFoundError`, and empty workflow writes raise `ValueError` unless `allow_empty=True` is passed. `InputProvider` loads input data from explicit mappings, environment variables, -and stdin, then decodes or coerces values through the primitive layer. Its +and stdin, then decodes or coerces values through the shared primitive and +file/data layers. Stdin JSON and JSON/YAML `decode_input()` paths use the same +structured decoder boundary as file and connector payloads. Its `decode_input(..., as_extended=True)` path gives input-driven workflows the same container bridge as file and Base64 decoding; fallback values use that same promotion rule, so defaults do not silently drop back to plain dictionaries. 
diff --git a/src/extended_data/inputs/__main__.py b/src/extended_data/inputs/__main__.py index 4771514..d1c72e3 100644 --- a/src/extended_data/inputs/__main__.py +++ b/src/extended_data/inputs/__main__.py @@ -9,7 +9,6 @@ from __future__ import annotations import binascii -import json import os import sys @@ -21,9 +20,8 @@ from extended_data.containers.factory import extend_data, to_builtin from extended_data.containers.mappings import ExtendedDict from extended_data.io.base64 import base64_decode +from extended_data.io.files import decode_file from extended_data.primitives.formats.errors import DataDecodeError -from extended_data.primitives.formats.json import decode_json -from extended_data.primitives.formats.yaml import decode_yaml from extended_data.primitives.state import is_nothing from extended_data.primitives.types import ( string_to_bool, @@ -130,9 +128,12 @@ def _load_from_stdin() -> dict[str, Any]: return {} try: - decoded_stdin: dict[str, Any] = json.loads(inputs_from_stdin) + decoded_stdin = decode_file(inputs_from_stdin, suffix="json", as_extended=False) + if not isinstance(decoded_stdin, dict): + message = "Failed to decode stdin as JSON object." + raise TypeError(message) return decoded_stdin - except json.JSONDecodeError as exc: + except DataDecodeError as exc: message = f"Failed to decode stdin as JSON ({len(inputs_from_stdin)} characters)." raise RuntimeError(message) from exc @@ -312,13 +313,13 @@ def decode_input( if decode_from_yaml: try: - conf = decode_yaml(conf) + conf = decode_file(conf, suffix="yaml", as_extended=as_extended) except DataDecodeError as exc: message = f"Failed to decode input {k} from YAML." raise RuntimeError(message) from exc elif decode_from_json: try: - conf = decode_json(conf) + conf = decode_file(conf, suffix="json", as_extended=as_extended) except DataDecodeError as exc: message = f"Failed to decode input {k} from JSON." 
raise RuntimeError(message) from exc @@ -326,6 +327,9 @@ def decode_input( if conf is None and not allow_none: return self._return_value(default, as_extended=as_extended) + if (decode_from_yaml or decode_from_json) and as_extended: + return conf + return self._return_value(conf, as_extended=as_extended) def freeze_inputs(self) -> ExtendedDict: diff --git a/tests/inputs/test_main.py b/tests/inputs/test_main.py index 4b94c67..5e71efa 100644 --- a/tests/inputs/test_main.py +++ b/tests/inputs/test_main.py @@ -38,6 +38,7 @@ from extended_data import base64_encode from extended_data.containers import ExtendedDict, ExtendedString +from extended_data.inputs import __main__ as inputs_module from extended_data.inputs.__main__ import InputProvider @@ -96,6 +97,24 @@ def test_init_with_stdin(monkeypatch): assert dic.inputs["stdin_key"] == "stdin_value" +@pytest.mark.usefixtures("_env_setup") +def test_init_with_stdin_decodes_through_data_boundary(monkeypatch): + """Stdin JSON should use the shared data decoder before merging inputs.""" + + def fake_decode_file(data, *, suffix=None, as_extended=True): + assert data == '{"stdin_key": "stdin_value"}' + assert suffix == "json" + assert as_extended is False + return {"stdin_key": "stdin_value"} + + monkeypatch.setattr("sys.stdin.read", lambda: '{"stdin_key": "stdin_value"}') + monkeypatch.setattr(inputs_module, "decode_file", fake_decode_file) + + dic = InputProvider(from_stdin=True) + + assert dic.inputs["stdin_key"] == "stdin_value" + + def test_get_input_with_default(): """Test retrieving an input with a default value. 
@@ -227,6 +246,36 @@ def test_decode_input_yaml(): assert decoded == {"name": "test"} +@pytest.mark.parametrize( + ("input_key", "input_value", "decode_kwargs", "expected_suffix"), + [ + ("json_key", '{"name": "test"}', {"decode_from_json": True}, "json"), + ("yaml_key", "name: test", {"decode_from_yaml": True}, "yaml"), + ], +) +def test_decode_input_uses_data_boundary( + monkeypatch, + input_key: str, + input_value: str, + decode_kwargs: dict[str, bool], + expected_suffix: str, +) -> None: + """Structured input decoding should use the shared file/data decoder.""" + + def fake_decode_file(data, *, suffix=None, as_extended=True): + assert data == input_value + assert suffix == expected_suffix + assert as_extended is False + return {"name": "test"} + + monkeypatch.setattr(inputs_module, "decode_file", fake_decode_file) + dic = InputProvider(inputs={input_key: input_value}, from_environment=False) + + decoded = dic.decode_input(input_key, **decode_kwargs) + + assert decoded == {"name": "test"} + + def test_decode_input_base64(): """Test decoding an input from Base64 format. From 55b3c6919072777b4dfab37aba75206754a4da4f Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 21:37:10 -0500 Subject: [PATCH 247/287] fix: decode cli arguments through data boundary --- README.md | 3 +++ docs/package-surface.md | 3 +++ src/extended_data/connectors/cli.py | 6 ++++-- tests/connectors/test_cli.py | 24 ++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 70047f2..6a70ce4 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,9 @@ payload contract; framework factory functions still return framework tool objects. The generic CLI `call` command and MCP bridge expose only methods that advertise Extended Data payload returns. +CLI `--arg` values that look like JSON are decoded through the same structured +data boundary used by files, inputs, and connector payloads before method +dispatch. 
Serialized CLI/MCP boundaries and connector API error messages reuse the Tier 1 redaction primitives for common secret-bearing keys and token-shaped strings. CLI and MCP connector calls pass method arguments through `values=[...]` as diff --git a/docs/package-surface.md b/docs/package-surface.md index dc2ae1e..afb27ef 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -330,6 +330,9 @@ their payload returns as `ExtendedDict` or `ExtendedList[ExtendedDict]`. The generic CLI `call` command and MCP bridge expose only connector methods that advertise Extended Data payload returns, so raw SDK client factories and low-level HTTP helpers do not leak into serialized tool catalogs. +CLI `--arg` values that look like JSON are decoded through the shared +structured data boundary before method dispatch, matching file, input, and +connector payload decoding. Serialized CLI/MCP boundaries apply Tier 1 redaction after Tier 2 containers are lowered to JSON-compatible data, and connector API error messages use the same redaction policy before exceptions are raised. 
Common secret-bearing keys diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index bbd832d..c941ccc 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -32,6 +32,8 @@ from extended_data.connectors.surface import connector_data_methods, is_connector_data_method from extended_data.containers import ExtendedList from extended_data.containers.factory import to_builtin +from extended_data.io.files import decode_file +from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.redaction import redact_sensitive_text @@ -51,8 +53,8 @@ def _parse_arg_value(value: str) -> Any: """Parse a CLI argument value, attempting JSON decode.""" # Try JSON first try: - return json.loads(value) - except json.JSONDecodeError: + return decode_file(value, suffix="json", as_extended=False) + except DataDecodeError: pass # Try common conversions diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index 441a34d..60401ad 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -277,6 +277,30 @@ def test_cli_call_redacts_explicit_argument_values_from_errors() -> None: assert output.count("[REDACTED]") >= 3 +@patch("extended_data.connectors.cli.json.loads") +def test_cli_call_decodes_json_arguments_through_data_boundary(mock_json_loads: MagicMock) -> None: + """Structured CLI method arguments should use the shared data decoder.""" + mock_json_loads.side_effect = AssertionError("CLI arguments must be decoded through decode_file") + args = argparse.Namespace( + connector="example", + method="fetch", + extra=["--metadata", '{"service": {"name": "api"}}'], + json=False, + ) + connector = MagicMock() + connector.fetch.return_value = {"ok": True} + + with ( + patch("extended_data.connectors.cli.get_connector_class", return_value=ExampleConnector), + patch("extended_data.connectors.cli.get_connector", return_value=connector), + 
patch("sys.stdout.write"), + ): + exit_code = cmd_call(args) + + assert exit_code == 0 + connector.fetch.assert_called_once_with(metadata={"service": {"name": "api"}}) + + def test_cli_main_help() -> None: """Test main CLI entry point with help.""" with patch("sys.argv", ["extended-data", "--help"]): From 3b6b60bdc1fdfc6e3d4e7037be70ec7b7cc452e9 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 21:42:22 -0500 Subject: [PATCH 248/287] fix: decode connector persistence through data boundary --- README.md | 2 ++ docs/package-surface.md | 2 ++ .../connectors/google/__init__.py | 12 ++++---- .../meshy/persistence/repository.py | 6 ++-- .../meshy/persistence/vector_store.py | 6 ++-- tests/connectors/meshy/test_repository.py | 24 +++++++++++++++ tests/connectors/meshy/test_vector_store.py | 20 +++++++++++++ tests/connectors/test_google_connector.py | 30 +++++++++++++++++++ 8 files changed, 91 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 6a70ce4..791aae5 100644 --- a/README.md +++ b/README.md @@ -164,6 +164,8 @@ advertise Extended Data payload returns. CLI `--arg` values that look like JSON are decoded through the same structured data boundary used by files, inputs, and connector payloads before method dispatch. +Google service-account strings and Meshy persisted manifests/metadata follow +that same decode path instead of parsing JSON in connector-local code. Serialized CLI/MCP boundaries and connector API error messages reuse the Tier 1 redaction primitives for common secret-bearing keys and token-shaped strings. CLI and MCP connector calls pass method arguments through `values=[...]` as diff --git a/docs/package-surface.md b/docs/package-surface.md index afb27ef..536aea7 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -333,6 +333,8 @@ low-level HTTP helpers do not leak into serialized tool catalogs. 
CLI `--arg` values that look like JSON are decoded through the shared structured data boundary before method dispatch, matching file, input, and connector payload decoding. +Google service-account strings and Meshy persisted manifests/metadata use that +same boundary, so connector-local reads do not grow private JSON parsers. Serialized CLI/MCP boundaries apply Tier 1 redaction after Tier 2 containers are lowered to JSON-compatible data, and connector API error messages use the same redaction policy before exceptions are raised. Common secret-bearing keys diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index 694c2e3..6243c16 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -2,8 +2,6 @@ from __future__ import annotations -import json - from collections.abc import Sequence from typing import TYPE_CHECKING, Any, cast @@ -14,7 +12,9 @@ from extended_data.connectors.google.services import GoogleServicesMixin from extended_data.connectors.google.workspace import GoogleWorkspaceMixin from extended_data.containers import ExtendedDict, ExtendedList +from extended_data.io.files import decode_file from extended_data.logging import Logging +from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.redaction import redact_sensitive_text @@ -99,8 +99,8 @@ def __init__( # Parse if string if isinstance(service_account_info, str): try: - service_account_info = json.loads(service_account_info) - except json.JSONDecodeError as e: + service_account_info = decode_file(service_account_info, suffix="json", as_extended=False) + except DataDecodeError as e: safe_payload = redact_sensitive_text(service_account_info, values=[service_account_info]) error_message = ( "Failed to parse GOOGLE_SERVICE_ACCOUNT JSON: " @@ -281,8 +281,8 @@ def _resolve_sequence_option( candidate = raw_value.strip() if candidate: try: - parsed = 
json.loads(candidate) - except json.JSONDecodeError: + parsed = decode_file(candidate, suffix="json", as_extended=False) + except DataDecodeError: pass else: return self._normalize_str_sequence(parsed) diff --git a/src/extended_data/connectors/meshy/persistence/repository.py b/src/extended_data/connectors/meshy/persistence/repository.py index aa8e39c..1c15b2c 100644 --- a/src/extended_data/connectors/meshy/persistence/repository.py +++ b/src/extended_data/connectors/meshy/persistence/repository.py @@ -20,6 +20,7 @@ ) from extended_data.connectors.meshy.persistence.utils import compute_spec_hash as util_compute_spec_hash from extended_data.containers import ExtendedDict, ExtendedList, extend_data +from extended_data.io.files import DataFile from extended_data.primitives.redaction import redact_sensitive_text @@ -59,9 +60,8 @@ def _load_project_manifest_model(self, project: str) -> ProjectManifest: self.save_project_manifest(manifest) return manifest - with open(manifest_path) as f: - data = json.load(f) - return ProjectManifest(**data) + data = DataFile.read(manifest_path, as_extended=False).as_builtin() + return ProjectManifest(**data) def load_project_manifest(self, project: str) -> ExtendedDict: """Load manifest for a project, creating empty one if missing. 
diff --git a/src/extended_data/connectors/meshy/persistence/vector_store.py b/src/extended_data/connectors/meshy/persistence/vector_store.py index efa170c..e511d56 100644 --- a/src/extended_data/connectors/meshy/persistence/vector_store.py +++ b/src/extended_data/connectors/meshy/persistence/vector_store.py @@ -49,6 +49,8 @@ from typing_extensions import Self from extended_data.containers import ExtendedDict, ExtendedList, extend_data +from extended_data.io.files import decode_file +from extended_data.primitives.formats.errors import DataDecodeError if TYPE_CHECKING: @@ -528,8 +530,8 @@ def _row_to_record(self, row: sqlite3.Row) -> GenerationRecord: """Convert database row to GenerationRecord.""" metadata = {} if row["metadata_json"]: - with suppress(json.JSONDecodeError): - metadata = json.loads(row["metadata_json"]) + with suppress(DataDecodeError): + metadata = decode_file(row["metadata_json"], suffix="json", as_extended=False) return GenerationRecord( id=row["id"], diff --git a/tests/connectors/meshy/test_repository.py b/tests/connectors/meshy/test_repository.py index ed08e1b..3216389 100644 --- a/tests/connectors/meshy/test_repository.py +++ b/tests/connectors/meshy/test_repository.py @@ -5,6 +5,7 @@ import json from datetime import datetime, timezone +from unittest.mock import patch import pytest @@ -69,6 +70,29 @@ def test_load_existing_manifest(self, task_repository, temp_dir): assert isinstance(manifest, ExtendedDict) assert manifest["project"] == "project2" + def test_load_existing_manifest_decodes_through_data_file_boundary(self, task_repository, temp_dir): + """Existing manifests should be read through DataFile, not local json.load.""" + project_dir = temp_dir / "project2" + project_dir.mkdir() + manifest_path = project_dir / "manifest.json" + manifest_path.write_text( + json.dumps( + { + "project": "project2", + "asset_specs": {}, + "version": "1.0", + "last_updated": datetime.now(timezone.utc).isoformat(), + } + ) + ) + + with 
patch("extended_data.connectors.meshy.persistence.repository.json.load") as mock_json_load: + mock_json_load.side_effect = AssertionError("manifests must be decoded through DataFile") + manifest = task_repository.load_project_manifest("project2") + + assert manifest["project"] == "project2" + mock_json_load.assert_not_called() + def test_save_and_load_manifest(self, task_repository): """Test saving and reloading manifest.""" manifest = ProjectManifest(project="project1") diff --git a/tests/connectors/meshy/test_vector_store.py b/tests/connectors/meshy/test_vector_store.py index e8d7281..4433120 100644 --- a/tests/connectors/meshy/test_vector_store.py +++ b/tests/connectors/meshy/test_vector_store.py @@ -69,6 +69,26 @@ def test_get_record_methods_return_extended_payloads(temp_dir) -> None: assert by_task["task_id"] == "task-123" +def test_record_metadata_decodes_through_data_boundary(temp_dir, monkeypatch) -> None: + """Persisted metadata should use the shared JSON decoder on reads.""" + with VectorStore(temp_dir / "assets.db") as store: + store.record_generation( + spec_hash="hash-abc", + prompt="cute otter character", + project="project1", + metadata={"source": "test"}, + ) + + def fail_local_json_loads(*_: object) -> object: + raise AssertionError("metadata_json must be decoded through decode_file") + + monkeypatch.setattr(vector_store_module.json, "loads", fail_local_json_loads) + record = store.get_by_spec_hash("hash-abc") + + assert isinstance(record, ExtendedDict) + assert record["metadata"]["source"] == "test" + + def test_search_text_and_list_pending_return_extended_payloads(temp_dir) -> None: """Search and pending queries should return extended lists of mappings.""" with VectorStore(temp_dir / "assets.db") as store: diff --git a/tests/connectors/test_google_connector.py b/tests/connectors/test_google_connector.py index d9889fd..716f58d 100644 --- a/tests/connectors/test_google_connector.py +++ b/tests/connectors/test_google_connector.py @@ -89,6 +89,21 @@ 
def test_init_with_dict_service_account(self, base_connector_kwargs): assert connector.service_account_info == service_account assert connector._credentials is None + @patch("extended_data.connectors.google.decode_file") + def test_init_decodes_service_account_string_through_data_boundary(self, mock_decode_file, base_connector_kwargs): + """Service-account JSON strings should use the shared data decoder.""" + service_account = _service_account() + service_account_text = '{"type": "service_account"}' + mock_decode_file.return_value = service_account + + connector = GoogleConnector( + service_account_info=service_account_text, + **base_connector_kwargs, + ) + + assert connector.service_account_info == service_account + mock_decode_file.assert_called_once_with(service_account_text, suffix="json", as_extended=False) + def test_init_redacts_invalid_service_account_json_logs(self, base_connector_kwargs): """Invalid service-account JSON diagnostics should not expose key material.""" invalid_service_account = '{"private_key": "-----BEGIN RSA PRIVATE KEY-----\\nMIIE...test"' @@ -104,6 +119,21 @@ def test_init_redacts_invalid_service_account_json_logs(self, base_connector_kwa assert exc_info.value.__cause__ is None assert all("exc_info" not in logged_call.kwargs for logged_call in base_connector_kwargs["logger"].logger.method_calls) + @patch("extended_data.connectors.google.decode_file") + def test_sequence_option_input_decodes_json_through_data_boundary(self, mock_decode_file, base_connector_kwargs): + """List-like Google input values should use the shared data decoder.""" + mock_decode_file.return_value = ["/Engineering", "/Platform"] + connector = GoogleConnector( + service_account_info=_service_account(), + inputs={"GOOGLE_OU_ALLOW_LIST": '["/Engineering", "/Platform"]'}, + **base_connector_kwargs, + ) + + result = connector._resolve_sequence_option(None, "GOOGLE_OU_ALLOW_LIST") + + assert result == ["/Engineering", "/Platform"] + 
mock_decode_file.assert_called_once_with('["/Engineering", "/Platform"]', suffix="json", as_extended=False) + @patch("extended_data.connectors.google.service_account.Credentials.from_service_account_info") def test_credentials_property(self, mock_from_sa, base_connector_kwargs): """Test credentials property creates credentials.""" From 2ca2c53ca4c2c17e16a4af23248505a515884a9d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 21:47:59 -0500 Subject: [PATCH 249/287] fix: encode connector persistence through export boundary --- README.md | 2 ++ docs/package-surface.md | 2 ++ src/extended_data/connectors/aws/s3.py | 5 ++--- src/extended_data/connectors/meshy/jobs.py | 6 ++--- .../meshy/persistence/repository.py | 4 ++-- tests/connectors/meshy/test_jobs.py | 9 +++++++- tests/connectors/meshy/test_repository.py | 22 ++++++++++++++++--- tests/connectors/test_aws_s3.py | 22 +++++++++---------- 8 files changed, 48 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 791aae5..cf3f279 100644 --- a/README.md +++ b/README.md @@ -166,6 +166,8 @@ data boundary used by files, inputs, and connector payloads before method dispatch. Google service-account strings and Meshy persisted manifests/metadata follow that same decode path instead of parsing JSON in connector-local code. +AWS S3 JSON object writes and Meshy manifest writes use the shared export +boundary as well, keeping connector persistence aligned with Tier 3 file data. Serialized CLI/MCP boundaries and connector API error messages reuse the Tier 1 redaction primitives for common secret-bearing keys and token-shaped strings. CLI and MCP connector calls pass method arguments through `values=[...]` as diff --git a/docs/package-surface.md b/docs/package-surface.md index 536aea7..205ed18 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -335,6 +335,8 @@ structured data boundary before method dispatch, matching file, input, and connector payload decoding. 
Google service-account strings and Meshy persisted manifests/metadata use that same boundary, so connector-local reads do not grow private JSON parsers. +AWS S3 JSON object writes and Meshy manifest writes go through the shared export +boundary, so connector persistence uses the same Tier 3 data-file encoding path. Serialized CLI/MCP boundaries apply Tier 1 redaction after Tier 2 containers are lowered to JSON-compatible data, and connector API error messages use the same redaction policy before exceptions are raised. Common secret-bearing keys diff --git a/src/extended_data/connectors/aws/s3.py b/src/extended_data/connectors/aws/s3.py index c8ba201..bf3c573 100644 --- a/src/extended_data/connectors/aws/s3.py +++ b/src/extended_data/connectors/aws/s3.py @@ -5,13 +5,12 @@ from __future__ import annotations -import json - from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any, cast from extended_data.connectors.aws._diagnostics import safe_aws_ref, safe_aws_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin +from extended_data.io import wrap_raw_data_for_export from extended_data.io.files import decode_file from extended_data.primitives import unhump_map @@ -274,7 +273,7 @@ def put_json_object( Returns: The S3 put_object response. 
""" - body = json.dumps(to_builtin(data), indent=indent, default=str) + body = wrap_raw_data_for_export(data, allow_encoding="json", indent_2=bool(indent)) return self.put_object( bucket=bucket, key=key, diff --git a/src/extended_data/connectors/meshy/jobs.py b/src/extended_data/connectors/meshy/jobs.py index 069ba1a..8273b5d 100644 --- a/src/extended_data/connectors/meshy/jobs.py +++ b/src/extended_data/connectors/meshy/jobs.py @@ -7,7 +7,6 @@ from __future__ import annotations import hashlib -import json from dataclasses import asdict, dataclass from pathlib import Path @@ -15,7 +14,8 @@ from extended_data.connectors.meshy import base, text3d from extended_data.connectors.meshy.models import ArtStyle, AssetIntent, AssetSpec, Text3DRequest -from extended_data.containers import ExtendedDict, ExtendedList, extend_data, to_builtin +from extended_data.containers import ExtendedDict, ExtendedList, extend_data +from extended_data.io import wrap_raw_data_for_export @dataclass @@ -125,7 +125,7 @@ def generate_model(self, spec: AssetSpec, wait: bool = True, poll_interval: floa # Save manifest manifest_path = output_dir / f"{asset_id}_manifest.json" with open(manifest_path, "w") as f: - json.dump(to_builtin(manifest.to_dict()), f, indent=2) + f.write(wrap_raw_data_for_export(manifest.to_dict(), allow_encoding="json", indent_2=True)) return manifest.to_dict() diff --git a/src/extended_data/connectors/meshy/persistence/repository.py b/src/extended_data/connectors/meshy/persistence/repository.py index 1c15b2c..05113c6 100644 --- a/src/extended_data/connectors/meshy/persistence/repository.py +++ b/src/extended_data/connectors/meshy/persistence/repository.py @@ -2,7 +2,6 @@ from __future__ import annotations -import json import os import tempfile @@ -20,6 +19,7 @@ ) from extended_data.connectors.meshy.persistence.utils import compute_spec_hash as util_compute_spec_hash from extended_data.containers import ExtendedDict, ExtendedList, extend_data +from extended_data.io import 
wrap_raw_data_for_export from extended_data.io.files import DataFile from extended_data.primitives.redaction import redact_sensitive_text @@ -89,7 +89,7 @@ def save_project_manifest(self, manifest: ProjectManifest) -> None: # Atomic write: write to temp file, then rename with tempfile.NamedTemporaryFile(mode="w", dir=manifest_path.parent, delete=False, suffix=".tmp") as tmp_file: - json.dump(manifest_dict, tmp_file, indent=2) + tmp_file.write(wrap_raw_data_for_export(manifest_dict, allow_encoding="json", indent_2=True)) tmp_path = tmp_file.name # Atomic rename diff --git a/tests/connectors/meshy/test_jobs.py b/tests/connectors/meshy/test_jobs.py index 2bde941..2ee2752 100644 --- a/tests/connectors/meshy/test_jobs.py +++ b/tests/connectors/meshy/test_jobs.py @@ -6,6 +6,7 @@ from unittest.mock import patch +from extended_data.connectors.meshy import jobs as jobs_module from extended_data.connectors.meshy.jobs import ( AssetGenerator, AssetManifest, @@ -199,7 +200,11 @@ def test_generate_model_saves_manifest_json(self, temp_dir): asset_id="barrel-001", ) - generator.generate_model(spec, wait=True, poll_interval=0.01) + with patch( + "extended_data.connectors.meshy.jobs.wrap_raw_data_for_export", + wraps=jobs_module.wrap_raw_data_for_export, + ) as mock_wrap_for_export: + generator.generate_model(spec, wait=True, poll_interval=0.01) manifest_path = temp_dir / "models" / "props" / "barrel-001_manifest.json" assert manifest_path.exists() @@ -207,6 +212,8 @@ def test_generate_model_saves_manifest_json(self, temp_dir): with open(manifest_path) as f: saved_manifest = json.load(f) assert saved_manifest["asset_id"] == "barrel-001" + mock_wrap_for_export.assert_called_once() + assert mock_wrap_for_export.call_args.kwargs == {"allow_encoding": "json", "indent_2": True} def test_batch_generate(self, temp_dir): """Test batch generation of multiple assets.""" diff --git a/tests/connectors/meshy/test_repository.py b/tests/connectors/meshy/test_repository.py index 3216389..9221f42 
100644 --- a/tests/connectors/meshy/test_repository.py +++ b/tests/connectors/meshy/test_repository.py @@ -9,6 +9,7 @@ import pytest +from extended_data.connectors.meshy.persistence import repository as repository_module from extended_data.connectors.meshy.persistence.repository import TaskRepository from extended_data.connectors.meshy.persistence.schemas import ( ArtifactRecord, @@ -86,12 +87,14 @@ def test_load_existing_manifest_decodes_through_data_file_boundary(self, task_re ) ) - with patch("extended_data.connectors.meshy.persistence.repository.json.load") as mock_json_load: - mock_json_load.side_effect = AssertionError("manifests must be decoded through DataFile") + with patch( + "extended_data.connectors.meshy.persistence.repository.DataFile.read", + wraps=repository_module.DataFile.read, + ) as mock_read: manifest = task_repository.load_project_manifest("project2") assert manifest["project"] == "project2" - mock_json_load.assert_not_called() + mock_read.assert_called_once_with(manifest_path, as_extended=False) def test_save_and_load_manifest(self, task_repository): """Test saving and reloading manifest.""" @@ -114,6 +117,19 @@ def test_save_and_load_manifest(self, task_repository): assert "hash-123" in reloaded["asset_specs"] assert reloaded["asset_specs"]["hash-123"]["project"] == "project1" + def test_save_manifest_encodes_through_export_boundary(self, task_repository): + """Saved manifests should use the shared export boundary.""" + manifest = ProjectManifest(project="project1") + + with patch( + "extended_data.connectors.meshy.persistence.repository.wrap_raw_data_for_export", + wraps=repository_module.wrap_raw_data_for_export, + ) as mock_wrap_for_export: + task_repository.save_project_manifest(manifest) + + assert mock_wrap_for_export.called + assert mock_wrap_for_export.call_args.kwargs == {"allow_encoding": "json", "indent_2": True} + class TestAssetRecordOperations: """Tests for asset record operations.""" diff --git 
a/tests/connectors/test_aws_s3.py b/tests/connectors/test_aws_s3.py index 8a9c2f5..1cdee5b 100644 --- a/tests/connectors/test_aws_s3.py +++ b/tests/connectors/test_aws_s3.py @@ -230,29 +230,22 @@ def test_get_json_object(self, aws_connector): assert isinstance(result["key"], ExtendedString) assert result == test_data - def test_get_json_object_decodes_through_data_boundary(self, monkeypatch, aws_connector): + def test_get_json_object_decodes_through_data_boundary(self, aws_connector): """S3 JSON reads should use the shared file decoder, not local json.loads.""" - - class NoLocalJsonLoads: - dumps = staticmethod(json.dumps) - - @staticmethod - def loads(*args, **kwargs): - raise AssertionError("S3 JSON objects must be decoded through decode_file") - mock_s3 = MagicMock() mock_body = MagicMock() mock_body.read.return_value = b'{"items":[{"name":"one"}]}' mock_s3.get_object.return_value = {"Body": mock_body} aws_connector.get_aws_client = MagicMock(return_value=mock_s3) - monkeypatch.setattr(s3_module, "json", NoLocalJsonLoads) - result = aws_connector.get_json_object("bucket", "data.json") + with patch("extended_data.connectors.aws.s3.decode_file", wraps=s3_module.decode_file) as mock_decode_file: + result = aws_connector.get_json_object("bucket", "data.json") assert isinstance(result, ExtendedDict) assert isinstance(result["items"], ExtendedList) assert isinstance(result["items"][0], ExtendedDict) assert isinstance(result["items"][0]["name"], ExtendedString) + mock_decode_file.assert_called_once_with(b'{"items":[{"name":"one"}]}', suffix="json", as_extended=True) def test_get_json_object_not_found(self, aws_connector): """Test getting a non-existent JSON object.""" @@ -340,7 +333,11 @@ def test_put_json_object(self, aws_connector): aws_connector.get_aws_client = MagicMock(return_value=mock_s3) data = extend_data({"key": "value", "number": 123}) - result = aws_connector.put_json_object("bucket", "data.json", data) + with patch( + 
"extended_data.connectors.aws.s3.wrap_raw_data_for_export", + wraps=s3_module.wrap_raw_data_for_export, + ) as mock_wrap_for_export: + result = aws_connector.put_json_object("bucket", "data.json", data) assert isinstance(result, ExtendedDict) assert isinstance(result["ETag"], ExtendedString) @@ -350,6 +347,7 @@ def test_put_json_object(self, aws_connector): # Verify JSON was serialized body_str = call_args["Body"].decode("utf-8") assert json.loads(body_str) == data + mock_wrap_for_export.assert_called_once_with(data, allow_encoding="json", indent_2=True) def test_delete_object(self, aws_connector): """Test deleting an object.""" From 581af103d1e2fbfe58a9f34b03b7968f3a94e01f Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 21:53:22 -0500 Subject: [PATCH 250/287] fix: serialize connector outputs through export boundary --- README.md | 2 ++ docs/package-surface.md | 2 ++ src/extended_data/connectors/cli.py | 4 ++-- src/extended_data/connectors/mcp.py | 9 +++++-- src/extended_data/connectors/meshy/mcp.py | 19 +++++++++++---- .../connectors/secrets/__init__.py | 9 ++++--- tests/connectors/meshy/test_meshy_mcp.py | 23 +++++++++++++++++- tests/connectors/test_cli.py | 11 ++++++--- tests/connectors/test_mcp.py | 23 ++++++++++++++++++ tests/connectors/test_secrets.py | 24 +++++++++++++++---- 10 files changed, 106 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index cf3f279..d2ba89e 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,8 @@ Google service-account strings and Meshy persisted manifests/metadata follow that same decode path instead of parsing JSON in connector-local code. AWS S3 JSON object writes and Meshy manifest writes use the shared export boundary as well, keeping connector persistence aligned with Tier 3 file data. +CLI JSON output, MCP tool results, and SecretSync `results_json` use that same +export boundary after redaction. 
Serialized CLI/MCP boundaries and connector API error messages reuse the Tier 1 redaction primitives for common secret-bearing keys and token-shaped strings. CLI and MCP connector calls pass method arguments through `values=[...]` as diff --git a/docs/package-surface.md b/docs/package-surface.md index 205ed18..43ae258 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -337,6 +337,8 @@ Google service-account strings and Meshy persisted manifests/metadata use that same boundary, so connector-local reads do not grow private JSON parsers. AWS S3 JSON object writes and Meshy manifest writes go through the shared export boundary, so connector persistence uses the same Tier 3 data-file encoding path. +CLI JSON output, MCP tool results, and SecretSync `results_json` are exported +through the same path after redaction. Serialized CLI/MCP boundaries apply Tier 1 redaction after Tier 2 containers are lowered to JSON-compatible data, and connector API error messages use the same redaction policy before exceptions are raised. 
Common secret-bearing keys diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index c941ccc..6bdfb90 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -17,7 +17,6 @@ from __future__ import annotations import argparse -import json import sys from collections.abc import Mapping @@ -32,6 +31,7 @@ from extended_data.connectors.surface import connector_data_methods, is_connector_data_method from extended_data.containers import ExtendedList from extended_data.containers.factory import to_builtin +from extended_data.io import wrap_raw_data_for_export from extended_data.io.files import decode_file from extended_data.primitives.formats.errors import DataDecodeError from extended_data.primitives.redaction import redact_sensitive_text @@ -46,7 +46,7 @@ def _json_output(data: Any) -> str: data = dict(data) elif hasattr(data, "__iter__") and not isinstance(data, (str, bytes, bytearray)): data = [d.model_dump() if hasattr(d, "model_dump") else d for d in data] - return json.dumps(data, indent=2, default=str) + return wrap_raw_data_for_export(data, allow_encoding="json", indent_2=True, default=str) def _parse_arg_value(value: str) -> Any: diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index d337002..91d7779 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -22,7 +22,6 @@ import builtins import inspect -import json import sys from collections.abc import Callable, Iterable, Mapping @@ -31,6 +30,7 @@ from extended_data.connectors.registry import _list_connector_classes, get_connector from extended_data.connectors.surface import connector_data_methods from extended_data.containers import to_builtin +from extended_data.io import wrap_raw_data_for_export from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text @@ -120,6 +120,11 @@ def _unknown_tool_text(name: str) -> str: return f"Unknown 
tool: {redact_sensitive_text(name)}" +def _tool_result_text(result: Any) -> str: + """Return a serialized MCP tool result through the shared export boundary.""" + return wrap_raw_data_for_export(_jsonable_tool_result(result), allow_encoding="json", indent_2=True, default=str) + + def create_server() -> Any: """Create the unified MCP server with all registered connectors.""" try: @@ -199,7 +204,7 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]: if inspect.iscoroutine(result): result = await result - return [TextContent(type="text", text=json.dumps(_jsonable_tool_result(result), indent=2, default=str))] + return [TextContent(type="text", text=_tool_result_text(result))] except Exception as e: return [TextContent(type="text", text=_tool_error_text(e, arguments.values()))] diff --git a/src/extended_data/connectors/meshy/mcp.py b/src/extended_data/connectors/meshy/mcp.py index 4870d62..ab479fc 100644 --- a/src/extended_data/connectors/meshy/mcp.py +++ b/src/extended_data/connectors/meshy/mcp.py @@ -29,12 +29,11 @@ from __future__ import annotations -import json - from collections.abc import Callable, Iterable, Mapping from typing import Any, cast from extended_data.containers import to_builtin +from extended_data.io import wrap_raw_data_for_export from extended_data.primitives.redaction import redact_sensitive_data, redact_sensitive_text @@ -274,6 +273,16 @@ def _tool_error_payload(error: object) -> dict[str, str]: return {"error": redact_sensitive_text(error)} +def _tool_payload_text(payload: Any) -> str: + """Return a serialized MCP text payload through the shared export boundary.""" + return wrap_raw_data_for_export(payload, allow_encoding="json", indent_2=True) + + +def _tool_result_text(result: Any) -> str: + """Return a serialized Meshy MCP result through the shared export boundary.""" + return _tool_payload_text(_jsonable_tool_result(result)) + + def create_server() -> Any: """Create an MCP server with Meshy AI tools. 
@@ -313,18 +322,18 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[Any]: return [ TextContent( type="text", - text=json.dumps(_tool_error_payload(f"Unknown tool: {name}")), + text=_tool_payload_text(_tool_error_payload(f"Unknown tool: {name}")), ) ] try: result = handler(**arguments) - return [TextContent(type="text", text=json.dumps(_jsonable_tool_result(result), indent=2))] + return [TextContent(type="text", text=_tool_result_text(result))] except Exception as e: return [ TextContent( type="text", - text=json.dumps(_tool_error_payload(e), indent=2), + text=_tool_payload_text(_tool_error_payload(e)), ) ] diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 231c727..d6f98e6 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -30,7 +30,6 @@ from __future__ import annotations -import json import shutil import subprocess @@ -41,7 +40,7 @@ from extended_data.connectors.base import VendorConnectorBase from extended_data.containers import ExtendedDict, extend_data, to_builtin -from extended_data.io import DataFile +from extended_data.io import DataFile, wrap_raw_data_for_export from extended_data.io.files import decode_file from extended_data.logging import Logging from extended_data.primitives.formats.errors import DataDecodeError @@ -109,7 +108,11 @@ def from_cli_output(cls, output: dict[str, Any]) -> SyncResult: secrets_unchanged=safe_output.get("secrets_unchanged", 0), duration_ms=safe_output.get("duration_ms", 0), error_message=safe_output.get("error_message", ""), - results_json=json.dumps(safe_output.get("results", [])), + results_json=wrap_raw_data_for_export( + safe_output.get("results", []), + allow_encoding="json", + indent_2=True, + ), diff_output=safe_output.get("diff_output", ""), ) diff --git a/tests/connectors/meshy/test_meshy_mcp.py b/tests/connectors/meshy/test_meshy_mcp.py index 0d775d0..0941f32 
100644 --- a/tests/connectors/meshy/test_meshy_mcp.py +++ b/tests/connectors/meshy/test_meshy_mcp.py @@ -2,7 +2,10 @@ from __future__ import annotations -from extended_data.connectors.meshy.mcp import _jsonable_tool_result, _tool_error_payload +from unittest.mock import patch + +from extended_data.connectors.meshy import mcp as meshy_mcp_module +from extended_data.connectors.meshy.mcp import _jsonable_tool_result, _tool_error_payload, _tool_result_text from extended_data.containers import ExtendedDict, ExtendedSet @@ -23,6 +26,24 @@ def test_meshy_mcp_result_lowers_and_redacts_extended_payloads() -> None: assert sorted(result["tags"]) == ["asset", "model"] +def test_meshy_mcp_result_text_uses_shared_export_boundary() -> None: + """Meshy MCP text payloads should serialize through the Tier 3 export boundary.""" + payload = ExtendedDict({"service": {"name": "meshy"}}) + + with patch( + "extended_data.connectors.meshy.mcp.wrap_raw_data_for_export", + wraps=meshy_mcp_module.wrap_raw_data_for_export, + ) as mock_wrap_for_export: + text = _tool_result_text(payload) + + assert '"service": {' in text + mock_wrap_for_export.assert_called_once_with( + {"service": {"name": "meshy"}}, + allow_encoding="json", + indent_2=True, + ) + + def test_meshy_mcp_error_payload_redacts_sensitive_values() -> None: """Meshy MCP errors should not return raw secret-bearing exception text.""" payload = _tool_error_payload(RuntimeError("failed api_key=key_123 Bearer raw_token")) diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index 60401ad..f59c587 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -9,6 +9,7 @@ import pytest +from extended_data.connectors import cli as cli_module from extended_data.connectors.cli import cmd_call, cmd_info, cmd_list, cmd_methods, main from extended_data.containers import ExtendedDict @@ -143,12 +144,16 @@ def test_cli_call_serializes_extended_containers_as_data() -> None: with ( 
patch("extended_data.connectors.cli.get_connector_class", return_value=ExampleConnector), patch("extended_data.connectors.cli.get_connector", return_value=connector), + patch("extended_data.connectors.cli.wrap_raw_data_for_export", wraps=cli_module.wrap_raw_data_for_export) + as mock_wrap_for_export, patch("sys.stdout.write") as mock_write, ): exit_code = cmd_call(args) assert exit_code == 0 assert json.loads(mock_write.call_args.args[0]) == {"service": {"name": "api"}} + mock_wrap_for_export.assert_called_once() + assert mock_wrap_for_export.call_args.kwargs == {"allow_encoding": "json", "indent_2": True, "default": str} def test_cli_call_redacts_sensitive_json_output() -> None: @@ -277,10 +282,9 @@ def test_cli_call_redacts_explicit_argument_values_from_errors() -> None: assert output.count("[REDACTED]") >= 3 -@patch("extended_data.connectors.cli.json.loads") -def test_cli_call_decodes_json_arguments_through_data_boundary(mock_json_loads: MagicMock) -> None: +@patch("extended_data.connectors.cli.decode_file", wraps=cli_module.decode_file) +def test_cli_call_decodes_json_arguments_through_data_boundary(mock_decode_file: MagicMock) -> None: """Structured CLI method arguments should use the shared data decoder.""" - mock_json_loads.side_effect = AssertionError("CLI arguments must be decoded through decode_file") args = argparse.Namespace( connector="example", method="fetch", @@ -299,6 +303,7 @@ def test_cli_call_decodes_json_arguments_through_data_boundary(mock_json_loads: assert exit_code == 0 connector.fetch.assert_called_once_with(metadata={"service": {"name": "api"}}) + mock_decode_file.assert_called_once_with('{"service": {"name": "api"}}', suffix="json", as_extended=False) def test_cli_main_help() -> None: diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index 2b44982..c870b27 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -2,12 +2,16 @@ from __future__ import annotations +from unittest.mock import patch 
+ import pytest +from extended_data.connectors import mcp as mcp_module from extended_data.connectors.mcp import ( _get_public_methods, _jsonable_tool_result, _tool_error_text, + _tool_result_text, _unknown_tool_text, create_server, ) @@ -45,6 +49,25 @@ def test_jsonable_tool_result_lowers_extended_mapping_payloads() -> None: assert _jsonable_tool_result(payload) == {"service": {"name": "api"}} +def test_tool_result_text_uses_shared_export_boundary() -> None: + """MCP text payloads should serialize through the Tier 3 export boundary.""" + payload = ExtendedDict({"service": {"name": "api"}}) + + with patch( + "extended_data.connectors.mcp.wrap_raw_data_for_export", + wraps=mcp_module.wrap_raw_data_for_export, + ) as mock_wrap_for_export: + text = _tool_result_text(payload) + + assert '"service": {' in text + mock_wrap_for_export.assert_called_once_with( + {"service": {"name": "api"}}, + allow_encoding="json", + indent_2=True, + default=str, + ) + + def test_jsonable_tool_result_redacts_sensitive_mapping_payloads() -> None: """MCP result serialization should not bypass connector redaction.""" payload = ExtendedDict({"password": "hunter2", "nested": {"api_key": "key_123"}}) diff --git a/tests/connectors/test_secrets.py b/tests/connectors/test_secrets.py index bed9cbf..5e12d1c 100644 --- a/tests/connectors/test_secrets.py +++ b/tests/connectors/test_secrets.py @@ -7,6 +7,7 @@ import pytest import yaml +from extended_data.connectors import secrets as secrets_module from extended_data.connectors.secrets import ( ConfigInfo, OutputFormat, @@ -248,25 +249,40 @@ def test_cli_run_pipeline_parses_result_envelope(mock_run: MagicMock, connector: assert result["diff_output"] == '{"summary":{"added":1}}' -@patch("extended_data.connectors.secrets.json.loads") +@patch("extended_data.connectors.secrets.decode_file", wraps=secrets_module.decode_file) @patch("subprocess.run") def test_cli_run_pipeline_decodes_result_envelope_through_data_boundary( mock_run: MagicMock, - 
mock_json_loads: MagicMock, + mock_decode_file: MagicMock, connector: SecretsConnector, ) -> None: """SecretSync JSON envelopes should use shared data decoding, not local json.loads.""" + stdout = json.dumps({"success": True, "results": [{"target": "prod"}]}) mock_run.return_value = MagicMock( returncode=0, - stdout=json.dumps({"success": True, "results": [{"target": "prod"}]}), + stdout=stdout, stderr="", ) - mock_json_loads.side_effect = AssertionError("SecretSync CLI output must be decoded through decode_file") result = connector.run_pipeline("config.yaml") assert result["success"] is True assert '"target": "prod"' in result["results_json"] + mock_decode_file.assert_called_once_with(stdout, suffix="json", as_extended=True) + + +def test_sync_result_results_json_uses_shared_export_boundary() -> None: + """SecretSync result details should serialize through the shared export boundary.""" + output = {"success": True, "results": [{"target": "prod"}]} + + with patch( + "extended_data.connectors.secrets.wrap_raw_data_for_export", + wraps=secrets_module.wrap_raw_data_for_export, + ) as mock_wrap_for_export: + result = SyncResult.from_cli_output(output) + + assert '"target": "prod"' in result.results_json + mock_wrap_for_export.assert_called_once_with(output["results"], allow_encoding="json", indent_2=True) @patch("subprocess.run") From a3a7bbdb47ae03633029df40b8588067d1462ea4 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 21:56:06 -0500 Subject: [PATCH 251/287] fix: encode vector metadata through export boundary --- README.md | 3 ++- docs/package-surface.md | 3 ++- .../meshy/persistence/vector_store.py | 3 ++- tests/connectors/meshy/test_vector_store.py | 18 ++++++++++++++++++ 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d2ba89e..9695c3e 100644 --- a/README.md +++ b/README.md @@ -167,7 +167,8 @@ dispatch. 
Google service-account strings and Meshy persisted manifests/metadata follow that same decode path instead of parsing JSON in connector-local code. AWS S3 JSON object writes and Meshy manifest writes use the shared export -boundary as well, keeping connector persistence aligned with Tier 3 file data. +boundary as well, keeping connector persistence aligned with Tier 3 file data; +Meshy vector-store metadata follows the same path. CLI JSON output, MCP tool results, and SecretSync `results_json` use that same export boundary after redaction. Serialized CLI/MCP boundaries and connector API error messages reuse the Tier 1 diff --git a/docs/package-surface.md b/docs/package-surface.md index 43ae258..7087b01 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -336,7 +336,8 @@ connector payload decoding. Google service-account strings and Meshy persisted manifests/metadata use that same boundary, so connector-local reads do not grow private JSON parsers. AWS S3 JSON object writes and Meshy manifest writes go through the shared export -boundary, so connector persistence uses the same Tier 3 data-file encoding path. +boundary, so connector persistence uses the same Tier 3 data-file encoding path; +Meshy vector-store metadata follows the same path. CLI JSON output, MCP tool results, and SecretSync `results_json` are exported through the same path after redaction. 
Serialized CLI/MCP boundaries apply Tier 1 redaction after Tier 2 containers diff --git a/src/extended_data/connectors/meshy/persistence/vector_store.py b/src/extended_data/connectors/meshy/persistence/vector_store.py index e511d56..471df14 100644 --- a/src/extended_data/connectors/meshy/persistence/vector_store.py +++ b/src/extended_data/connectors/meshy/persistence/vector_store.py @@ -49,6 +49,7 @@ from typing_extensions import Self from extended_data.containers import ExtendedDict, ExtendedList, extend_data +from extended_data.io import wrap_raw_data_for_export from extended_data.io.files import decode_file from extended_data.primitives.formats.errors import DataDecodeError @@ -269,7 +270,7 @@ def record_generation( return cast(ExtendedDict, extend_data(_record_payload(self._row_to_record(row)))) # Insert new record - metadata_json = json.dumps(metadata) if metadata else None + metadata_json = wrap_raw_data_for_export(metadata, allow_encoding="json") if metadata else None cursor = conn.execute( """ diff --git a/tests/connectors/meshy/test_vector_store.py b/tests/connectors/meshy/test_vector_store.py index 4433120..02b33ca 100644 --- a/tests/connectors/meshy/test_vector_store.py +++ b/tests/connectors/meshy/test_vector_store.py @@ -5,6 +5,7 @@ import sys from types import ModuleType +from unittest.mock import patch from extended_data.connectors.meshy.persistence import vector_store as vector_store_module from extended_data.connectors.meshy.persistence.vector_store import VectorStore, get_embedding @@ -89,6 +90,23 @@ def fail_local_json_loads(*_: object) -> object: assert record["metadata"]["source"] == "test" +def test_record_metadata_encodes_through_export_boundary(temp_dir) -> None: + """Persisted metadata should use the shared JSON export boundary on writes.""" + with VectorStore(temp_dir / "assets.db") as store: + with patch( + "extended_data.connectors.meshy.persistence.vector_store.wrap_raw_data_for_export", + 
wraps=vector_store_module.wrap_raw_data_for_export, + ) as mock_wrap_for_export: + store.record_generation( + spec_hash="hash-abc", + prompt="cute otter character", + project="project1", + metadata={"source": "test"}, + ) + + mock_wrap_for_export.assert_called_once_with({"source": "test"}, allow_encoding="json") + + def test_search_text_and_list_pending_return_extended_payloads(temp_dir) -> None: """Search and pending queries should return extended lists of mappings.""" with VectorStore(temp_dir / "assets.db") as store: From 69e91698a4917694b89fa01bd737df7b9c3c4b6d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 22:00:15 -0500 Subject: [PATCH 252/287] fix: route final serializers through export boundary --- README.md | 2 ++ docs/package-surface.md | 2 ++ .../connectors/github/__init__.py | 9 +----- src/extended_data/logging/logging.py | 4 +-- .../test_github_workflow_builder.py | 29 ++++++++++++------- tests/logging/test_exit_run.py | 7 +++++ 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 9695c3e..61422ce 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,8 @@ boundary as well, keeping connector persistence aligned with Tier 3 file data; Meshy vector-store metadata follows the same path. CLI JSON output, MCP tool results, and SecretSync `results_json` use that same export boundary after redaction. +GitHub workflow YAML generation and `Logging.exit_run()` stdout serialization +also route through the shared exporter. Serialized CLI/MCP boundaries and connector API error messages reuse the Tier 1 redaction primitives for common secret-bearing keys and token-shaped strings. 
CLI and MCP connector calls pass method arguments through `values=[...]` as diff --git a/docs/package-surface.md b/docs/package-surface.md index 7087b01..23aecf7 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -340,6 +340,8 @@ boundary, so connector persistence uses the same Tier 3 data-file encoding path; Meshy vector-store metadata follows the same path. CLI JSON output, MCP tool results, and SecretSync `results_json` are exported through the same path after redaction. +GitHub workflow YAML generation and `Logging.exit_run()` stdout serialization +also route through the shared exporter. Serialized CLI/MCP boundaries apply Tier 1 redaction after Tier 2 containers are lowered to JSON-compatible data, and connector API error messages use the same redaction policy before exceptions are raised. Common secret-bearing keys diff --git a/src/extended_data/connectors/github/__init__.py b/src/extended_data/connectors/github/__init__.py index 56a533c..1aad132 100644 --- a/src/extended_data/connectors/github/__init__.py +++ b/src/extended_data/connectors/github/__init__.py @@ -2,15 +2,12 @@ from __future__ import annotations -import io import os from collections.abc import Mapping, Sequence from copy import deepcopy from typing import Any -from ruamel.yaml import YAML - from extended_data.connectors._optional import require_extra from extended_data.connectors.base import VendorConnectorBase from extended_data.connectors.github._diagnostics import safe_github_ref, safe_github_text @@ -1053,11 +1050,7 @@ def build_github_actions_workflow( if concurrency_group: workflow["concurrency"] = concurrency_group - yaml = YAML() - yaml.indent(mapping=2, sequence=4, offset=2) - buffer = io.StringIO() - yaml.dump(workflow, buffer) - return buffer.getvalue().strip() + return wrap_raw_data_for_export(workflow, allow_encoding="yaml").strip() from extended_data.connectors.github.tools import ( diff --git a/src/extended_data/logging/logging.py 
b/src/extended_data/logging/logging.py index fc9f47f..fb84b6e 100644 --- a/src/extended_data/logging/logging.py +++ b/src/extended_data/logging/logging.py @@ -27,8 +27,6 @@ cast, ) -import orjson - from extended_data.containers import ExtendedDict, ExtendedSet, to_builtin from extended_data.io import wrap_raw_data_for_export from extended_data.logging.const import VERBOSITY @@ -652,7 +650,7 @@ def encode_result_with_base64(r: Any) -> str: if not isinstance(data, str): self.logger.info("Dumping results to JSON") - data = orjson.dumps(data, default=str).decode("utf-8") + data = wrap_raw_data_for_export(data, allow_encoding="json", default=str) sys.stdout.write(data) sys.exit(0) diff --git a/tests/connectors/test_github_workflow_builder.py b/tests/connectors/test_github_workflow_builder.py index 91775a3..451e692 100644 --- a/tests/connectors/test_github_workflow_builder.py +++ b/tests/connectors/test_github_workflow_builder.py @@ -2,8 +2,11 @@ from __future__ import annotations +from unittest.mock import patch + from ruamel.yaml import YAML +from extended_data.connectors import github as github_module from extended_data.connectors.github import build_github_actions_workflow @@ -18,21 +21,27 @@ def test_build_github_actions_workflow_generates_yaml(): } } - workflow_yaml = build_github_actions_workflow( - workflow_name="CI", - jobs=jobs, - concurrency_group="ci-main", - environment_variables={"FOO": "bar"}, - secrets={"TOKEN": "GITHUB_TOKEN"}, - events={"push": True, "pull_request": False}, - inputs={"run-tests": {"required": False, "type": "boolean", "default": True}}, - ) + with patch( + "extended_data.connectors.github.wrap_raw_data_for_export", + wraps=github_module.wrap_raw_data_for_export, + ) as mock_wrap_for_export: + workflow_yaml = build_github_actions_workflow( + workflow_name="CI", + jobs=jobs, + concurrency_group="ci-main", + environment_variables={"FOO": "bar"}, + secrets={"TOKEN": "GITHUB_TOKEN"}, + events={"push": True, "pull_request": False}, + 
inputs={"run-tests": {"required": False, "type": "boolean", "default": True}}, + ) parsed = YAML().load(workflow_yaml) assert parsed["name"] == "CI" assert parsed["concurrency"] == "ci-main" assert parsed["env"]["FOO"] == "bar" - assert parsed["env"]["TOKEN"] == "${{ secrets.GITHUB_TOKEN }}" + assert parsed["env"]["TOKEN"] == "${{secrets.GITHUB_TOKEN}}" assert "workflow_dispatch" in parsed["on"] assert parsed["jobs"]["build"]["steps"][1]["run"] == "pytest" + mock_wrap_for_export.assert_called_once() + assert mock_wrap_for_export.call_args.kwargs == {"allow_encoding": "yaml"} diff --git a/tests/logging/test_exit_run.py b/tests/logging/test_exit_run.py index 68a47ca..fa1699f 100644 --- a/tests/logging/test_exit_run.py +++ b/tests/logging/test_exit_run.py @@ -11,6 +11,8 @@ import pytest +import extended_data.logging.logging as logging_module + from extended_data.logging import ExitRunError, Logging @@ -311,12 +313,17 @@ def test_exit_run_writes_to_stdout_and_exits(self, logger: Logging, tmp_path: Pa with ( patch("sys.stdout.write") as mock_write, patch("sys.exit") as mock_exit, + patch( + "extended_data.logging.logging.wrap_raw_data_for_export", + wraps=logging_module.wrap_raw_data_for_export, + ) as mock_wrap_for_export, ): logger.exit_run(results) mock_write.assert_called_once() written = mock_write.call_args[0][0] assert json.loads(written) == results mock_exit.assert_called_once_with(0) + mock_wrap_for_export.assert_any_call(results, allow_encoding="json", default=str) def test_exit_run_wraps_in_key(self, logger: Logging, tmp_path: Path) -> None: """Test that exit_run wraps results in specified key.""" From a211eb697dc49cfb6d9f8ed61498c5e4d97d468c Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 22:08:25 -0500 Subject: [PATCH 253/287] refactor: rename connector base API --- README.md | 6 +-- docs/package-surface.md | 4 +- examples/connectors/README.md | 4 +- pyproject.toml | 2 +- src/extended_data/__init__.py | 8 ++-- 
src/extended_data/connectors/__init__.py | 4 +- src/extended_data/connectors/_optional.py | 2 +- .../connectors/anthropic/__init__.py | 4 +- src/extended_data/connectors/aws/__init__.py | 4 +- src/extended_data/connectors/base.py | 12 ++--- src/extended_data/connectors/connectors.py | 8 ++-- .../connectors/cursor/__init__.py | 4 +- .../connectors/github/__init__.py | 4 +- .../connectors/google/__init__.py | 6 +-- src/extended_data/connectors/google/jules.py | 4 +- .../connectors/meshy/connector.py | 4 +- src/extended_data/connectors/registry.py | 22 +++++----- .../connectors/secrets/__init__.py | 6 +-- .../connectors/slack/__init__.py | 6 +-- src/extended_data/connectors/surface.py | 2 +- .../connectors/vault/__init__.py | 4 +- src/extended_data/connectors/zoom/__init__.py | 4 +- tests/connectors/test_base.py | 4 +- .../test_connector_payload_contracts.py | 44 +++++++++---------- tests/core/test_package_surface.py | 5 +++ tests/core/test_release_hygiene.py | 3 +- 26 files changed, 93 insertions(+), 87 deletions(-) diff --git a/README.md b/README.md index 61422ce..7a2de20 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ extended_data/ io/ Tier 3 file, import, export, and base64 processors inputs/ InputProvider and decorator-based input injection logging/ structured lifecycle logging - connectors/ Tier 3 ConnectorFabric and vendor adapters + connectors/ Tier 3 ConnectorFabric and data adapters secrets/ SecretSync CLI bridge and typed result exports workflows/ Tier 3 higher-order workflow composition ``` @@ -128,10 +128,10 @@ remaining migration work fails fast. Tier 1 public exports stay function-oriented; use `get_default_dict()` for nested or sorted default mappings instead of importing the internal helper class. -Vendor connectors are first-class adapters in the data fabric. `ConnectorFabric` +Connectors are first-class adapters in the data fabric. 
`ConnectorFabric` uses the registry to resolve connectors by name, injects shared input/logging context, caches connector instances, and lets specialized helpers coexist with -generic vendor lookup. `list_connectors()` returns registered connectors whose +generic connector lookup. `list_connectors()` returns registered connectors whose runtime requirements are installed; use `list_connector_info()` for the full catalog, including known connectors that need an `extended-data[...]` extra. Secret-like cache key fields such as `token`, `api_key`, `password`, and diff --git a/docs/package-surface.md b/docs/package-surface.md index 23aecf7..1586d1f 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -252,7 +252,7 @@ collections. `exit_run()` formatting failures also report a redacted result snapshot and suppress the internal formatting exception chain so diagnostics do not echo raw payload data. -`ConnectorFabric` caches and coordinates vendor connectors while sharing input +`ConnectorFabric` caches and coordinates registered connectors while sharing input loading, logging, data normalization, retry behavior, and serialization. `AWSConnector` and `GoogleConnector` are unified connector classes in this major version: common S3, Organizations, SSO, Workspace, Cloud Resource @@ -308,7 +308,7 @@ fields such as `token`, `api_key`, `password`, and `client_secret` before storing cache entries, so cache inspection and debug output do not expose raw credential material. -Connectors that inherit `VendorConnectorBase` can keep raw transport access with +Connectors that inherit `ConnectorBase` can keep raw transport access with `request()` or use `request_data()`, `get_data()`, `post_data()`, and the other verb-specific helpers to decode HTTP JSON, YAML, TOML, HCL, or text responses through the same Tier 2 container bridge used by file and input decoding. 
diff --git a/examples/connectors/README.md b/examples/connectors/README.md index ae3ac96..85ae1c3 100644 --- a/examples/connectors/README.md +++ b/examples/connectors/README.md @@ -1,9 +1,9 @@ # Connector Examples This directory contains working examples for `extended_data.connectors` and the -vendor adapters that hang off `ConnectorFabric`. +registered adapters that hang off `ConnectorFabric`. -Connector examples assume the major-version `extended-data` contract: vendor +Connector examples assume the major-version `extended-data` contract: external data payloads are promoted into Tier 2 containers at connector boundaries. Callers can use `ExtendedDict`, `ExtendedList`, and `ExtendedString` methods on decoded API, file, and SDK-shaped results, then call `to_builtin()` only when a diff --git a/pyproject.toml b/pyproject.toml index 2f0a566..a775f8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "extended-data" version = "7.0.0" -description = "Comprehensive Python data utilities for serialization, inputs, logging, vendor data, and workflows" +description = "Comprehensive Python data utilities for serialization, inputs, logging, external data, and workflows" requires-python = ">=3.10" license = { text = "MIT" } readme = "README.md" diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 38d64ac..aa69290 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -1,7 +1,7 @@ """Extended Data. This package provides Python utilities for structured data primitives, inputs, -logging, vendor data connectors, and workflow-oriented integrations. +logging, external data connectors, and workflow-oriented integrations. 
""" from __future__ import annotations @@ -53,6 +53,7 @@ from extended_data.connectors import ( AnthropicConnector, AWSConnector, + ConnectorBase, ConnectorFabric, ConnectorInfo, CursorConnector, @@ -62,7 +63,6 @@ MeshyConnector, SlackConnector, VaultConnector, - VendorConnectorBase, ZoomConnector, get_connector, get_connector_class, @@ -96,7 +96,7 @@ "SyncOptions": ("extended_data.secrets", "SyncOptions"), "SyncResult": ("extended_data.secrets", "SyncResult"), "VaultConnector": ("extended_data.connectors", "VaultConnector"), - "VendorConnectorBase": ("extended_data.connectors", "VendorConnectorBase"), + "ConnectorBase": ("extended_data.connectors", "ConnectorBase"), "ZoomConnector": ("extended_data.connectors", "ZoomConnector"), "directed_inputs": ("extended_data.inputs", "directed_inputs"), "get_connector": ("extended_data.connectors", "get_connector"), @@ -122,6 +122,7 @@ def __getattr__(name: str) -> Any: __all__ = [ "AWSConnector", "AnthropicConnector", + "ConnectorBase", "ConnectorFabric", "ConnectorInfo", "CursorConnector", @@ -150,7 +151,6 @@ def __getattr__(name: str) -> Any: "SyncOptions", "SyncResult", "VaultConnector", - "VendorConnectorBase", "WorkflowAction", "WorkflowResult", "WorkflowStep", diff --git a/src/extended_data/connectors/__init__.py b/src/extended_data/connectors/__init__.py index eecbd0b..03eabe6 100644 --- a/src/extended_data/connectors/__init__.py +++ b/src/extended_data/connectors/__init__.py @@ -58,7 +58,7 @@ class MyConnector(AWSConnector): AWSS3Mixin, AWSSSOmixin, ) -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.connectors.cloud_params import ( get_aws_call_params, get_cloud_call_params, @@ -90,6 +90,7 @@ class MyConnector(AWSConnector): "AWSS3Mixin", "AWSSSOmixin", "AnthropicConnector", + "ConnectorBase", "ConnectorFabric", "ConnectorInfo", "CursorConnector", @@ -104,7 +105,6 @@ class MyConnector(AWSConnector): "SecretsConnector", 
"SlackConnector", "VaultConnector", - "VendorConnectorBase", "ZoomConnector", "__version__", "get_aws_call_params", diff --git a/src/extended_data/connectors/_optional.py b/src/extended_data/connectors/_optional.py index f761dd9..97fe29c 100644 --- a/src/extended_data/connectors/_optional.py +++ b/src/extended_data/connectors/_optional.py @@ -25,7 +25,7 @@ # Mapping of package names to their extras PACKAGE_TO_EXTRA: dict[str, str] = { - # Vendor connectors + # Connector extras "boto3": "aws", "google.cloud": "google", "google.api_core": "google", diff --git a/src/extended_data/connectors/anthropic/__init__.py b/src/extended_data/connectors/anthropic/__init__.py index cb037f6..69b371b 100644 --- a/src/extended_data/connectors/anthropic/__init__.py +++ b/src/extended_data/connectors/anthropic/__init__.py @@ -37,7 +37,7 @@ from pydantic import BaseModel, ConfigDict, Field, ValidationError -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, extend_data, to_builtin from extended_data.logging import Logging from extended_data.primitives.redaction import redact_sensitive_text @@ -198,7 +198,7 @@ class AgentExecutionResult: # ============================================================================= -class AnthropicConnector(VendorConnectorBase): +class AnthropicConnector(ConnectorBase): """Anthropic Claude API connector. 
Provides HTTP client access to Anthropic's Claude AI API for message diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index 701f072..13561cb 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -24,7 +24,7 @@ from extended_data.connectors.aws.organizations import AWSOrganizationsMixin from extended_data.connectors.aws.s3 import AWSS3Mixin from extended_data.connectors.aws.sso import AWSSSOmixin -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.logging import Logging from extended_data.primitives import is_nothing @@ -60,7 +60,7 @@ def _load_aws_sdk() -> Any: return boto3 -class AWSConnector(AWSOrganizationsMixin, AWSSSOmixin, AWSS3Mixin, VendorConnectorBase): +class AWSConnector(AWSOrganizationsMixin, AWSSSOmixin, AWSS3Mixin, ConnectorBase): """AWS connector for boto3 client, resource, and vendor data operations. This first-class connector provides: diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index 4675b3a..b36ad6e 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -1,6 +1,6 @@ """Base class for all extended data connectors. -This module provides VendorConnectorBase - the foundation for ALL connectors +This module provides ConnectorBase - the foundation for ALL connectors in the package connector fabric. It extends InputProvider and provides: 1. Credential loading from env vars, stdin, or direct inputs @@ -11,9 +11,9 @@ ALL connectors should extend this class instead of InputProvider directly. 
Usage: - from extended_data import ExtendedDict, VendorConnectorBase + from extended_data import ExtendedDict, ConnectorBase - class MyConnector(VendorConnectorBase): + class MyConnector(ConnectorBase): API_KEY_ENV = "MY_API_KEY" # Required env var name BASE_URL = "https://api.example.com" @@ -79,7 +79,7 @@ def __init__(self, message: str, status_code: int | None = None): self.status_code = status_code -class VendorConnectorBase(InputProvider, ABC): +class ConnectorBase(InputProvider, ABC): """Base class for all extended data connectors. Provides: @@ -114,8 +114,8 @@ class VendorConnectorBase(InputProvider, ABC): # Each subclass gets its own lock and timestamp to avoid cross-connector interference. # This is intentionally class-level (not instance-level) so all instances of the same # connector type share rate limiting, but different connector types are independent. - _rate_limit_locks: ClassVar[dict[builtins.type[VendorConnectorBase], threading.Lock]] = {} - _last_request_times: ClassVar[dict[builtins.type[VendorConnectorBase], float]] = {} + _rate_limit_locks: ClassVar[dict[builtins.type[ConnectorBase], threading.Lock]] = {} + _last_request_times: ClassVar[dict[builtins.type[ConnectorBase], float]] = {} def __init__( self, diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index 49407c4..b1f9cba 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -1,4 +1,4 @@ -"""ConnectorFabric - cached vendor connector access for Extended Data.""" +"""ConnectorFabric - cached connector access for Extended Data.""" from __future__ import annotations @@ -54,7 +54,7 @@ def _cache_safe_value(name: str, value: Any) -> Any: # Optional connectors - imported lazily when methods are called -# This allows the package to be imported without all vendor SDKs installed +# This allows the package to be imported without all optional SDKs installed if TYPE_CHECKING: import boto3 @@ -73,7 
+73,7 @@ def _cache_safe_value(name: str, value: Any) -> Any: class ConnectorFabric(InputProvider): """Public API for extended data connectors with client caching. - This class provides cached access to registered vendor connectors while + This class provides cached access to registered connectors while sharing input snapshots, lifecycle logging, and data normalization. Usage: @@ -134,7 +134,7 @@ def get_connector(self, name: str, **kwargs: Any) -> Any: The connector receives the fabric's shared inputs and logger unless explicit values are passed in ``kwargs``. This is the generic path for - vendor adapters that are registered through entry points or built-ins. + connectors that are registered through entry points or built-ins. """ connector_name = name.strip().lower() cache_kwargs = {"name": connector_name, **kwargs} diff --git a/src/extended_data/connectors/cursor/__init__.py b/src/extended_data/connectors/cursor/__init__.py index 5e23d5a..fd1976e 100644 --- a/src/extended_data/connectors/cursor/__init__.py +++ b/src/extended_data/connectors/cursor/__init__.py @@ -34,7 +34,7 @@ from pydantic import BaseModel, ConfigDict, Field, ValidationError -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, to_builtin from extended_data.logging import Logging from extended_data.primitives.redaction import redact_sensitive_text @@ -339,7 +339,7 @@ def sanitize_error(error: Any, *, values: Iterable[Any] | None = None) -> str: # ============================================================================= -class CursorConnector(VendorConnectorBase): +class CursorConnector(ConnectorBase): """Cursor Background Agent API connector. 
Provides HTTP client access to Cursor's agent management API for spawning, diff --git a/src/extended_data/connectors/github/__init__.py b/src/extended_data/connectors/github/__init__.py index 1aad132..185b85a 100644 --- a/src/extended_data/connectors/github/__init__.py +++ b/src/extended_data/connectors/github/__init__.py @@ -9,7 +9,7 @@ from typing import Any from extended_data.connectors._optional import require_extra -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.connectors.github._diagnostics import safe_github_ref, safe_github_text from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple from extended_data.io import ( @@ -81,7 +81,7 @@ def get_github_api_error(exc: BaseException) -> str | None: DEFAULT_PER_PAGE = 100 -class GitHubConnector(VendorConnectorBase): +class GitHubConnector(ConnectorBase): """GitHub connector for repository operations.""" def __init__( diff --git a/src/extended_data/connectors/google/__init__.py b/src/extended_data/connectors/google/__init__.py index 6243c16..abb957b 100644 --- a/src/extended_data/connectors/google/__init__.py +++ b/src/extended_data/connectors/google/__init__.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any, cast from extended_data.connectors._optional import require_extra -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.connectors.google.billing import GoogleBillingMixin from extended_data.connectors.google.cloud import GoogleCloudMixin from extended_data.connectors.google.services import GoogleServicesMixin @@ -57,7 +57,7 @@ class GoogleConnector( GoogleCloudMixin, GoogleBillingMixin, GoogleServicesMixin, - VendorConnectorBase, + ConnectorBase, ): """Google Cloud and Workspace connector. @@ -84,7 +84,7 @@ def __init__( scopes: OAuth scopes to request. Defaults to common scopes. 
subject: Email to impersonate via domain-wide delegation. logger: Optional Logging instance. - **kwargs: Additional arguments passed to VendorConnectorBase. + **kwargs: Additional arguments passed to ConnectorBase. """ super().__init__(logger=logger, **kwargs) _load_google_sdk() diff --git a/src/extended_data/connectors/google/jules.py b/src/extended_data/connectors/google/jules.py index ec6dce4..397c3ae 100644 --- a/src/extended_data/connectors/google/jules.py +++ b/src/extended_data/connectors/google/jules.py @@ -35,7 +35,7 @@ from pydantic import BaseModel, Field, ValidationError -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.connectors.google._diagnostics import safe_google_text from extended_data.containers import ExtendedDict, ExtendedList, to_builtin from extended_data.primitives.redaction import redact_sensitive_data @@ -130,7 +130,7 @@ def __init__(self, message: str, code: int = 0, details: Any = None): self.details = details -class JulesConnector(VendorConnectorBase): +class JulesConnector(ConnectorBase): """Connector for Google Jules AI Agent API. Provides methods to interact with Jules for automated coding tasks. diff --git a/src/extended_data/connectors/meshy/connector.py b/src/extended_data/connectors/meshy/connector.py index 9043a36..38a0024 100644 --- a/src/extended_data/connectors/meshy/connector.py +++ b/src/extended_data/connectors/meshy/connector.py @@ -8,12 +8,12 @@ from typing import Any -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.connectors.meshy import animate, image3d, retexture, rigging, text3d from extended_data.containers import ExtendedDict, ExtendedString -class MeshyConnector(VendorConnectorBase): +class MeshyConnector(ConnectorBase): """Meshy AI 3D generation connector. Provides access to text-to-3D, image-to-3D, rigging, animation, and retexturing. 
diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 44385c2..5168188 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -1,9 +1,9 @@ -"""Vendor Connector Registry with Entry Points. +"""Connector Registry with Entry Points. This module provides automatic discovery and registration of extended data connectors using Python's entry points system. This allows: -1. DRY interface via VendorConnectorBase ABC +1. DRY interface via ConnectorBase ABC 2. Automatic discovery of all connectors (even from other packages) 3. Unified factory function for instantiation 4. Same registry used by both MCP and CLI @@ -46,7 +46,7 @@ if TYPE_CHECKING: - from extended_data.connectors.base import VendorConnectorBase + from extended_data.connectors.base import ConnectorBase @dataclass(frozen=True) @@ -111,7 +111,7 @@ def as_dict(self) -> ExtendedDict: # Cache for discovered connectors -_connector_cache: dict[str, builtins.type[VendorConnectorBase]] | None = None +_connector_cache: dict[str, builtins.type[ConnectorBase]] | None = None _missing_builtin_connectors: dict[str, ImportError] = {} @@ -120,14 +120,14 @@ def _normalize_connector_name(name: str) -> str: return name.strip().lower() -def _discover_connectors() -> dict[str, builtins.type[VendorConnectorBase]]: +def _discover_connectors() -> dict[str, builtins.type[ConnectorBase]]: """Discover all registered connectors via entry points.""" global _connector_cache if _connector_cache is not None: return _connector_cache - connectors: dict[str, builtins.type[VendorConnectorBase]] = {} + connectors: dict[str, builtins.type[ConnectorBase]] = {} # Python 3.10+ uses importlib.metadata from importlib.metadata import entry_points @@ -187,7 +187,7 @@ def _raise_unregistered_builtin_connector(name: str) -> NoReturn: ) -def _list_connector_classes() -> dict[str, builtins.type[VendorConnectorBase]]: +def _list_connector_classes() -> dict[str, 
builtins.type[ConnectorBase]]: """List available connector classes for internal tool registration.""" return _discover_connectors().copy() @@ -207,7 +207,7 @@ def list_connectors() -> ExtendedList[ExtendedString]: ) -def get_connector_class(name: str) -> builtins.type[VendorConnectorBase]: +def get_connector_class(name: str) -> builtins.type[ConnectorBase]: """Get a connector class by name. Args: @@ -239,7 +239,7 @@ def get_connector_class(name: str) -> builtins.type[VendorConnectorBase]: return connectors[name_lower] -def get_connector(name: str, **kwargs: Any) -> VendorConnectorBase: +def get_connector(name: str, **kwargs: Any) -> ConnectorBase: """Factory to instantiate a connector by name. Args: @@ -267,7 +267,7 @@ def clear_cache() -> None: _missing_builtin_connectors.clear() -def _get_description(cls: builtins.type[VendorConnectorBase]) -> str | None: +def _get_description(cls: builtins.type[ConnectorBase]) -> str | None: """Get the first useful line from a connector docstring.""" if not cls.__doc__: return None @@ -278,7 +278,7 @@ def _get_description(cls: builtins.type[VendorConnectorBase]) -> str | None: return None -def _available_connector_info(name: str, cls: builtins.type[VendorConnectorBase]) -> ConnectorInfo: +def _available_connector_info(name: str, cls: builtins.type[ConnectorBase]) -> ConnectorInfo: """Build metadata for a loadable connector.""" spec = BUILTIN_CONNECTORS.get(name) source = "builtin" if spec else "entry_point" diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index d6f98e6..0c5579c 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -38,7 +38,7 @@ from pathlib import Path from typing import Any -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.containers import ExtendedDict, extend_data, to_builtin from 
extended_data.io import DataFile, wrap_raw_data_for_export from extended_data.io.files import decode_file @@ -140,7 +140,7 @@ def to_dict(self) -> ExtendedDict: return extend_data(asdict(self)) -class SecretsConnector(VendorConnectorBase): +class SecretsConnector(ConnectorBase): """Enterprise-grade SecretSync connector. This connector wraps the standalone SecretSync project @@ -168,7 +168,7 @@ def __init__( Args: cli_path: Path to secretsync CLI binary (for CLI mode) logger: Logger instance - **kwargs: Passed to VendorConnectorBase + **kwargs: Passed to ConnectorBase """ super().__init__(logger=logger, **kwargs) diff --git a/src/extended_data/connectors/slack/__init__.py b/src/extended_data/connectors/slack/__init__.py index c701b49..bc770cb 100644 --- a/src/extended_data/connectors/slack/__init__.py +++ b/src/extended_data/connectors/slack/__init__.py @@ -24,7 +24,7 @@ def _batched(iterable: Iterable[Any], n: int) -> Iterator[tuple[Any, ...]]: from extended_data.connectors._optional import require_extra -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, extend_data, to_builtin from extended_data.io import wrap_raw_data_for_export from extended_data.logging import Logging @@ -204,7 +204,7 @@ def get_rich_text_blocks( return extend_data([{"type": "rich_text", "elements": elements}, get_divider()]) -class SlackConnector(VendorConnectorBase): +class SlackConnector(ConnectorBase): """Slack connector for messaging, directory, and channel management.""" def __init__( @@ -220,7 +220,7 @@ def __init__( token: Slack user token with directory scopes. bot_token: Bot token used for posting messages. logger: Optional shared logger instance. - **kwargs: Extra keyword arguments forwarded to VendorConnectorBase. + **kwargs: Extra keyword arguments forwarded to ConnectorBase. 
""" super().__init__(logger=logger, **kwargs) _load_slack_sdk() diff --git a/src/extended_data/connectors/surface.py b/src/extended_data/connectors/surface.py index 6e722e8..a3f56fb 100644 --- a/src/extended_data/connectors/surface.py +++ b/src/extended_data/connectors/surface.py @@ -31,7 +31,7 @@ def is_connector_data_method(method: Any) -> bool: return False qualname = getattr(method, "__qualname__", "") - if qualname.startswith(("VendorConnectorBase.", "InputProvider.")): + if qualname.startswith(("ConnectorBase.", "InputProvider.")): return False return annotation_includes_extended_payload(return_annotation(method)) diff --git a/src/extended_data/connectors/vault/__init__.py b/src/extended_data/connectors/vault/__init__.py index 32cbc1b..1cb2bdc 100644 --- a/src/extended_data/connectors/vault/__init__.py +++ b/src/extended_data/connectors/vault/__init__.py @@ -8,7 +8,7 @@ from typing import TYPE_CHECKING, Any from extended_data.connectors._optional import require_extra -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.logging import Logging from extended_data.primitives import is_nothing @@ -69,7 +69,7 @@ def _iter_diagnostic_values(values: Iterable[Any]) -> Iterable[Any]: yield value -class VaultConnector(VendorConnectorBase): +class VaultConnector(ConnectorBase): """Vault connector with token and AppRole authentication.""" def __init__( diff --git a/src/extended_data/connectors/zoom/__init__.py b/src/extended_data/connectors/zoom/__init__.py index 26a7030..aa46e2a 100644 --- a/src/extended_data/connectors/zoom/__init__.py +++ b/src/extended_data/connectors/zoom/__init__.py @@ -9,7 +9,7 @@ import requests -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.containers import ExtendedDict, ExtendedList, 
to_builtin from extended_data.io.files import decode_file from extended_data.logging import Logging @@ -50,7 +50,7 @@ def _zoom_response_error(action: str, data: Any, *sensitive_values: Any) -> Runt return RuntimeError(f"{action}: {_safe_zoom_text(data, *sensitive_values)}") -class ZoomConnector(VendorConnectorBase): +class ZoomConnector(ConnectorBase): """Zoom connector for user management.""" def __init__( diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py index 4366725..72a6390 100644 --- a/tests/connectors/test_base.py +++ b/tests/connectors/test_base.py @@ -11,13 +11,13 @@ from pydantic import BaseModel, Field -from extended_data.connectors.base import ConnectorAPIError, RateLimitError, VendorConnectorBase +from extended_data.connectors.base import ConnectorAPIError, ConnectorBase, RateLimitError from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.io import DataFile from extended_data.logging import Logging -class ExampleConnector(VendorConnectorBase): +class ExampleConnector(ConnectorBase): """Small connector used to exercise the base class.""" BASE_URL = "https://api.example.com" diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index 0a4f73d..15aa6f6 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -17,7 +17,7 @@ from extended_data.connectors.aws.organizations import AWSOrganizationsMixin from extended_data.connectors.aws.s3 import AWSS3Mixin from extended_data.connectors.aws.sso import AWSSSOmixin -from extended_data.connectors.base import VendorConnectorBase +from extended_data.connectors.base import ConnectorBase from extended_data.connectors.cursor import CursorConnector from extended_data.connectors.github import GitHubConnector from extended_data.connectors.google import GoogleConnector @@ -206,7 +206,7 @@ RAW_CONNECTOR_BOUNDARIES = { 
("src/extended_data/connectors/ai_tools.py", "build_langchain_tools"), - ("src/extended_data/connectors/base.py", "VendorConnectorBase.get_tools"), + ("src/extended_data/connectors/base.py", "ConnectorBase.get_tools"), ("src/extended_data/connectors/surface.py", "connector_data_methods"), ("src/extended_data/connectors/zoom/__init__.py", "ZoomConnector.get_headers"), } @@ -240,26 +240,26 @@ } RAW_DATA_SURFACE_METHODS = ( - VendorConnectorBase.close, - VendorConnectorBase.decode_response_file, - VendorConnectorBase.delete, - VendorConnectorBase.delete_data, - VendorConnectorBase.download, - VendorConnectorBase.extend_result, - VendorConnectorBase.get, - VendorConnectorBase.get_ai_tool_definitions, - VendorConnectorBase.get_data, - VendorConnectorBase.get_tools, - VendorConnectorBase.handle_ai_tool_call, - VendorConnectorBase.patch, - VendorConnectorBase.patch_data, - VendorConnectorBase.post, - VendorConnectorBase.post_data, - VendorConnectorBase.put, - VendorConnectorBase.put_data, - VendorConnectorBase.request, - VendorConnectorBase.request_data, - VendorConnectorBase.request_data_file, + ConnectorBase.close, + ConnectorBase.decode_response_file, + ConnectorBase.delete, + ConnectorBase.delete_data, + ConnectorBase.download, + ConnectorBase.extend_result, + ConnectorBase.get, + ConnectorBase.get_ai_tool_definitions, + ConnectorBase.get_data, + ConnectorBase.get_tools, + ConnectorBase.handle_ai_tool_call, + ConnectorBase.patch, + ConnectorBase.patch_data, + ConnectorBase.post, + ConnectorBase.post_data, + ConnectorBase.put, + ConnectorBase.put_data, + ConnectorBase.request, + ConnectorBase.request_data, + ConnectorBase.request_data_file, InputProvider.freeze_inputs, InputProvider.get_input, InputProvider.merge_inputs, diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 702146e..6116585 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -94,9 +94,13 @@ def 
test_root_lazy_exports_do_not_describe_tier1_primitives() -> None: def test_clean_major_version_public_names() -> None: """The public surface uses integrated extended-data names.""" assert inputs.InputProvider.__name__ == "InputProvider" + assert connectors.ConnectorBase.__name__ == "ConnectorBase" assert connectors.ConnectorFabric is ConnectorFabric + assert extended_data.ConnectorBase is connectors.ConnectorBase assert not hasattr(inputs, "DirectedInputsClass") + assert not hasattr(connectors, "VendorConnectorBase") assert not hasattr(connectors, "VendorConnectors") + assert not hasattr(extended_data, "VendorConnectorBase") assert not hasattr(connectors, "AWSConnectorFull") assert not hasattr(connectors, "GoogleConnectorFull") assert not hasattr(connectors, "GoogleCloudConnector") @@ -147,6 +151,7 @@ def test_root_exports_first_class_integrated_surfaces() -> None: assert extended_data.DataWorkflow.__name__ == "DataWorkflow" assert extended_data.InputProvider is InputProvider assert extended_data.Logging is Logging + assert extended_data.ConnectorBase is connectors.ConnectorBase assert extended_data.ConnectorFabric is ConnectorFabric assert extended_data.ConnectorInfo.__name__ == "ConnectorInfo" assert extended_data.WorkflowResult.__name__ == "WorkflowResult" diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index b7d1e12..f3eee95 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -23,6 +23,7 @@ REPO_ROOT / "README.md", ) OLD_PROJECT_TERMS = ("terraform-modules", "TerraformDataSource") +OLD_PUBLIC_API_NAMES = ("VendorConnectorBase",) OLD_PACKAGE_NAMESPACES = ( "directed_inputs_class", "extended_data_types", @@ -155,7 +156,7 @@ def test_public_text_does_not_reference_old_project_origins() -> None: if path.suffix in {".pyc", ".png"}: continue text = path.read_text(encoding="utf-8") - for term in OLD_PROJECT_TERMS: + for term in (*OLD_PROJECT_TERMS, *OLD_PUBLIC_API_NAMES): if term in text: 
offenders.append(f"{path.relative_to(REPO_ROOT)}: {term}") From 4dbbc34f1df1f741ebd164b9ff587be885daab45 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 22:13:01 -0500 Subject: [PATCH 254/287] docs: frame connectors as external data surfaces --- README.md | 17 ++++++------ docs/package-surface.md | 14 +++++----- src/extended_data/connectors/__init__.py | 2 +- src/extended_data/connectors/aws/__init__.py | 2 +- tests/core/test_release_hygiene.py | 29 ++++++++++++++++++++ 5 files changed, 47 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 7a2de20..716d3d9 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Extended Data Comprehensive Python data utilities for serialization, configuration inputs, -structured logging, vendor data connectors, and workflow-oriented integrations. +structured logging, external data connectors, and workflow-oriented integrations. This repository is the clean major-version consolidation of the previous `extended-data-library` Python packages. The old package namespaces are not @@ -79,7 +79,7 @@ Connector names are normalized before lookup. If a known built-in connector is requested without its optional extra installed, the registry raises an `ImportError` with the matching `extended-data[...]` install target. -Inspect connector availability before wiring vendor workflows: +Inspect connector availability before wiring external data workflows: ```python names = connectors.list_connectors() @@ -143,7 +143,7 @@ operations live on those connectors directly rather than on separate Google registry names are unified as well: use `google` for Workspace, Cloud, Billing, and service discovery rather than split `google_*` connector aliases. AWS Secrets Manager prefix loading is exposed as the generic -`load_secrets_by_prefix()` data method rather than as a vendor-specific helper. +`load_secrets_by_prefix()` data method rather than as a service-specific helper. 
AWS secret listing/deletion and Vault role filtering APIs use the canonical `prefix` keyword; the old `name_prefix` convenience keyword is intentionally not preserved. @@ -178,10 +178,11 @@ redaction primitives for common secret-bearing keys and token-shaped strings. CLI and MCP connector calls pass method arguments through `values=[...]` as context-sensitive diagnostic data, and connectors can add their own operation-specific values for resource IDs, paths, URLs, emails, prompt text, or -vendor handles. Connector data methods can return structured vendor payloads -without making stdout, tool responses, logs, or raised transport errors a -secret leak by default. Raw SDK/client objects and raw transport responses -remain available from the methods that explicitly return them. +external payload handles. Connector data methods can return structured +connector payloads without making stdout, tool responses, logs, or raised +transport errors a secret leak by default. Raw SDK/client objects and raw +transport responses remain available from the methods that explicitly return +them. The `secrets` connector integrates with the standalone SecretSync project (`jbcom/secrets-sync`) through the `secretsync` CLI. It expects @@ -208,7 +209,7 @@ The package is intentionally tiered: `tuple`, or `MutableSet`-compatible primitives and expose ergonomic methods over Tier 1 functions. - Tier 3 processors use the first two tiers to handle files, inputs, API data, - vendor integrations, and workflows. + external integrations, and workflows. 
Tier 3 decoders return Tier 2 containers by default, so data files, Base64 payloads, and directed inputs can immediately use diff --git a/docs/package-surface.md b/docs/package-surface.md index 1586d1f..a646d96 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -49,7 +49,7 @@ from extended_data.primitives import ( `UserString`, `UserDict`, `UserList`, immutable `tuple`, or `MutableSet`-compatible bases depending on the underlying data shape. - Tier 3 processors use the first two tiers to handle files, imports, exports, - inputs, API data, vendor integrations, and workflows. + inputs, API data, external integrations, and workflows. Clean major-version primitive names, including JSON/YAML/TOML/HCL codecs, live under `extended_data.primitives` and prefer explicit Python words over @@ -65,7 +65,7 @@ internal sorted-default mapping helper class. Use `redact_sensitive_text()` and `redact_sensitive_data()` when diagnostics or JSON-like payloads need common secret-bearing keys and token-shaped strings removed before display. Pass `values=[...]` when a caller knows additional -context-specific identifiers, such as emails, paths, URLs, or vendor resource +context-specific identifiers, such as emails, paths, URLs, or external resource IDs, must be withheld as well; URL-encoded forms of those values are redacted too. @@ -265,7 +265,7 @@ discovery operations. Split `google_cloud`, `google_workspace`, and `google_billing` connector aliases are intentionally not preserved. AWS Secrets Manager prefix loading is generic too: use `AWSConnector.load_secrets_by_prefix()` when a workflow needs a promoted mapping -of secret names to values. The old vendor-specific ASM loader name is +of secret names to values. The old service-specific ASM loader name is intentionally not preserved. AWS secret listing/deletion and Vault role filtering use the canonical `prefix` keyword. 
The old `name_prefix` convenience keyword is intentionally not @@ -298,7 +298,7 @@ stripped and lowercased before lookup. Every built-in connector class registered by name is also exported from `extended_data` and `extended_data.connectors`. Those exports are real classes, -not `None` sentinels. Vendor SDKs load when connector instances need them, so +not `None` sentinels. Optional SDKs load when connector instances need them, so package import remains lightweight while missing optional extras still fail at the operation boundary with install guidance. `list_connectors()` reports the registered connectors whose runtime requirements are installed; use @@ -318,7 +318,7 @@ validation or redaction boundaries. Use `request_data_file()` when an API workflow needs the decoded data plus non-secret response provenance such as source URL, HTTP status, content type, method, and endpoint in a `DataFile` artifact. -Connector methods that return vendor data payloads should call +Connector methods that return external data payloads should call `extend_result()` at the return boundary, making SDK-shaped dictionaries, lists, decoded repository files, GraphQL results, and workflow-builder output first-class `ExtendedDict`, `ExtendedList`, `ExtendedTuple`, and @@ -351,7 +351,7 @@ such as `password`, `api_key`, `access_token`, `authorization`, and errors expose them. CLI and MCP connector calls pass method arguments through `values=[...]` as context-specific diagnostic data, and connectors can add their own operation-specific values for resource IDs, paths, URLs, emails, prompt -text, or vendor payload handles that are sensitive only in that operation. +text, or external payload handles that are sensitive only in that operation. LangChain, CrewAI, Strands, and auto-detection factory functions still return plain framework tool object lists. 
@@ -400,7 +400,7 @@ extended-data methods github --json ## Optional Integrations -Install only the vendor or AI layers you need: +Install only the external service or AI layers you need: ```bash pip install "extended-data[aws,github,vault]" diff --git a/src/extended_data/connectors/__init__.py b/src/extended_data/connectors/__init__.py index 03eabe6..3918541 100644 --- a/src/extended_data/connectors/__init__.py +++ b/src/extended_data/connectors/__init__.py @@ -66,7 +66,7 @@ class MyConnector(AWSConnector): ) from extended_data.connectors.connectors import ConnectorFabric -# Built-in connector classes; vendor SDKs are loaded by connector instances. +# Built-in connector classes; optional SDKs are loaded by connector instances. from extended_data.connectors.cursor import CursorConnector from extended_data.connectors.github import GitHubConnector from extended_data.connectors.google import ( diff --git a/src/extended_data/connectors/aws/__init__.py b/src/extended_data/connectors/aws/__init__.py index 13561cb..eb81e78 100644 --- a/src/extended_data/connectors/aws/__init__.py +++ b/src/extended_data/connectors/aws/__init__.py @@ -61,7 +61,7 @@ def _load_aws_sdk() -> Any: class AWSConnector(AWSOrganizationsMixin, AWSSSOmixin, AWSS3Mixin, ConnectorBase): - """AWS connector for boto3 client, resource, and vendor data operations. + """AWS connector for boto3 client, resource, and external data operations. 
This first-class connector provides: - Session management and role assumption diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index f3eee95..9c556b8 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -33,6 +33,18 @@ REMOVED_PUBLIC_KEYWORDS = ("prefer_native", "unhump_results") FUTURE_API_PROMISES = ("will be available", "coming soon") BOOTSTRAP_TEXT_MARKERS = ("(NEW)",) +IMPRECISE_VENDOR_FRAMING = ( + "vendor data connectors", + "vendor workflows", + "vendor integrations", + "vendor-specific", + "vendor data payloads", + "vendor data operations", + "vendor payload handles", + "vendor resource", + "structured vendor payloads", + "vendor or AI layers", +) SECRETSSYNC_PROJECT_PATTERNS = ( re.compile(r"\bsecretssync\s+(?:Go\s+)?(?:project|library|repo|repository|CLI|connector|bindings?)\b", re.IGNORECASE), re.compile(r"\b(?:project|library|repo|repository|CLI|connector|bindings?)\s+secretssync\b", re.IGNORECASE), @@ -337,6 +349,23 @@ def test_public_guidance_does_not_use_removed_runtime_keywords() -> None: assert offenders == [] +def test_public_guidance_uses_integrated_connector_framing() -> None: + """Public docs should frame connectors as integrated external-data surfaces.""" + offenders: list[str] = [] + paths = [REPO_ROOT / "README.md"] + paths.extend(path for root in (REPO_ROOT / "docs", REPO_ROOT / "examples", REPO_ROOT / "src") for path in root.rglob("*")) + + for path in sorted(path for path in paths if path.is_file()): + if path.suffix in {".pyc", ".png"}: + continue + text = path.read_text(encoding="utf-8") + for phrase in IMPRECISE_VENDOR_FRAMING: + if phrase in text: + offenders.append(f"{path.relative_to(REPO_ROOT)}: {phrase}") + + assert offenders == [] + + def test_public_text_does_not_promise_future_api_surfaces() -> None: """Clean-break docs should describe current surfaces instead of placeholders.""" offenders: list[str] = [] From 3d5bda49a44f18de18cc9ee29bce707eb8ebb509 
Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 22:16:53 -0500 Subject: [PATCH 255/287] fix: support keyed mappings in extended dict update --- src/extended_data/containers/mappings.py | 13 ++++++++++--- tests/core/test_containers.py | 22 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/extended_data/containers/mappings.py b/src/extended_data/containers/mappings.py index bbf2fad..2b4f02a 100644 --- a/src/extended_data/containers/mappings.py +++ b/src/extended_data/containers/mappings.py @@ -65,9 +65,16 @@ def update(self, *args: Any, **kwargs: Any) -> None: # type: ignore[misc] if args: other = args[0] - items = other.items() if hasattr(other, "items") else other - for key, value in items: - self[key] = value + if hasattr(other, "items"): + for key, value in other.items(): + self[key] = value + elif hasattr(other, "keys") and hasattr(other, "__getitem__"): + keys = other.keys() + for key in keys: + self[key] = other[key] + else: + for key, value in other: + self[key] = value for key, value in kwargs.items(): self[key] = value diff --git a/tests/core/test_containers.py b/tests/core/test_containers.py index d329208..ca33595 100644 --- a/tests/core/test_containers.py +++ b/tests/core/test_containers.py @@ -227,6 +227,28 @@ def test_extended_dict_promotes_nested_values_on_mutation() -> None: assert value["service"]["name"].upper_first() == "Api" +def test_extended_dict_update_accepts_keys_getitem_mappings() -> None: + """Mapping-like objects should route through __setitem__ promotion.""" + + class KeyedMapping: + def __init__(self) -> None: + self._data = {"service": {"name": "api"}} + + def keys(self) -> list[str]: + return list(self._data) + + def __getitem__(self, key: str) -> object: + return self._data[key] + + value = ExtendedDict() + + value.update(KeyedMapping()) + + assert isinstance(value["service"], ExtendedDict) + assert isinstance(value["service"]["name"], ExtendedString) + assert 
value["service"]["name"].upper_first() == "Api" + + def test_extended_list_composes_sequence_primitives() -> None: """ExtendedList composes Tier 1 sequence primitives.""" value = ExtendedList([1, [2, [3]], "", 2]) From fc789a66cc9c3e028f2f6fe104b779303f5f91ad Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 22:29:49 -0500 Subject: [PATCH 256/287] docs: frame extended data as standalone package --- README.md | 7 +++---- docs/package-surface.md | 4 ++-- tests/core/test_release_hygiene.py | 23 ++++++++++++++++++++++- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 716d3d9..3cd1797 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,8 @@ Comprehensive Python data utilities for serialization, configuration inputs, structured logging, external data connectors, and workflow-oriented integrations. -This repository is the clean major-version consolidation of the previous -`extended-data-library` Python packages. The old package namespaces are not -preserved; the public API now lives under `extended_data`. +The public API lives under one `extended_data` namespace with explicit tiers for +pure primitives, extended containers, and higher-order data processors. ## Install @@ -124,7 +123,7 @@ string conversion. Use `redact_sensitive_text()` and IDs, emails, paths, or URLs, must be withheld in addition to common secret fields. The old `bytestostr` and `strto*` helper names are not preserved. Old package import namespaces are not shimmed; missing imports are intentional so -remaining migration work fails fast. +incorrect imports fail fast. Tier 1 public exports stay function-oriented; use `get_default_dict()` for nested or sorted default mappings instead of importing the internal helper class. 
diff --git a/docs/package-surface.md b/docs/package-surface.md index a646d96..a81bf35 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -442,8 +442,8 @@ incompatible CrewAI installs fail with the same user-managed install guidance. Optional dependency checks live in `extended_data.connectors._optional`; there are no old package compatibility shims in the public API. Missing old imports -are intentional in this major version so unfinished migration work stays -visible. When a known built-in connector is requested without its optional extra +are intentional in this major version so incorrect callers fail loudly. When a +known built-in connector is requested without its optional extra installed, the registry raises an `ImportError` with the exact `extended-data[...]` install target instead of reporting the connector as unknown. Built-in connectors must also be registered through the diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 9c556b8..fa11c5f 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -22,7 +22,7 @@ REPO_ROOT / "examples", REPO_ROOT / "README.md", ) -OLD_PROJECT_TERMS = ("terraform-modules", "TerraformDataSource") +OLD_PROJECT_TERMS = ("extended-data-library", "terraform-modules", "TerraformDataSource") OLD_PUBLIC_API_NAMES = ("VendorConnectorBase",) OLD_PACKAGE_NAMESPACES = ( "directed_inputs_class", @@ -33,6 +33,10 @@ REMOVED_PUBLIC_KEYWORDS = ("prefer_native", "unhump_results") FUTURE_API_PROMISES = ("will be available", "coming soon") BOOTSTRAP_TEXT_MARKERS = ("(NEW)",) +EXTRACTION_ERA_FRAMING = ( + "remaining migration work", + "unfinished migration work", +) IMPRECISE_VENDOR_FRAMING = ( "vendor data connectors", "vendor workflows", @@ -366,6 +370,23 @@ def test_public_guidance_uses_integrated_connector_framing() -> None: assert offenders == [] +def test_public_guidance_uses_standalone_package_framing() -> None: + """Public docs should not frame 
Extended Data as an extraction artifact.""" + offenders: list[str] = [] + paths = [REPO_ROOT / "README.md"] + paths.extend(path for root in (REPO_ROOT / "docs", REPO_ROOT / "examples", REPO_ROOT / "src") for path in root.rglob("*")) + + for path in sorted(path for path in paths if path.is_file()): + if path.suffix in {".pyc", ".png"}: + continue + text = path.read_text(encoding="utf-8") + for phrase in EXTRACTION_ERA_FRAMING: + if phrase in text: + offenders.append(f"{path.relative_to(REPO_ROOT)}: {phrase}") + + assert offenders == [] + + def test_public_text_does_not_promise_future_api_surfaces() -> None: """Clean-break docs should describe current surfaces instead of placeholders.""" offenders: list[str] = [] From 4991d55e355b82d7111779f6140006baa9b0ec05 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 22:33:36 -0500 Subject: [PATCH 257/287] fix: type generic fabric connector lookup --- src/extended_data/connectors/connectors.py | 3 ++- tests/core/test_package_surface.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index b1f9cba..ebaf57c 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any # Import zoom directly (no extra deps) +from extended_data.connectors.base import ConnectorBase from extended_data.connectors.registry import ( get_connector_class, ) @@ -129,7 +130,7 @@ def get_connector_info(self, name: str, *, include_unavailable: bool = True) -> """Get catalog metadata for one connector.""" return get_registered_connector_info(name, include_unavailable=include_unavailable) - def get_connector(self, name: str, **kwargs: Any) -> Any: + def get_connector(self, name: str, **kwargs: Any) -> ConnectorBase: """Get a cached connector instance by registry name. 
The connector receives the fabric's shared inputs and logger unless diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 6116585..f158b5f 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -170,6 +170,7 @@ def test_root_exports_first_class_integrated_surfaces() -> None: assert isinstance(connector_names[0], ExtendedString) assert get_type_hints(connectors.list_connectors)["return"] == ExtendedList[ExtendedString] assert get_type_hints(ConnectorFabric.list_connectors)["return"] == ExtendedList[ExtendedString] + assert get_type_hints(ConnectorFabric.get_connector)["return"] is connectors.ConnectorBase assert "cursor" in connector_names From 4f6c2a828d7760c31824e6a15591472bbffe4228 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 22:42:35 -0500 Subject: [PATCH 258/287] feat: add connector catalog metadata queries --- README.md | 13 +- docs/package-surface.md | 19 ++- src/extended_data/__init__.py | 12 ++ src/extended_data/connectors/__init__.py | 8 + src/extended_data/connectors/connectors.py | 38 +++++ src/extended_data/connectors/registry.py | 170 +++++++++++++++++++-- tests/connectors/test_connectors.py | 20 +++ tests/core/test_package_surface.py | 24 +++ 8 files changed, 285 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 3cd1797..96f0f73 100644 --- a/README.md +++ b/README.md @@ -84,11 +84,17 @@ Inspect connector availability before wiring external data workflows: names = connectors.list_connectors() catalog = connectors.list_connector_info() github_info = connectors.get_connector_info("github") +cloud_connectors = connectors.list_connectors_by_category("cloud") +repository_connectors = connectors.list_connectors_by_capability("repositories") ``` `list_connectors()` returns an `ExtendedList` of available connector names. -Use `list_connector_info()` when a workflow needs availability, extra, install, -class, module, and description metadata. 
+Use `list_connector_info()` when a workflow needs availability, category, +capability, extra, install, class, module, and description metadata. Use +`list_connector_categories()`, `list_connector_capabilities()`, +`list_connectors_by_category()`, and `list_connectors_by_capability()` when a +workflow needs to select integrations by data domain instead of hard-coding a +single connector name. The same catalog is available from the CLI: @@ -133,6 +139,9 @@ context, caches connector instances, and lets specialized helpers coexist with generic connector lookup. `list_connectors()` returns registered connectors whose runtime requirements are installed; use `list_connector_info()` for the full catalog, including known connectors that need an `extended-data[...]` extra. +Catalog entries include normalized categories and capabilities so workflows can +select cloud, AI, communications, development, media, or secrets integrations +without string matching class names. Secret-like cache key fields such as `token`, `api_key`, `password`, and `client_secret` are hashed before they are stored in the fabric cache. `AWSConnector` and `GoogleConnector` are unified first-class classes: S3, diff --git a/docs/package-surface.md b/docs/package-surface.md index a81bf35..c923e6c 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -303,10 +303,14 @@ package import remains lightweight while missing optional extras still fail at the operation boundary with install guidance. `list_connectors()` reports the registered connectors whose runtime requirements are installed; use `list_connector_info()` when tooling needs the complete catalog plus missing -dependency and install guidance. `ConnectorFabric` hashes secret-like cache-key -fields such as `token`, `api_key`, `password`, and `client_secret` before -storing cache entries, so cache inspection and debug output do not expose raw -credential material. +dependency and install guidance. 
Catalog entries include normalized categories +and capabilities; `list_connector_categories()`, +`list_connector_capabilities()`, `list_connectors_by_category()`, and +`list_connectors_by_capability()` let workflows select integrations by data +domain or supported operation without parsing class names. `ConnectorFabric` +hashes secret-like cache-key fields such as `token`, `api_key`, `password`, and +`client_secret` before storing cache entries, so cache inspection and debug +output do not expose raw credential material. Connectors that inherit `ConnectorBase` can keep raw transport access with `request()` or use `request_data()`, `get_data()`, `post_data()`, and the other @@ -385,11 +389,14 @@ run in the current environment: names = fabric.list_connectors() catalog = fabric.list_connector_info() github_info = fabric.get_connector_info("github") +cloud_connectors = fabric.list_connectors_by_category("cloud") +repository_connectors = fabric.list_connectors_by_capability("repositories") ``` `list_connectors()` returns an `ExtendedList` of available connector names. -Each catalog entry includes availability, source, extra name, install command, -required packages, missing packages, module, class, and description fields. +Each catalog entry includes availability, source, category, capabilities, extra +name, install command, required packages, missing packages, module, class, and +description fields. 
The installed CLI exposes the same discovery layer for shell automation: ```bash diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index aa69290..3b68012 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -67,8 +67,12 @@ get_connector, get_connector_class, get_connector_info, + list_connector_capabilities, + list_connector_categories, list_connector_info, list_connectors, + list_connectors_by_capability, + list_connectors_by_category, ) from extended_data.inputs import InputProvider, directed_inputs, input_config from extended_data.logging import ExitRunError, KeyTransform, Logging @@ -103,8 +107,12 @@ "get_connector_class": ("extended_data.connectors", "get_connector_class"), "get_connector_info": ("extended_data.connectors", "get_connector_info"), "input_config": ("extended_data.inputs", "input_config"), + "list_connector_capabilities": ("extended_data.connectors", "list_connector_capabilities"), + "list_connector_categories": ("extended_data.connectors", "list_connector_categories"), "list_connector_info": ("extended_data.connectors", "list_connector_info"), "list_connectors": ("extended_data.connectors", "list_connectors"), + "list_connectors_by_capability": ("extended_data.connectors", "list_connectors_by_capability"), + "list_connectors_by_category": ("extended_data.connectors", "list_connectors_by_category"), } @@ -174,8 +182,12 @@ def __getattr__(name: str) -> Any: "get_tld", "input_config", "is_url", + "list_connector_capabilities", + "list_connector_categories", "list_connector_info", "list_connectors", + "list_connectors_by_capability", + "list_connectors_by_category", "make_raw_data_export_safe", "match_file_extensions", "read_data_file", diff --git a/src/extended_data/connectors/__init__.py b/src/extended_data/connectors/__init__.py index 3918541..302cb9d 100644 --- a/src/extended_data/connectors/__init__.py +++ b/src/extended_data/connectors/__init__.py @@ -113,8 +113,12 @@ class 
MyConnector(AWSConnector): "get_connector_class", "get_connector_info", "get_google_call_params", + "list_connector_capabilities", + "list_connector_categories", "list_connector_info", "list_connectors", + "list_connectors_by_capability", + "list_connectors_by_category", "meshy", ] @@ -124,6 +128,10 @@ class MyConnector(AWSConnector): get_connector, get_connector_class, get_connector_info, + list_connector_capabilities, + list_connector_categories, list_connector_info, list_connectors, + list_connectors_by_capability, + list_connectors_by_category, ) diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index ebaf57c..e8671f8 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -14,12 +14,24 @@ from extended_data.connectors.registry import ( get_connector_info as get_registered_connector_info, ) +from extended_data.connectors.registry import ( + list_connector_capabilities as list_registered_connector_capabilities, +) +from extended_data.connectors.registry import ( + list_connector_categories as list_registered_connector_categories, +) from extended_data.connectors.registry import ( list_connector_info as list_registered_connector_info, ) from extended_data.connectors.registry import ( list_connectors as list_registered_connectors, ) +from extended_data.connectors.registry import ( + list_connectors_by_capability as list_registered_connectors_by_capability, +) +from extended_data.connectors.registry import ( + list_connectors_by_category as list_registered_connectors_by_category, +) from extended_data.connectors.zoom import ZoomConnector from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.inputs import InputProvider @@ -126,6 +138,32 @@ def list_connector_info(self, *, include_unavailable: bool = True) -> ExtendedLi """List connector catalog metadata.""" return 
list_registered_connector_info(include_unavailable=include_unavailable) + def list_connector_categories(self, *, include_unavailable: bool = True) -> ExtendedList[ExtendedString]: + """List connector catalog categories.""" + return list_registered_connector_categories(include_unavailable=include_unavailable) + + def list_connector_capabilities(self, *, include_unavailable: bool = True) -> ExtendedList[ExtendedString]: + """List connector catalog capabilities.""" + return list_registered_connector_capabilities(include_unavailable=include_unavailable) + + def list_connectors_by_category( + self, + category: str, + *, + include_unavailable: bool = True, + ) -> ExtendedList[ExtendedDict]: + """List connector catalog metadata for a category.""" + return list_registered_connectors_by_category(category, include_unavailable=include_unavailable) + + def list_connectors_by_capability( + self, + capability: str, + *, + include_unavailable: bool = True, + ) -> ExtendedList[ExtendedDict]: + """List connector catalog metadata for a capability.""" + return list_registered_connectors_by_capability(capability, include_unavailable=include_unavailable) + def get_connector_info(self, name: str, *, include_unavailable: bool = True) -> ExtendedDict: """Get catalog metadata for one connector.""" return get_registered_connector_info(name, include_unavailable=include_unavailable) diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 5168188..2af1edf 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -56,6 +56,8 @@ class BuiltinConnectorSpec: module_path: str class_name: str extra: str + category: str = "external" + capabilities: tuple[str, ...] = () @dataclass(frozen=True) @@ -66,6 +68,8 @@ class ConnectorInfo: available: bool source: str extra: str | None + category: str + capabilities: tuple[str, ...] install: str | None requirements: tuple[str, ...] missing: tuple[str, ...] 
@@ -82,6 +86,8 @@ def as_dict(self) -> ExtendedDict: "available": self.available, "source": self.source, "extra": self.extra, + "category": self.category, + "capabilities": list(self.capabilities), "install": self.install, "requirements": list(self.requirements), "missing": list(self.missing), @@ -95,18 +101,84 @@ def as_dict(self) -> ExtendedDict: BUILTIN_CONNECTORS: dict[str, BuiltinConnectorSpec] = { # Google connectors - "jules": BuiltinConnectorSpec("extended_data.connectors.google.jules", "JulesConnector", "google"), - "google": BuiltinConnectorSpec("extended_data.connectors.google", "GoogleConnector", "google"), + "jules": BuiltinConnectorSpec( + "extended_data.connectors.google.jules", + "JulesConnector", + "google", + category="ai", + capabilities=("sources", "sessions"), + ), + "google": BuiltinConnectorSpec( + "extended_data.connectors.google", + "GoogleConnector", + "google", + category="cloud", + capabilities=("workspace", "cloud", "billing", "services", "iam"), + ), # Other connectors - "cursor": BuiltinConnectorSpec("extended_data.connectors.cursor", "CursorConnector", "cursor"), - "github": BuiltinConnectorSpec("extended_data.connectors.github", "GitHubConnector", "github"), - "meshy": BuiltinConnectorSpec("extended_data.connectors.meshy", "MeshyConnector", "meshy"), - "secrets": BuiltinConnectorSpec("extended_data.connectors.secrets", "SecretsConnector", "secrets"), - "anthropic": BuiltinConnectorSpec("extended_data.connectors.anthropic", "AnthropicConnector", "anthropic"), - "aws": BuiltinConnectorSpec("extended_data.connectors.aws", "AWSConnector", "aws"), - "slack": BuiltinConnectorSpec("extended_data.connectors.slack", "SlackConnector", "slack"), - "zoom": BuiltinConnectorSpec("extended_data.connectors.zoom", "ZoomConnector", "zoom"), - "vault": BuiltinConnectorSpec("extended_data.connectors.vault", "VaultConnector", "vault"), + "cursor": BuiltinConnectorSpec( + "extended_data.connectors.cursor", + "CursorConnector", + "cursor", + 
category="ai", + capabilities=("agents", "repositories", "models"), + ), + "github": BuiltinConnectorSpec( + "extended_data.connectors.github", + "GitHubConnector", + "github", + category="development", + capabilities=("repositories", "teams", "files", "graphql", "workflows"), + ), + "meshy": BuiltinConnectorSpec( + "extended_data.connectors.meshy", + "MeshyConnector", + "meshy", + category="media", + capabilities=("3d-generation", "animation", "rigging", "retexturing", "metadata"), + ), + "secrets": BuiltinConnectorSpec( + "extended_data.connectors.secrets", + "SecretsConnector", + "secrets", + category="secrets", + capabilities=("pipeline", "dry-run", "merge", "validation"), + ), + "anthropic": BuiltinConnectorSpec( + "extended_data.connectors.anthropic", + "AnthropicConnector", + "anthropic", + category="ai", + capabilities=("messages", "models", "tools"), + ), + "aws": BuiltinConnectorSpec( + "extended_data.connectors.aws", + "AWSConnector", + "aws", + category="cloud", + capabilities=("identity", "secrets", "storage", "organizations", "sso"), + ), + "slack": BuiltinConnectorSpec( + "extended_data.connectors.slack", + "SlackConnector", + "slack", + category="communications", + capabilities=("messages", "channels", "users", "usergroups"), + ), + "zoom": BuiltinConnectorSpec( + "extended_data.connectors.zoom", + "ZoomConnector", + "zoom", + category="communications", + capabilities=("users", "meetings"), + ), + "vault": BuiltinConnectorSpec( + "extended_data.connectors.vault", + "VaultConnector", + "vault", + category="secrets", + capabilities=("kv", "aws-iam", "leases"), + ), } @@ -120,6 +192,11 @@ def _normalize_connector_name(name: str) -> str: return name.strip().lower() +def _normalize_catalog_token(value: object) -> str: + """Normalize connector catalog categories and capabilities.""" + return str(value).strip().lower().replace("_", "-") + + def _discover_connectors() -> dict[str, builtins.type[ConnectorBase]]: """Discover all registered connectors via 
entry points.""" global _connector_cache @@ -278,6 +355,23 @@ def _get_description(cls: builtins.type[ConnectorBase]) -> str | None: return None +def _get_category(cls: builtins.type[ConnectorBase], spec: BuiltinConnectorSpec | None) -> str: + """Get normalized category metadata for a connector.""" + raw_category = spec.category if spec else getattr(cls, "CONNECTOR_CATEGORY", "external") + return _normalize_catalog_token(raw_category) or "external" + + +def _get_capabilities(cls: builtins.type[ConnectorBase], spec: BuiltinConnectorSpec | None) -> tuple[str, ...]: + """Get normalized capability metadata for a connector.""" + raw_capabilities = spec.capabilities if spec else getattr(cls, "CONNECTOR_CAPABILITIES", ()) + capabilities = ( + _normalize_catalog_token(capability) + for capability in raw_capabilities + if _normalize_catalog_token(capability) + ) + return tuple(dict.fromkeys(capabilities)) + + def _available_connector_info(name: str, cls: builtins.type[ConnectorBase]) -> ConnectorInfo: """Build metadata for a loadable connector.""" spec = BUILTIN_CONNECTORS.get(name) @@ -293,6 +387,8 @@ def _available_connector_info(name: str, cls: builtins.type[ConnectorBase]) -> C available=not missing, source=source, extra=extra, + category=_get_category(cls, spec), + capabilities=_get_capabilities(cls, spec), install=str(install_value) if install_value is not None else None, requirements=requirements, missing=missing, @@ -318,6 +414,8 @@ def _missing_builtin_connector_info(name: str, error: ImportError | None) -> Con available=False, source="builtin", extra=spec.extra, + category=spec.category, + capabilities=spec.capabilities, install=str(install) if (install := get_connector_install_command(name)) is not None else None, requirements=tuple(str(requirement) for requirement in get_connector_requirements(name)), missing=tuple(str(requirement) for requirement in get_missing_connector_requirements(name)), @@ -365,3 +463,53 @@ def list_connector_info(*, include_unavailable: 
bool = True) -> ExtendedList[Ext if not include_unavailable: return extend_data([connector for connector in info if connector["available"]]) return extend_data(info) + + +def list_connector_categories(*, include_unavailable: bool = True) -> ExtendedList[ExtendedString]: + """List connector catalog categories.""" + categories = { + str(connector["category"]) + for connector in list_connector_info(include_unavailable=include_unavailable) + if connector["category"] + } + return extend_data(sorted(categories)) + + +def list_connector_capabilities(*, include_unavailable: bool = True) -> ExtendedList[ExtendedString]: + """List connector catalog capabilities.""" + capabilities: set[str] = set() + for connector in list_connector_info(include_unavailable=include_unavailable): + capabilities.update(str(capability) for capability in connector["capabilities"]) + return extend_data(sorted(capabilities)) + + +def list_connectors_by_category( + category: str, + *, + include_unavailable: bool = True, +) -> ExtendedList[ExtendedDict]: + """List connector catalog entries for a category.""" + normalized = _normalize_catalog_token(category) + return extend_data( + [ + connector + for connector in list_connector_info(include_unavailable=include_unavailable) + if str(connector["category"]) == normalized + ], + ) + + +def list_connectors_by_capability( + capability: str, + *, + include_unavailable: bool = True, +) -> ExtendedList[ExtendedDict]: + """List connector catalog entries for a capability.""" + normalized = _normalize_catalog_token(capability) + return extend_data( + [ + connector + for connector in list_connector_info(include_unavailable=include_unavailable) + if normalized in {str(value) for value in connector["capabilities"]} + ], + ) diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index 20e815c..d196954 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -182,9 +182,29 @@ def 
test_connector_fabric_exposes_catalog_info(self): assert "cursor" in names assert "github" in names github_info = vc.get_connector_info(" github ") + categories = vc.list_connector_categories() + capabilities = vc.list_connector_capabilities() + cloud_connectors = vc.list_connectors_by_category("cloud") + repository_connectors = vc.list_connectors_by_capability("repositories") connector_names = vc.list_connectors() assert isinstance(github_info, ExtendedDict) assert github_info["name"] == "github" + assert github_info["category"] == "development" + assert "repositories" in github_info["capabilities"] + assert isinstance(github_info["capabilities"], ExtendedList) + assert isinstance(github_info["capabilities"][0], ExtendedString) + assert isinstance(categories, ExtendedList) + assert isinstance(categories[0], ExtendedString) + assert "ai" in categories + assert "cloud" in categories + assert isinstance(capabilities, ExtendedList) + assert isinstance(capabilities[0], ExtendedString) + assert "repositories" in capabilities + assert isinstance(cloud_connectors, ExtendedList) + assert all(isinstance(connector, ExtendedDict) for connector in cloud_connectors) + assert {"aws", "google"} <= {connector["name"] for connector in cloud_connectors} + assert isinstance(repository_connectors, ExtendedList) + assert "github" in {connector["name"] for connector in repository_connectors} assert isinstance(connector_names, ExtendedList) assert isinstance(connector_names[0], ExtendedString) assert "cursor" in connector_names diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index f158b5f..5d1ff86 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -165,11 +165,35 @@ def test_root_exports_first_class_integrated_surfaces() -> None: assert callable(extended_data.read_data_file) assert callable(extended_data.get_connector) assert callable(extended_data.list_connector_info) + assert 
callable(extended_data.list_connector_categories) + assert callable(extended_data.list_connector_capabilities) + assert callable(extended_data.list_connectors_by_category) + assert callable(extended_data.list_connectors_by_capability) connector_names = extended_data.list_connectors() + connector_categories = extended_data.list_connector_categories() + connector_capabilities = extended_data.list_connector_capabilities() + cloud_connectors = extended_data.list_connectors_by_category("cloud") + repository_connectors = extended_data.list_connectors_by_capability("repositories") assert isinstance(connector_names, ExtendedList) assert isinstance(connector_names[0], ExtendedString) + assert isinstance(connector_categories, ExtendedList) + assert isinstance(connector_categories[0], ExtendedString) + assert "cloud" in connector_categories + assert isinstance(connector_capabilities, ExtendedList) + assert isinstance(connector_capabilities[0], ExtendedString) + assert "repositories" in connector_capabilities + assert isinstance(cloud_connectors, ExtendedList) + assert isinstance(cloud_connectors[0], ExtendedDict) + assert "aws" in {connector["name"] for connector in cloud_connectors} + assert isinstance(repository_connectors, ExtendedList) + assert isinstance(repository_connectors[0], ExtendedDict) + assert "github" in {connector["name"] for connector in repository_connectors} assert get_type_hints(connectors.list_connectors)["return"] == ExtendedList[ExtendedString] + assert get_type_hints(connectors.list_connector_categories)["return"] == ExtendedList[ExtendedString] + assert get_type_hints(connectors.list_connector_capabilities)["return"] == ExtendedList[ExtendedString] assert get_type_hints(ConnectorFabric.list_connectors)["return"] == ExtendedList[ExtendedString] + assert get_type_hints(ConnectorFabric.list_connector_categories)["return"] == ExtendedList[ExtendedString] + assert get_type_hints(ConnectorFabric.list_connector_capabilities)["return"] == 
ExtendedList[ExtendedString] assert get_type_hints(ConnectorFabric.get_connector)["return"] is connectors.ConnectorBase assert "cursor" in connector_names From e1c492a6399bff6b5c9f4994a035506fd56971b6 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 22:46:38 -0500 Subject: [PATCH 259/287] test: document custom connector catalog metadata --- README.md | 3 +++ docs/package-surface.md | 3 +++ src/extended_data/connectors/base.py | 4 ++++ src/extended_data/connectors/registry.py | 8 +++----- tests/connectors/test_connectors.py | 25 ++++++++++++++++++++++++ 5 files changed, 38 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 96f0f73..eca558d 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,9 @@ catalog, including known connectors that need an `extended-data[...]` extra. Catalog entries include normalized categories and capabilities so workflows can select cloud, AI, communications, development, media, or secrets integrations without string matching class names. +Custom `ConnectorBase` subclasses can set `CONNECTOR_CATEGORY` and +`CONNECTOR_CAPABILITIES` to publish the same metadata through entry-point +registration. Secret-like cache key fields such as `token`, `api_key`, `password`, and `client_secret` are hashed before they are stored in the fabric cache. `AWSConnector` and `GoogleConnector` are unified first-class classes: S3, diff --git a/docs/package-surface.md b/docs/package-surface.md index c923e6c..6aed078 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -311,6 +311,9 @@ domain or supported operation without parsing class names. `ConnectorFabric` hashes secret-like cache-key fields such as `token`, `api_key`, `password`, and `client_secret` before storing cache entries, so cache inspection and debug output do not expose raw credential material. 
+Custom `ConnectorBase` subclasses can set `CONNECTOR_CATEGORY` and +`CONNECTOR_CAPABILITIES` so entry-point connectors participate in the same +catalog query surface. Connectors that inherit `ConnectorBase` can keep raw transport access with `request()` or use `request_data()`, `get_data()`, `post_data()`, and the other diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index b36ad6e..a191cd2 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -93,6 +93,8 @@ class ConnectorBase(InputProvider, ABC): Class Attributes: BASE_URL: API base URL (required for HTTP connectors) API_KEY_ENV: Environment variable name for API key + CONNECTOR_CATEGORY: Catalog category for registry metadata + CONNECTOR_CAPABILITIES: Catalog capabilities for registry metadata TIMEOUT: HTTP timeout in seconds (default 300) MIN_REQUEST_INTERVAL: Minimum seconds between requests (rate limiting) MAX_RETRIES: Maximum retry attempts (default 5) @@ -106,6 +108,8 @@ class ConnectorBase(InputProvider, ABC): # Class-level configuration - override in subclasses BASE_URL: ClassVar[str] = "" API_KEY_ENV: ClassVar[str] = "" + CONNECTOR_CATEGORY: ClassVar[str] = "external" + CONNECTOR_CAPABILITIES: ClassVar[tuple[str, ...]] = () TIMEOUT: ClassVar[float] = 300.0 MIN_REQUEST_INTERVAL: ClassVar[float] = 0.0 # No rate limit by default MAX_RETRIES: ClassVar[int] = 5 diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index 2af1edf..a221980 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -364,11 +364,9 @@ def _get_category(cls: builtins.type[ConnectorBase], spec: BuiltinConnectorSpec def _get_capabilities(cls: builtins.type[ConnectorBase], spec: BuiltinConnectorSpec | None) -> tuple[str, ...]: """Get normalized capability metadata for a connector.""" raw_capabilities = spec.capabilities if spec else getattr(cls, 
"CONNECTOR_CAPABILITIES", ()) - capabilities = ( - _normalize_catalog_token(capability) - for capability in raw_capabilities - if _normalize_catalog_token(capability) - ) + capability_values = (raw_capabilities,) if isinstance(raw_capabilities, str) else raw_capabilities + capabilities = [_normalize_catalog_token(capability) for capability in capability_values] + capabilities = [capability for capability in capabilities if capability] return tuple(dict.fromkeys(capabilities)) diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index d196954..7a92cec 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -7,6 +7,7 @@ import pytest from extended_data.connectors import registry +from extended_data.connectors.base import ConnectorBase from extended_data.connectors.connectors import ConnectorFabric from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString @@ -210,6 +211,30 @@ def test_connector_fabric_exposes_catalog_info(self): assert "cursor" in connector_names assert ("github" in connector_names) is github_info["available"] + def test_external_connector_metadata_uses_base_class_catalog_contract(self, monkeypatch): + """Entry-point connectors can publish category and capability metadata.""" + + class CustomConnector(ConnectorBase): + CONNECTOR_CATEGORY = "Data_Warehouse" + CONNECTOR_CAPABILITIES = ("SQL", "Files", "sql") + + monkeypatch.setattr(registry, "_connector_cache", {"custom": CustomConnector}) + monkeypatch.setattr(registry, "_missing_builtin_connectors", {}) + + info = registry.get_connector_info("custom") + categories = registry.list_connector_categories() + capabilities = registry.list_connector_capabilities() + warehouse_connectors = registry.list_connectors_by_category("data_warehouse") + sql_connectors = registry.list_connectors_by_capability("sql") + + assert info["source"] == "entry_point" + assert info["category"] == "data-warehouse" + assert 
info["capabilities"] == ["sql", "files"] + assert "data-warehouse" in categories + assert "sql" in capabilities + assert warehouse_connectors[0]["name"] == "custom" + assert sql_connectors[0]["name"] == "custom" + @requires_boto3 @patch("extended_data.connectors.aws.AWSConnector") def test_get_aws_connector(self, mock_aws): From 485c752cef98b3fcb7a42e00016d536c89e46eda Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 22:51:27 -0500 Subject: [PATCH 260/287] feat: filter connector catalog in CLI --- README.md | 2 ++ docs/package-surface.md | 2 ++ src/extended_data/connectors/cli.py | 44 +++++++++++++++++++++++--- tests/connectors/test_cli.py | 49 +++++++++++++++++++++++++++-- 4 files changed, 91 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index eca558d..0934735 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,8 @@ The same catalog is available from the CLI: ```bash extended-data list +extended-data list --category cloud +extended-data list --capability repositories --json extended-data info github --json extended-data methods github --json ``` diff --git a/docs/package-surface.md b/docs/package-surface.md index 6aed078..4c64c2a 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -404,6 +404,8 @@ The installed CLI exposes the same discovery layer for shell automation: ```bash extended-data list --json +extended-data list --category cloud +extended-data list --capability repositories --json extended-data info github --json extended-data methods github --json ``` diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index 6bdfb90..8864b61 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -6,6 +6,8 @@ Usage: # List available connectors extended-data list + extended-data list --category cloud + extended-data list --capability repositories # Call any connector data method extended-data call [--arg value ...] 
@@ -27,6 +29,8 @@ get_connector_class, get_connector_info, list_connector_info, + list_connectors_by_capability, + list_connectors_by_category, ) from extended_data.connectors.surface import connector_data_methods, is_connector_data_method from extended_data.containers import ExtendedList @@ -93,6 +97,31 @@ def _write_stderr(message: str) -> None: sys.stderr.write(f"{redact_sensitive_text(message)}\n") +def _filter_connector_info(args: argparse.Namespace) -> ExtendedList[Any]: + """Return connector catalog entries filtered by CLI flags.""" + include_unavailable = not getattr(args, "available_only", False) + info = list_connector_info(include_unavailable=include_unavailable) + names: set[str] | None = None + + if category := getattr(args, "category", None): + names = { + str(connector["name"]) + for connector in list_connectors_by_category(category, include_unavailable=include_unavailable) + } + + if capability := getattr(args, "capability", None): + capability_names = { + str(connector["name"]) + for connector in list_connectors_by_capability(capability, include_unavailable=include_unavailable) + } + names = capability_names if names is None else names & capability_names + + if names is None: + return info + + return ExtendedList(connector for connector in info if str(connector["name"]) in names) + + # ============================================================================= # Commands # ============================================================================= @@ -100,20 +129,21 @@ def _write_stderr(message: str) -> None: def cmd_list(args: argparse.Namespace) -> int: """List connector catalog entries.""" - info = list_connector_info(include_unavailable=not getattr(args, "available_only", False)) + info = _filter_connector_info(args) if args.json: _write_stdout(_json_output(info)) return 0 - _write_stdout(f"{'name':<18} {'status':<11} {'extra':<10} {'class':<28} install") + _write_stdout(f"{'name':<18} {'status':<11} {'category':<16} {'capabilities':<34} 
{'extra':<10} install") for c in info: status = "available" if c["available"] else "missing" name = str(c["name"]) + category = str(c.get("category") or "-") + capabilities = _format_list(c.get("capabilities")) extra = str(c.get("extra") or "-") - class_name = str(c.get("class") or "-") install = str(c.get("install") or "-") - _write_stdout(f"{name:<18} {status:<11} {extra:<10} {class_name:<28} {install}") + _write_stdout(f"{name:<18} {status:<11} {category:<16} {capabilities:<34} {extra:<10} {install}") return 0 @@ -220,6 +250,8 @@ def cmd_info(args: argparse.Namespace) -> int: "name", "available", "source", + "category", + "capabilities", "extra", "install", "requirements", @@ -253,6 +285,8 @@ def main() -> int: epilog=""" Examples: extended-data list # List all connectors + extended-data list --category cloud # List cloud connectors + extended-data list --capability files # List connectors by capability extended-data methods jules # List Jules data methods extended-data call jules list_sources # Call a method extended-data call cursor list_agents @@ -265,6 +299,8 @@ def main() -> int: list_parser = subparsers.add_parser("list", help="List available connectors") list_parser.add_argument("--json", action="store_true", help="JSON output") list_parser.add_argument("--available-only", action="store_true", help="Hide connectors with missing extras") + list_parser.add_argument("--category", help="Filter by catalog category") + list_parser.add_argument("--capability", help="Filter by catalog capability") list_parser.set_defaults(func=cmd_list) # Methods command diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index f59c587..483fe03 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -36,7 +36,7 @@ def secrets(self) -> ExtendedDict: def test_cli_list() -> None: """Test the list command.""" - args = argparse.Namespace(json=False, available_only=False) + args = argparse.Namespace(json=False, available_only=False, 
category=None, capability=None) with patch("sys.stdout.write") as mock_write: exit_code = cmd_list(args) assert exit_code == 0 @@ -45,11 +45,14 @@ def test_cli_list() -> None: output = "".join(call.args[0] for call in mock_write.call_args_list if call.args) assert "aws" in output assert "google" in output + assert "category" in output + assert "capabilities" in output + assert "cloud" in output def test_cli_list_json() -> None: """List command can emit machine-readable connector metadata.""" - args = argparse.Namespace(json=True, available_only=False) + args = argparse.Namespace(json=True, available_only=False, category=None, capability=None) with patch("sys.stdout.write") as mock_write: exit_code = cmd_list(args) @@ -57,9 +60,49 @@ def test_cli_list_json() -> None: output = mock_write.call_args.args[0] assert '"name": "github"' in output assert '"available":' in output + assert '"category": "development"' in output + assert '"capabilities":' in output assert "api_key_env" not in output +def test_cli_list_filters_by_category() -> None: + """List command can filter the connector catalog by category.""" + args = argparse.Namespace(json=False, available_only=False, category="cloud", capability=None) + with patch("sys.stdout.write") as mock_write: + exit_code = cmd_list(args) + + assert exit_code == 0 + output = "".join(call.args[0] for call in mock_write.call_args_list if call.args) + assert "aws" in output + assert "google" in output + assert "github" not in output + + +def test_cli_list_filters_by_capability_json() -> None: + """List command can emit capability-filtered connector metadata.""" + args = argparse.Namespace(json=True, available_only=False, category=None, capability="repositories") + with patch("sys.stdout.write") as mock_write: + exit_code = cmd_list(args) + + assert exit_code == 0 + entries = json.loads(mock_write.call_args.args[0]) + names = {entry["name"] for entry in entries} + assert "github" in names + assert "cursor" in names + assert "aws" not 
in names + + +def test_cli_list_intersects_category_and_capability_filters() -> None: + """Category and capability filters should narrow the same catalog result.""" + args = argparse.Namespace(json=True, available_only=False, category="ai", capability="repositories") + with patch("sys.stdout.write") as mock_write: + exit_code = cmd_list(args) + + assert exit_code == 0 + entries = json.loads(mock_write.call_args.args[0]) + assert [entry["name"] for entry in entries] == ["cursor"] + + def test_cli_info() -> None: """Info command prints connector metadata.""" args = argparse.Namespace(connector=" github ", json=False) @@ -69,6 +112,8 @@ def test_cli_info() -> None: assert exit_code == 0 output = "".join(call.args[0] for call in mock_write.call_args_list if call.args) assert "name: github" in output + assert "category: development" in output + assert "capabilities: repositories, teams, files, graphql, workflows" in output assert "install: pip install extended-data[github]" in output From 0804983d4b223405381bd6c95e193395cab97ba6 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:00:07 -0500 Subject: [PATCH 261/287] feat: expose connector catalog over MCP --- README.md | 5 ++ docs/package-surface.md | 6 ++ src/extended_data/connectors/mcp.py | 95 ++++++++++++++++++++++++++++- tests/connectors/test_mcp.py | 53 ++++++++++++++++ 4 files changed, 158 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0934735..3c55828 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,11 @@ payload contract; framework factory functions still return framework tool objects. The generic CLI `call` command and MCP bridge expose only methods that advertise Extended Data payload returns. 
+The MCP bridge also exposes credential-free catalog tools such as +`extended_data_list_connector_info`, +`extended_data_list_connectors_by_category`, and +`extended_data_list_connectors_by_capability` so MCP clients can discover +usable integrations before invoking connector methods. CLI `--arg` values that look like JSON are decoded through the same structured data boundary used by files, inputs, and connector payloads before method dispatch. diff --git a/docs/package-surface.md b/docs/package-surface.md index 4c64c2a..4e19d91 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -337,6 +337,12 @@ their payload returns as `ExtendedDict` or `ExtendedList[ExtendedDict]`. The generic CLI `call` command and MCP bridge expose only connector methods that advertise Extended Data payload returns, so raw SDK client factories and low-level HTTP helpers do not leak into serialized tool catalogs. +The MCP bridge also publishes credential-free catalog tools: +`extended_data_list_connectors`, `extended_data_list_connector_info`, +`extended_data_get_connector_info`, `extended_data_list_connector_categories`, +`extended_data_list_connector_capabilities`, +`extended_data_list_connectors_by_category`, and +`extended_data_list_connectors_by_capability`. CLI `--arg` values that look like JSON are decoded through the shared structured data boundary before method dispatch, matching file, input, and connector payload decoding. 
diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index 91d7779..26d1f3d 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -27,7 +27,17 @@ from collections.abc import Callable, Iterable, Mapping from typing import Any, cast -from extended_data.connectors.registry import _list_connector_classes, get_connector +from extended_data.connectors.registry import ( + _list_connector_classes, + get_connector, + get_connector_info, + list_connector_capabilities, + list_connector_categories, + list_connector_info, + list_connectors, + list_connectors_by_capability, + list_connectors_by_category, +) from extended_data.connectors.surface import connector_data_methods from extended_data.containers import to_builtin from extended_data.io import wrap_raw_data_for_export @@ -98,6 +108,78 @@ def _get_public_methods(connector_class: builtins.type[Any]) -> list[tuple[str, return connector_data_methods(connector_class) +def _catalog_tool_definitions() -> dict[str, dict[str, Any]]: + """Build credential-free connector catalog MCP tools.""" + empty_schema: dict[str, Any] = {"type": "object", "properties": {}, "required": []} + include_unavailable_schema: dict[str, Any] = { + "type": "object", + "properties": {"include_unavailable": {"type": "boolean", "default": True}}, + "required": [], + } + name_schema: dict[str, Any] = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "include_unavailable": {"type": "boolean", "default": True}, + }, + "required": ["name"], + } + category_schema: dict[str, Any] = { + "type": "object", + "properties": { + "category": {"type": "string"}, + "include_unavailable": {"type": "boolean", "default": True}, + }, + "required": ["category"], + } + capability_schema: dict[str, Any] = { + "type": "object", + "properties": { + "capability": {"type": "string"}, + "include_unavailable": {"type": "boolean", "default": True}, + }, + "required": ["capability"], + } + + 
return { + "extended_data_list_connectors": { + "description": "List available Extended Data connector names.", + "parameters": empty_schema, + "handler": list_connectors, + }, + "extended_data_list_connector_info": { + "description": "List Extended Data connector catalog metadata.", + "parameters": include_unavailable_schema, + "handler": list_connector_info, + }, + "extended_data_get_connector_info": { + "description": "Get Extended Data catalog metadata for one connector.", + "parameters": name_schema, + "handler": get_connector_info, + }, + "extended_data_list_connector_categories": { + "description": "List Extended Data connector catalog categories.", + "parameters": include_unavailable_schema, + "handler": list_connector_categories, + }, + "extended_data_list_connector_capabilities": { + "description": "List Extended Data connector catalog capabilities.", + "parameters": include_unavailable_schema, + "handler": list_connector_capabilities, + }, + "extended_data_list_connectors_by_category": { + "description": "List Extended Data connector catalog entries for a category.", + "parameters": category_schema, + "handler": list_connectors_by_category, + }, + "extended_data_list_connectors_by_capability": { + "description": "List Extended Data connector catalog entries for a capability.", + "parameters": capability_schema, + "handler": list_connectors_by_capability, + }, + } + + def _jsonable_tool_result(result: Any) -> Any: """Lower connector tool results to JSON-compatible Python data.""" if hasattr(result, "model_dump"): @@ -138,6 +220,7 @@ def create_server() -> Any: # Build tool registry from all connectors tools: dict[str, dict[str, Any]] = {} + tools.update(_catalog_tool_definitions()) # Discover all connectors connectors = _list_connector_classes() @@ -189,6 +272,16 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]: return [TextContent(type="text", text=_unknown_tool_text(name))] tool = tools[name] + handler = 
tool.get("handler") + if callable(handler): + try: + result = handler(**arguments) + if inspect.iscoroutine(result): + result = await result + return [TextContent(type="text", text=_tool_result_text(result))] + except Exception as e: + return [TextContent(type="text", text=_tool_error_text(e, arguments.values()))] + connector_name = tool["connector"] method_name = tool["method"] diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index c870b27..b7a85ec 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -8,6 +8,7 @@ from extended_data.connectors import mcp as mcp_module from extended_data.connectors.mcp import ( + _catalog_tool_definitions, _get_public_methods, _jsonable_tool_result, _tool_error_text, @@ -42,6 +43,58 @@ def test_mcp_public_methods_only_include_extended_payload_boundaries() -> None: assert "replace_inputs" not in method_names +def test_catalog_tools_expose_connector_discovery_without_credentials() -> None: + """Generic MCP should expose connector catalog queries as first-class tools.""" + tools = _catalog_tool_definitions() + + expected = { + "extended_data_list_connectors", + "extended_data_list_connector_info", + "extended_data_get_connector_info", + "extended_data_list_connector_categories", + "extended_data_list_connector_capabilities", + "extended_data_list_connectors_by_category", + "extended_data_list_connectors_by_capability", + } + + assert expected <= set(tools) + assert tools["extended_data_get_connector_info"]["parameters"]["required"] == ["name"] + assert tools["extended_data_list_connectors_by_category"]["parameters"]["required"] == ["category"] + assert tools["extended_data_list_connectors_by_capability"]["parameters"]["required"] == ["capability"] + + +def test_catalog_tool_handlers_return_tier2_catalog_payloads() -> None: + """Catalog MCP handlers should reuse the registry's Tier 2 payload surface.""" + tools = _catalog_tool_definitions() + + names = 
tools["extended_data_list_connectors"]["handler"]() + github = tools["extended_data_get_connector_info"]["handler"](name="github") + categories = tools["extended_data_list_connector_categories"]["handler"]() + repositories = tools["extended_data_list_connectors_by_capability"]["handler"](capability="repositories") + + assert isinstance(names, ExtendedList) + assert "github" in names + assert isinstance(github, ExtendedDict) + assert github["category"] == "development" + assert "repositories" in github["capabilities"] + assert isinstance(categories, ExtendedList) + assert "cloud" in categories + assert isinstance(repositories, ExtendedList) + assert "github" in {connector["name"] for connector in repositories} + + +def test_catalog_tool_result_text_uses_shared_export_boundary() -> None: + """Catalog MCP tool output should serialize like connector method output.""" + tools = _catalog_tool_definitions() + payload = tools["extended_data_get_connector_info"]["handler"](name="github") + + text = _tool_result_text(payload) + + assert '"name": "github"' in text + assert '"category": "development"' in text + assert '"capabilities": [' in text + + def test_jsonable_tool_result_lowers_extended_mapping_payloads() -> None: """MCP result serialization keeps Tier 2 mapping payloads as JSON objects.""" payload = ExtendedDict({"service": {"name": "api"}}) From 650a3400836ca05667977968e937aa1a54d46eba Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:03:35 -0500 Subject: [PATCH 262/287] test: avoid optional connector availability assumption --- tests/connectors/test_mcp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index b7a85ec..6860116 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -73,7 +73,7 @@ def test_catalog_tool_handlers_return_tier2_catalog_payloads() -> None: repositories = 
tools["extended_data_list_connectors_by_capability"]["handler"](capability="repositories") assert isinstance(names, ExtendedList) - assert "github" in names + assert "cursor" in names assert isinstance(github, ExtendedDict) assert github["category"] == "development" assert "repositories" in github["capabilities"] From 2938b053d82915fd96e81c18358b28892d9a642c Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:10:35 -0500 Subject: [PATCH 263/287] feat: split connector catalog and availability queries --- README.md | 20 ++++++++++----- docs/package-surface.md | 30 ++++++++++++---------- src/extended_data/__init__.py | 3 +++ src/extended_data/connectors/__init__.py | 2 ++ src/extended_data/connectors/cli.py | 6 ++--- src/extended_data/connectors/connectors.py | 9 ++++++- src/extended_data/connectors/mcp.py | 12 ++++++--- src/extended_data/connectors/registry.py | 24 +++++++++-------- tests/connectors/test_connectors.py | 21 ++++++++++----- tests/connectors/test_mcp.py | 7 ++++- tests/core/test_package_surface.py | 7 +++++ 11 files changed, 96 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 3c55828..8d2ab3a 100644 --- a/README.md +++ b/README.md @@ -78,19 +78,24 @@ Connector names are normalized before lookup. If a known built-in connector is requested without its optional extra installed, the registry raises an `ImportError` with the matching `extended-data[...]` install target. 
-Inspect connector availability before wiring external data workflows: +Inspect the connector catalog and runtime availability before wiring external +data workflows: ```python names = connectors.list_connectors() +available = connectors.list_available_connectors() catalog = connectors.list_connector_info() github_info = connectors.get_connector_info("github") cloud_connectors = connectors.list_connectors_by_category("cloud") repository_connectors = connectors.list_connectors_by_capability("repositories") ``` -`list_connectors()` returns an `ExtendedList` of available connector names. -Use `list_connector_info()` when a workflow needs availability, category, -capability, extra, install, class, module, and description metadata. Use +`list_connectors()` returns an `ExtendedList` of catalog connector names, +including known built-ins whose optional SDK extras are not installed yet. Use +`list_available_connectors()` when a workflow needs only connectors runnable in +the current environment. Use `list_connector_info()` when a workflow needs +availability, category, capability, extra, install, class, module, and +description metadata. Use `list_connector_categories()`, `list_connector_capabilities()`, `list_connectors_by_category()`, and `list_connectors_by_capability()` when a workflow needs to select integrations by data domain instead of hard-coding a @@ -138,9 +143,10 @@ nested or sorted default mappings instead of importing the internal helper class Connectors are first-class adapters in the data fabric. `ConnectorFabric` uses the registry to resolve connectors by name, injects shared input/logging context, caches connector instances, and lets specialized helpers coexist with -generic connector lookup. `list_connectors()` returns registered connectors whose -runtime requirements are installed; use `list_connector_info()` for the full -catalog, including known connectors that need an `extended-data[...]` extra. +generic connector lookup. 
`list_connectors()` returns the full connector +catalog, including known connectors that need an `extended-data[...]` extra; use +`list_available_connectors()` for registered connectors whose runtime +requirements are installed. Catalog entries include normalized categories and capabilities so workflows can select cloud, AI, communications, development, media, or secrets integrations without string matching class names. diff --git a/docs/package-surface.md b/docs/package-surface.md index 4e19d91..afad0f6 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -301,10 +301,12 @@ Every built-in connector class registered by name is also exported from not `None` sentinels. Optional SDKs load when connector instances need them, so package import remains lightweight while missing optional extras still fail at the operation boundary with install guidance. `list_connectors()` reports the -registered connectors whose runtime requirements are installed; use -`list_connector_info()` when tooling needs the complete catalog plus missing -dependency and install guidance. Catalog entries include normalized categories -and capabilities; `list_connector_categories()`, +complete connector catalog, including known connectors whose optional SDK extras +are not installed; use `list_available_connectors()` for only connectors whose +runtime requirements are installed. Use `list_connector_info()` when tooling +needs the complete catalog plus missing dependency and install guidance. +Catalog entries include normalized categories and capabilities; +`list_connector_categories()`, `list_connector_capabilities()`, `list_connectors_by_category()`, and `list_connectors_by_capability()` let workflows select integrations by data domain or supported operation without parsing class names. 
`ConnectorFabric` @@ -338,9 +340,9 @@ The generic CLI `call` command and MCP bridge expose only connector methods that advertise Extended Data payload returns, so raw SDK client factories and low-level HTTP helpers do not leak into serialized tool catalogs. The MCP bridge also publishes credential-free catalog tools: -`extended_data_list_connectors`, `extended_data_list_connector_info`, -`extended_data_get_connector_info`, `extended_data_list_connector_categories`, -`extended_data_list_connector_capabilities`, +`extended_data_list_connectors`, `extended_data_list_available_connectors`, +`extended_data_list_connector_info`, `extended_data_get_connector_info`, +`extended_data_list_connector_categories`, `extended_data_list_connector_capabilities`, `extended_data_list_connectors_by_category`, and `extended_data_list_connectors_by_capability`. CLI `--arg` values that look like JSON are decoded through the shared @@ -391,21 +393,23 @@ result = SecretsConnector().run_pipeline( ) ``` -Use the catalog helpers when a workflow needs to inspect which integrations can -run in the current environment: +Use the catalog helpers when a workflow needs to inspect known integrations and +which ones can run in the current environment: ```python names = fabric.list_connectors() +available = fabric.list_available_connectors() catalog = fabric.list_connector_info() github_info = fabric.get_connector_info("github") cloud_connectors = fabric.list_connectors_by_category("cloud") repository_connectors = fabric.list_connectors_by_capability("repositories") ``` -`list_connectors()` returns an `ExtendedList` of available connector names. -Each catalog entry includes availability, source, category, capabilities, extra -name, install command, required packages, missing packages, module, class, and -description fields. +`list_connectors()` returns an `ExtendedList` of catalog connector names. +`list_available_connectors()` returns the subset runnable in the current +environment. 
Each catalog entry includes availability, source, category, +capabilities, extra name, install command, required packages, missing packages, +module, class, and description fields. The installed CLI exposes the same discovery layer for shell automation: ```bash diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 3b68012..1a669c1 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -67,6 +67,7 @@ get_connector, get_connector_class, get_connector_info, + list_available_connectors, list_connector_capabilities, list_connector_categories, list_connector_info, @@ -107,6 +108,7 @@ "get_connector_class": ("extended_data.connectors", "get_connector_class"), "get_connector_info": ("extended_data.connectors", "get_connector_info"), "input_config": ("extended_data.inputs", "input_config"), + "list_available_connectors": ("extended_data.connectors", "list_available_connectors"), "list_connector_capabilities": ("extended_data.connectors", "list_connector_capabilities"), "list_connector_categories": ("extended_data.connectors", "list_connector_categories"), "list_connector_info": ("extended_data.connectors", "list_connector_info"), @@ -182,6 +184,7 @@ def __getattr__(name: str) -> Any: "get_tld", "input_config", "is_url", + "list_available_connectors", "list_connector_capabilities", "list_connector_categories", "list_connector_info", diff --git a/src/extended_data/connectors/__init__.py b/src/extended_data/connectors/__init__.py index 302cb9d..87331c2 100644 --- a/src/extended_data/connectors/__init__.py +++ b/src/extended_data/connectors/__init__.py @@ -113,6 +113,7 @@ class MyConnector(AWSConnector): "get_connector_class", "get_connector_info", "get_google_call_params", + "list_available_connectors", "list_connector_capabilities", "list_connector_categories", "list_connector_info", @@ -128,6 +129,7 @@ class MyConnector(AWSConnector): get_connector, get_connector_class, get_connector_info, + list_available_connectors, 
list_connector_capabilities, list_connector_categories, list_connector_info, diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index 8864b61..4fe91cc 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -4,7 +4,7 @@ using the central registry for discovery. Usage: - # List available connectors + # List connector catalog entries extended-data list extended-data list --category cloud extended-data list --capability repositories @@ -284,7 +284,7 @@ def main() -> int: formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - extended-data list # List all connectors + extended-data list # List connector catalog entries extended-data list --category cloud # List cloud connectors extended-data list --capability files # List connectors by capability extended-data methods jules # List Jules data methods @@ -296,7 +296,7 @@ def main() -> int: subparsers = parser.add_subparsers(dest="command", help="Commands") # List command - list_parser = subparsers.add_parser("list", help="List available connectors") + list_parser = subparsers.add_parser("list", help="List connector catalog entries") list_parser.add_argument("--json", action="store_true", help="JSON output") list_parser.add_argument("--available-only", action="store_true", help="Hide connectors with missing extras") list_parser.add_argument("--category", help="Filter by catalog category") diff --git a/src/extended_data/connectors/connectors.py b/src/extended_data/connectors/connectors.py index e8671f8..6bfc6a1 100644 --- a/src/extended_data/connectors/connectors.py +++ b/src/extended_data/connectors/connectors.py @@ -14,6 +14,9 @@ from extended_data.connectors.registry import ( get_connector_info as get_registered_connector_info, ) +from extended_data.connectors.registry import ( + list_available_connectors as list_registered_available_connectors, +) from extended_data.connectors.registry import ( list_connector_capabilities as 
list_registered_connector_capabilities, ) @@ -131,9 +134,13 @@ def _set_cached_client(self, client_type: str, client: Any, **kwargs: Any) -> No self._client_cache[client_type][cache_key] = client def list_connectors(self) -> ExtendedList[ExtendedString]: - """List connector names available in the current environment.""" + """List connector catalog names.""" return list_registered_connectors() + def list_available_connectors(self) -> ExtendedList[ExtendedString]: + """List connector names available in the current environment.""" + return list_registered_available_connectors() + def list_connector_info(self, *, include_unavailable: bool = True) -> ExtendedList[ExtendedDict]: """List connector catalog metadata.""" return list_registered_connector_info(include_unavailable=include_unavailable) diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index 26d1f3d..a10e739 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -31,6 +31,7 @@ _list_connector_classes, get_connector, get_connector_info, + list_available_connectors, list_connector_capabilities, list_connector_categories, list_connector_info, @@ -110,12 +111,12 @@ def _get_public_methods(connector_class: builtins.type[Any]) -> list[tuple[str, def _catalog_tool_definitions() -> dict[str, dict[str, Any]]: """Build credential-free connector catalog MCP tools.""" - empty_schema: dict[str, Any] = {"type": "object", "properties": {}, "required": []} include_unavailable_schema: dict[str, Any] = { "type": "object", "properties": {"include_unavailable": {"type": "boolean", "default": True}}, "required": [], } + empty_schema: dict[str, Any] = {"type": "object", "properties": {}, "required": []} name_schema: dict[str, Any] = { "type": "object", "properties": { @@ -143,10 +144,15 @@ def _catalog_tool_definitions() -> dict[str, dict[str, Any]]: return { "extended_data_list_connectors": { - "description": "List available Extended Data connector names.", - 
"parameters": empty_schema, + "description": "List Extended Data connector catalog names.", + "parameters": include_unavailable_schema, "handler": list_connectors, }, + "extended_data_list_available_connectors": { + "description": "List Extended Data connectors available in the current environment.", + "parameters": empty_schema, + "handler": list_available_connectors, + }, "extended_data_list_connector_info": { "description": "List Extended Data connector catalog metadata.", "parameters": include_unavailable_schema, diff --git a/src/extended_data/connectors/registry.py b/src/extended_data/connectors/registry.py index a221980..143919b 100644 --- a/src/extended_data/connectors/registry.py +++ b/src/extended_data/connectors/registry.py @@ -9,10 +9,11 @@ 4. Same registry used by both MCP and CLI Usage: - from extended_data.connectors.registry import get_connector, list_connectors + from extended_data.connectors.registry import get_connector, list_available_connectors, list_connectors - # List available connectors - available = list_connectors() + # List catalog connectors or only runtime-ready connectors + catalog = list_connectors() + available = list_available_connectors() # ExtendedList(["anthropic", "aws", "cursor", ...]) # Get a specific connector instance @@ -269,21 +270,22 @@ def _list_connector_classes() -> dict[str, builtins.type[ConnectorBase]]: return _discover_connectors().copy() -def list_connectors() -> ExtendedList[ExtendedString]: - """List registered connector names whose runtime requirements are installed. +def list_connectors(*, include_unavailable: bool = True) -> ExtendedList[ExtendedString]: + """List connector catalog names. Returns: - ExtendedList of usable connector registry names. + ExtendedList of known connector registry names. 
""" return extend_data( - sorted( - name - for name in _discover_connectors() - if not get_missing_connector_requirements(name) - ), + [str(connector["name"]) for connector in list_connector_info(include_unavailable=include_unavailable)], ) +def list_available_connectors() -> ExtendedList[ExtendedString]: + """List connector names whose runtime requirements are installed.""" + return list_connectors(include_unavailable=False) + + def get_connector_class(name: str) -> builtins.type[ConnectorBase]: """Get a connector class by name. diff --git a/tests/connectors/test_connectors.py b/tests/connectors/test_connectors.py index 7a92cec..94134ae 100644 --- a/tests/connectors/test_connectors.py +++ b/tests/connectors/test_connectors.py @@ -188,6 +188,7 @@ def test_connector_fabric_exposes_catalog_info(self): cloud_connectors = vc.list_connectors_by_category("cloud") repository_connectors = vc.list_connectors_by_capability("repositories") connector_names = vc.list_connectors() + available_connector_names = vc.list_available_connectors() assert isinstance(github_info, ExtendedDict) assert github_info["name"] == "github" assert github_info["category"] == "development" @@ -209,7 +210,11 @@ def test_connector_fabric_exposes_catalog_info(self): assert isinstance(connector_names, ExtendedList) assert isinstance(connector_names[0], ExtendedString) assert "cursor" in connector_names - assert ("github" in connector_names) is github_info["available"] + assert "github" in connector_names + assert isinstance(available_connector_names, ExtendedList) + assert "cursor" in available_connector_names + assert set(available_connector_names) <= set(connector_names) + assert ("github" in available_connector_names) is github_info["available"] def test_external_connector_metadata_uses_base_class_catalog_contract(self, monkeypatch): """Entry-point connectors can publish category and capability metadata.""" @@ -537,8 +542,8 @@ def test_available_only_catalog_filters_missing_builtins(self): assert 
isinstance(info, ExtendedList) assert all(connector["available"] for connector in info) - def test_list_connectors_filters_registered_connectors_with_missing_requirements(self, monkeypatch): - """Connector name lists only include registered connectors that can be used.""" + def test_list_connectors_reports_catalog_names_and_available_names_explicitly(self, monkeypatch): + """Connector catalog names and runtime-available names are separate APIs.""" class CursorConnector: pass @@ -561,7 +566,11 @@ class GitHubConnector: lambda name: ExtendedList(["github"]) if name == "github" else ExtendedList(), ) - names = registry.list_connectors() + catalog_names = registry.list_connectors() + available_names = registry.list_available_connectors() - assert isinstance(names, ExtendedList) - assert names == ["cursor"] + assert isinstance(catalog_names, ExtendedList) + assert "cursor" in catalog_names + assert "github" in catalog_names + assert isinstance(available_names, ExtendedList) + assert available_names == ["cursor"] diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index 6860116..ba26b87 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -49,6 +49,7 @@ def test_catalog_tools_expose_connector_discovery_without_credentials() -> None: expected = { "extended_data_list_connectors", + "extended_data_list_available_connectors", "extended_data_list_connector_info", "extended_data_get_connector_info", "extended_data_list_connector_categories", @@ -68,12 +69,16 @@ def test_catalog_tool_handlers_return_tier2_catalog_payloads() -> None: tools = _catalog_tool_definitions() names = tools["extended_data_list_connectors"]["handler"]() + available_names = tools["extended_data_list_available_connectors"]["handler"]() github = tools["extended_data_get_connector_info"]["handler"](name="github") categories = tools["extended_data_list_connector_categories"]["handler"]() repositories = 
tools["extended_data_list_connectors_by_capability"]["handler"](capability="repositories") assert isinstance(names, ExtendedList) - assert "cursor" in names + assert "github" in names + assert isinstance(available_names, ExtendedList) + assert "cursor" in available_names + assert set(available_names) <= set(names) assert isinstance(github, ExtendedDict) assert github["category"] == "development" assert "repositories" in github["capabilities"] diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 5d1ff86..63fe1b9 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -164,18 +164,23 @@ def test_root_exports_first_class_integrated_surfaces() -> None: assert callable(extended_data.directed_inputs) assert callable(extended_data.read_data_file) assert callable(extended_data.get_connector) + assert callable(extended_data.list_available_connectors) assert callable(extended_data.list_connector_info) assert callable(extended_data.list_connector_categories) assert callable(extended_data.list_connector_capabilities) assert callable(extended_data.list_connectors_by_category) assert callable(extended_data.list_connectors_by_capability) connector_names = extended_data.list_connectors() + available_connector_names = extended_data.list_available_connectors() connector_categories = extended_data.list_connector_categories() connector_capabilities = extended_data.list_connector_capabilities() cloud_connectors = extended_data.list_connectors_by_category("cloud") repository_connectors = extended_data.list_connectors_by_capability("repositories") assert isinstance(connector_names, ExtendedList) assert isinstance(connector_names[0], ExtendedString) + assert isinstance(available_connector_names, ExtendedList) + assert isinstance(available_connector_names[0], ExtendedString) + assert set(available_connector_names) <= set(connector_names) assert isinstance(connector_categories, ExtendedList) assert 
isinstance(connector_categories[0], ExtendedString) assert "cloud" in connector_categories @@ -189,9 +194,11 @@ def test_root_exports_first_class_integrated_surfaces() -> None: assert isinstance(repository_connectors[0], ExtendedDict) assert "github" in {connector["name"] for connector in repository_connectors} assert get_type_hints(connectors.list_connectors)["return"] == ExtendedList[ExtendedString] + assert get_type_hints(connectors.list_available_connectors)["return"] == ExtendedList[ExtendedString] assert get_type_hints(connectors.list_connector_categories)["return"] == ExtendedList[ExtendedString] assert get_type_hints(connectors.list_connector_capabilities)["return"] == ExtendedList[ExtendedString] assert get_type_hints(ConnectorFabric.list_connectors)["return"] == ExtendedList[ExtendedString] + assert get_type_hints(ConnectorFabric.list_available_connectors)["return"] == ExtendedList[ExtendedString] assert get_type_hints(ConnectorFabric.list_connector_categories)["return"] == ExtendedList[ExtendedString] assert get_type_hints(ConnectorFabric.list_connector_capabilities)["return"] == ExtendedList[ExtendedString] assert get_type_hints(ConnectorFabric.get_connector)["return"] is connectors.ConnectorBase From 55b0b2bb8e3dbd2dad84c98ffff40ec77e115756 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:14:19 -0500 Subject: [PATCH 264/287] docs: describe connectors as first-class integrations --- README.md | 4 ++-- docs/package-surface.md | 4 ++-- examples/connectors/README.md | 2 +- src/extended_data/__init__.py | 2 +- src/extended_data/connectors/meshy/README.md | 2 +- src/extended_data/connectors/secrets/__init__.py | 8 ++++---- tests/core/test_package_surface.py | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 8d2ab3a..d24247f 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,7 @@ extended_data/ io/ Tier 3 file, import, export, and base64 processors inputs/ InputProvider and decorator-based 
input injection logging/ structured lifecycle logging - connectors/ Tier 3 ConnectorFabric and data adapters + connectors/ Tier 3 ConnectorFabric and data integrations secrets/ SecretSync CLI bridge and typed result exports workflows/ Tier 3 higher-order workflow composition ``` @@ -140,7 +140,7 @@ incorrect imports fail fast. Tier 1 public exports stay function-oriented; use `get_default_dict()` for nested or sorted default mappings instead of importing the internal helper class. -Connectors are first-class adapters in the data fabric. `ConnectorFabric` +Connectors are first-class data integrations in the fabric. `ConnectorFabric` uses the registry to resolve connectors by name, injects shared input/logging context, caches connector instances, and lets specialized helpers coexist with generic connector lookup. `list_connectors()` returns the full connector diff --git a/docs/package-surface.md b/docs/package-surface.md index afad0f6..c7b0d4b 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -2,7 +2,7 @@ `extended-data` is one Python distribution with a single `extended_data` namespace. The root package exposes first-class containers, Tier 3 processors, -and integrated adapters; pure Tier 1 utilities are imported from +and integrated connectors; pure Tier 1 utilities are imported from `extended_data.primitives`. The old `extended_data_types`, `lifecyclelogging`, `directed_inputs_class`, and `vendor_connectors` import namespaces are not @@ -375,7 +375,7 @@ payload = github.get_repository_file("service.json") assert payload["service"]["name"].upper_first() == "Api" ``` -The `secrets` adapter is the Python-facing bridge to the standalone SecretSync +The `secrets` connector is the Python-facing bridge to the standalone SecretSync project (`jbcom/secrets-sync`). It uses the `secretsync` CLI, which must emit the stable `secretsync pipeline --output json` result envelope for both dry-run and apply runs. 
The connector decodes that envelope through the shared file/data diff --git a/examples/connectors/README.md b/examples/connectors/README.md index 85ae1c3..17f1551 100644 --- a/examples/connectors/README.md +++ b/examples/connectors/README.md @@ -1,7 +1,7 @@ # Connector Examples This directory contains working examples for `extended_data.connectors` and the -registered adapters that hang off `ConnectorFabric`. +registered integrations that hang off `ConnectorFabric`. Connector examples assume the major-version `extended-data` contract: external data payloads are promoted into Tier 2 containers at connector boundaries. diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 1a669c1..7a53178 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -119,7 +119,7 @@ def __getattr__(name: str) -> Any: - """Lazily expose integrated adapters and processors at the package root.""" + """Lazily expose integrated connectors and processors at the package root.""" if name not in _LAZY_EXPORTS: raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/extended_data/connectors/meshy/README.md b/src/extended_data/connectors/meshy/README.md index 5038948..e18bf38 100644 --- a/src/extended_data/connectors/meshy/README.md +++ b/src/extended_data/connectors/meshy/README.md @@ -2,7 +2,7 @@ Meshy support is part of `extended-data` and lives under `extended_data.connectors.meshy`. It provides functional API helpers, a -`MeshyConnector` fabric adapter, job orchestration, webhook handling, AI tool +`MeshyConnector` fabric integration, job orchestration, webhook handling, AI tool adapters, and an MCP server. 
## Install diff --git a/src/extended_data/connectors/secrets/__init__.py b/src/extended_data/connectors/secrets/__init__.py index 0c5579c..75ada2c 100644 --- a/src/extended_data/connectors/secrets/__init__.py +++ b/src/extended_data/connectors/secrets/__init__.py @@ -6,8 +6,8 @@ inheritance, versioning, and CI/CD integration. The connector executes the supported `secretsync` subprocess CLI contract. -Alternate runtime adapters should be added only after SecretSync publishes a -stable adapter contract. +Alternate runtime transports should be added only after SecretSync publishes a +stable runtime contract. Example usage: from extended_data.connectors.secrets import SecretsConnector @@ -153,8 +153,8 @@ class SecretsConnector(ConnectorBase): - Dry-run with visual diff output - CI/CD integration with exit codes - Alternate runtime adapters are intentionally not accepted here until - SecretSync publishes a stable adapter contract. + Alternate runtime transports are intentionally not accepted here until + SecretSync publishes a stable runtime contract. 
""" def __init__( diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index 63fe1b9..cc69120 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -88,7 +88,7 @@ def test_root_lazy_exports_do_not_describe_tier1_primitives() -> None: lazy_loader_docs = extended_data.__getattr__.__doc__ or "" assert "primitives" not in lazy_loader_docs - assert "adapters and processors" in lazy_loader_docs + assert "connectors and processors" in lazy_loader_docs def test_clean_major_version_public_names() -> None: From 6c697056b1d218e929be7f8d62c331ce2c1c5f50 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:21:17 -0500 Subject: [PATCH 265/287] feat: add package-level data CLI --- README.md | 5 +- docs/package-surface.md | 7 ++ pyproject.toml | 2 +- src/extended_data/cli.py | 106 ++++++++++++++++++++++++++++ src/extended_data/connectors/cli.py | 6 +- tests/core/test_package_cli.py | 67 ++++++++++++++++++ 6 files changed, 188 insertions(+), 5 deletions(-) create mode 100644 src/extended_data/cli.py create mode 100644 tests/core/test_package_cli.py diff --git a/README.md b/README.md index d24247f..fcd154d 100644 --- a/README.md +++ b/README.md @@ -101,9 +101,12 @@ description metadata. Use workflow needs to select integrations by data domain instead of hard-coding a single connector name. -The same catalog is available from the CLI: +The installed CLI exposes the package's Tier 3 data boundary plus the connector +catalog and call surface: ```bash +extended-data decode '{"service": {"name": "api"}}' --suffix json +extended-data decode --file config.yaml --output json extended-data list extended-data list --category cloud extended-data list --capability repositories --json diff --git a/docs/package-surface.md b/docs/package-surface.md index c7b0d4b..c13aa99 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -413,6 +413,8 @@ module, class, and description fields. 
The installed CLI exposes the same discovery layer for shell automation: ```bash +extended-data decode '{"service": {"name": "api"}}' --suffix json +extended-data decode --file config.yaml --output json extended-data list --json extended-data list --category cloud extended-data list --capability repositories --json @@ -420,6 +422,11 @@ extended-data info github --json extended-data methods github --json ``` +The `extended-data` console script is the package-level CLI. Data commands use +`DataFile` and the shared export boundary directly; connector commands are +delegated to the connector CLI so existing catalog, method, call, and MCP +workflows stay on the same entrypoint. + ## Optional Integrations Install only the external service or AI layers you need: diff --git a/pyproject.toml b/pyproject.toml index a775f8b..dd16739 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -153,7 +153,7 @@ all = [ ] [project.scripts] -extended-data = "extended_data.connectors.cli:main" +extended-data = "extended_data.cli:main" extended-data-mcp = "extended_data.connectors.mcp:main" meshy-mcp = "extended_data.connectors.meshy.mcp:main" diff --git a/src/extended_data/cli.py b/src/extended_data/cli.py new file mode 100644 index 0000000..781476e --- /dev/null +++ b/src/extended_data/cli.py @@ -0,0 +1,106 @@ +"""Top-level command line interface for Extended Data.""" + +from __future__ import annotations + +import argparse +import sys + +from collections.abc import Sequence +from typing import Any, cast + +from extended_data.io import DataFile +from extended_data.primitives.redaction import redact_sensitive_text + + +CONNECTOR_COMMANDS = frozenset({"call", "info", "list", "mcp", "methods"}) +OUTPUT_ENCODINGS = ("json", "yaml", "toml", "hcl", "raw") + + +def _write_stdout(message: str) -> None: + """Write one CLI output line.""" + sys.stdout.write(f"{message}\n") + + +def _write_stderr(message: str) -> None: + """Write one CLI error line.""" + 
sys.stderr.write(f"{redact_sensitive_text(message)}\n") + + +def _decode_artifact(args: argparse.Namespace) -> DataFile: + """Decode an inline payload or file path into a DataFile artifact.""" + value = getattr(args, "value", None) + file_path = getattr(args, "file_path", None) + + if value is not None and file_path is not None: + raise ValueError("pass either VALUE or --file, not both") + if value is None and file_path is None: + raise ValueError("pass VALUE or --file") + if file_path is not None: + return DataFile.read(file_path, suffix=args.suffix) + return DataFile.decode(cast(str, value), suffix=args.suffix) + + +def cmd_decode(args: argparse.Namespace) -> int: + """Decode structured data and write it through the shared export boundary.""" + try: + artifact = _decode_artifact(args) + format_opts: dict[str, Any] = {} + if args.output == "json" and not args.compact: + format_opts["indent_2"] = True + _write_stdout(artifact.wrap_for_export(allow_encoding=args.output, **format_opts)) + return 0 + except Exception as e: + _write_stderr(str(e)) + return 1 + + +def _build_parser() -> argparse.ArgumentParser: + """Build the top-level Extended Data argument parser.""" + parser = argparse.ArgumentParser( + prog="extended-data", + description="CLI for Extended Data primitives, files, workflows, and connectors", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + extended-data decode '{"service": {"name": "api"}}' --suffix json + extended-data decode --file config.yaml --output json + extended-data list --category cloud + extended-data call github get_repository_file --path service.json --json + """, + ) + subparsers = parser.add_subparsers(dest="command", help="Commands") + + decode_parser = subparsers.add_parser("decode", help="Decode inline data or a file") + decode_parser.add_argument("value", nargs="?", help="Inline payload to decode") + decode_parser.add_argument("--file", dest="file_path", help="File path or URL to decode") + 
decode_parser.add_argument("--suffix", help="Input format override") + decode_parser.add_argument("--output", choices=OUTPUT_ENCODINGS, default="json", help="Output encoding") + decode_parser.add_argument("--compact", action="store_true", help="Compact JSON output") + decode_parser.set_defaults(func=cmd_decode) + + return parser + + +def main(argv: Sequence[str] | None = None) -> int: + """Run the Extended Data CLI.""" + args = list(argv) if argv is not None else sys.argv[1:] + if args and args[0] in CONNECTOR_COMMANDS: + from extended_data.connectors.cli import main as connectors_main + + return connectors_main(args) + + parser = _build_parser() + parsed = parser.parse_args(args) + + if not parsed.command: + parser.print_help() + return 0 + + try: + return parsed.func(parsed) + except KeyboardInterrupt: + return 130 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index 4fe91cc..f087f1c 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -21,7 +21,7 @@ import argparse import sys -from collections.abc import Mapping +from collections.abc import Mapping, Sequence from typing import Any from extended_data.connectors.registry import ( @@ -276,7 +276,7 @@ def cmd_info(args: argparse.Namespace) -> int: # ============================================================================= -def main() -> int: +def main(argv: Sequence[str] | None = None) -> int: """Main CLI entry point.""" parser = argparse.ArgumentParser( prog="extended-data", @@ -328,7 +328,7 @@ def main() -> int: mcp_parser.set_defaults(func=cmd_mcp) # Parse and execute - args = parser.parse_args() + args = parser.parse_args(argv) if not args.command: parser.print_help() diff --git a/tests/core/test_package_cli.py b/tests/core/test_package_cli.py new file mode 100644 index 0000000..6d2a61a --- /dev/null +++ b/tests/core/test_package_cli.py @@ -0,0 +1,67 @@ +"""Tests for the 
top-level Extended Data CLI.""" + +from __future__ import annotations + +import json + +from unittest.mock import patch + +from extended_data import cli as cli_module + + +def _stdout_text(mock_write) -> str: + """Return concatenated stdout writes from a patched writer.""" + return "".join(call.args[0] for call in mock_write.call_args_list if call.args) + + +def test_decode_inline_json_exports_through_datafile_boundary() -> None: + """The top-level CLI should expose Tier 3 decode/export utilities.""" + with patch("sys.stdout.write") as mock_write: + exit_code = cli_module.main(["decode", '{"service": {"name": "api"}}', "--suffix", "json"]) + + assert exit_code == 0 + assert json.loads(_stdout_text(mock_write)) == {"service": {"name": "api"}} + + +def test_decode_file_can_export_yaml(tmp_path) -> None: + """File decoding should use DataFile and the shared export boundary.""" + config = tmp_path / "service.json" + config.write_text('{"service": {"name": "api"}}', encoding="utf-8") + + with patch("sys.stdout.write") as mock_write: + exit_code = cli_module.main(["decode", "--file", str(config), "--output", "yaml"]) + + assert exit_code == 0 + output = _stdout_text(mock_write) + assert "service:" in output + assert "name: api" in output + + +def test_decode_requires_one_input_source() -> None: + """Decode should fail clearly when no inline value or file path is supplied.""" + with patch("sys.stderr.write") as mock_write: + exit_code = cli_module.main(["decode"]) + + assert exit_code == 1 + assert "pass VALUE or --file" in _stdout_text(mock_write) + + +def test_decode_rejects_ambiguous_input_sources(tmp_path) -> None: + """Decode should not guess when both inline and file input are supplied.""" + config = tmp_path / "service.json" + config.write_text('{"service": "api"}', encoding="utf-8") + + with patch("sys.stderr.write") as mock_write: + exit_code = cli_module.main(["decode", "{}", "--file", str(config)]) + + assert exit_code == 1 + assert "pass either VALUE or 
--file" in _stdout_text(mock_write) + + +def test_connector_commands_delegate_to_connector_cli() -> None: + """Existing connector commands remain available from the package entrypoint.""" + with patch("extended_data.connectors.cli.main", return_value=7) as mock_main: + exit_code = cli_module.main(["list", "--json"]) + + assert exit_code == 7 + mock_main.assert_called_once_with(["list", "--json"]) From bf75773aef7018bd201b32f6fa03e9de7bb6e13e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:26:57 -0500 Subject: [PATCH 266/287] feat: expose workflow merge in package CLI --- README.md | 1 + docs/package-surface.md | 7 ++-- src/extended_data/cli.py | 62 +++++++++++++++++++++++++++++++--- tests/core/test_package_cli.py | 46 +++++++++++++++++++++++++ 4 files changed, 109 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index fcd154d..b299d53 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,7 @@ catalog and call surface: ```bash extended-data decode '{"service": {"name": "api"}}' --suffix json extended-data decode --file config.yaml --output json +extended-data merge config/base.yaml config/dev.yaml --output yaml extended-data list extended-data list --category cloud extended-data list --capability repositories --json diff --git a/docs/package-surface.md b/docs/package-surface.md index c13aa99..cd6306d 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -415,6 +415,7 @@ The installed CLI exposes the same discovery layer for shell automation: ```bash extended-data decode '{"service": {"name": "api"}}' --suffix json extended-data decode --file config.yaml --output json +extended-data merge config/base.yaml config/dev.yaml --output yaml extended-data list --json extended-data list --category cloud extended-data list --capability repositories --json @@ -423,9 +424,9 @@ extended-data methods github --json ``` The `extended-data` console script is the package-level CLI. 
Data commands use -`DataFile` and the shared export boundary directly; connector commands are -delegated to the connector CLI so existing catalog, method, call, and MCP -workflows stay on the same entrypoint. +`DataFile`, `DataWorkflow`, and the shared export boundary directly; connector +commands are delegated to the connector CLI so existing catalog, method, call, +and MCP workflows stay on the same entrypoint. ## Optional Integrations diff --git a/src/extended_data/cli.py b/src/extended_data/cli.py index 781476e..552851c 100644 --- a/src/extended_data/cli.py +++ b/src/extended_data/cli.py @@ -10,6 +10,7 @@ from extended_data.io import DataFile from extended_data.primitives.redaction import redact_sensitive_text +from extended_data.workflows import DataWorkflow, WorkflowAction, WorkflowResult CONNECTOR_COMMANDS = frozenset({"call", "info", "list", "mcp", "methods"}) @@ -44,10 +45,53 @@ def cmd_decode(args: argparse.Namespace) -> int: """Decode structured data and write it through the shared export boundary.""" try: artifact = _decode_artifact(args) - format_opts: dict[str, Any] = {} - if args.output == "json" and not args.compact: - format_opts["indent_2"] = True - _write_stdout(artifact.wrap_for_export(allow_encoding=args.output, **format_opts)) + _write_stdout(artifact.wrap_for_export(allow_encoding=args.output, **_json_format_opts(args))) + return 0 + except Exception as e: + _write_stderr(str(e)) + return 1 + + +def _json_format_opts(args: argparse.Namespace) -> dict[str, Any]: + """Return common JSON formatting options for CLI export commands.""" + if args.output == "json" and not args.compact: + return {"indent_2": True} + return {} + + +def _merge_workflow(args: argparse.Namespace) -> DataWorkflow: + """Build a layered merge workflow from CLI arguments.""" + file_paths = args.file_paths + if len(file_paths) < 2: + raise ValueError("merge requires at least two files") + + workflow = DataWorkflow.from_file(file_paths[0], suffix=args.suffix) + for file_path 
in file_paths[1:]: + artifact = DataFile.read(file_path, suffix=args.suffix) + merge_value = artifact.as_extended() + workflow = workflow.then((f"merge:{file_path}", _deep_merge_action(merge_value))) + return workflow + + +def _deep_merge_action(value: Any) -> WorkflowAction: + """Return a typed workflow action that deep-merges one value.""" + + def merge(data: Any) -> Any: + return data.deep_merge(value) + + return merge + + +def cmd_merge(args: argparse.Namespace) -> int: + """Merge structured files through DataWorkflow and write or print the result.""" + try: + workflow = _merge_workflow(args) + result: WorkflowResult + if args.write: + result = workflow.write(args.write, encoding=args.output, allow_empty=args.allow_empty) + else: + result = workflow.result() + _write_stdout(result.wrap_for_export(allow_encoding=args.output, **_json_format_opts(args))) return 0 except Exception as e: _write_stderr(str(e)) @@ -64,6 +108,7 @@ def _build_parser() -> argparse.ArgumentParser: Examples: extended-data decode '{"service": {"name": "api"}}' --suffix json extended-data decode --file config.yaml --output json + extended-data merge base.yaml env.yaml --output yaml extended-data list --category cloud extended-data call github get_repository_file --path service.json --json """, @@ -78,6 +123,15 @@ def _build_parser() -> argparse.ArgumentParser: decode_parser.add_argument("--compact", action="store_true", help="Compact JSON output") decode_parser.set_defaults(func=cmd_decode) + merge_parser = subparsers.add_parser("merge", help="Deep merge structured files") + merge_parser.add_argument("file_paths", nargs="+", help="Structured files to merge in order") + merge_parser.add_argument("--suffix", help="Input format override for all files") + merge_parser.add_argument("--output", choices=OUTPUT_ENCODINGS, default="json", help="Output encoding") + merge_parser.add_argument("--compact", action="store_true", help="Compact JSON output") + merge_parser.add_argument("--write", help="Write 
merged output to this file") + merge_parser.add_argument("--allow-empty", action="store_true", help="Allow writing empty merged output") + merge_parser.set_defaults(func=cmd_merge) + return parser diff --git a/tests/core/test_package_cli.py b/tests/core/test_package_cli.py index 6d2a61a..ae36369 100644 --- a/tests/core/test_package_cli.py +++ b/tests/core/test_package_cli.py @@ -65,3 +65,49 @@ def test_connector_commands_delegate_to_connector_cli() -> None: assert exit_code == 7 mock_main.assert_called_once_with(["list", "--json"]) + + +def test_merge_files_exports_deep_merged_workflow_result(tmp_path) -> None: + """The top-level CLI should expose a DataWorkflow-backed merge command.""" + base = tmp_path / "base.yaml" + env = tmp_path / "env.yaml" + base.write_text("service:\n name: api\n debug: false\nports:\n - 8080\n", encoding="utf-8") + env.write_text("service:\n debug: true\nports:\n - 8081\n", encoding="utf-8") + + with patch("sys.stdout.write") as mock_write: + exit_code = cli_module.main(["merge", str(base), str(env), "--output", "json"]) + + assert exit_code == 0 + assert json.loads(_stdout_text(mock_write)) == { + "service": {"name": "api", "debug": True}, + "ports": [8080, 8081], + } + + +def test_merge_files_can_write_output_artifact(tmp_path) -> None: + """Merged workflow output can be written through the shared file boundary.""" + base = tmp_path / "base.json" + env = tmp_path / "env.json" + output = tmp_path / "build" / "service.yaml" + base.write_text('{"service": {"name": "api", "debug": false}}', encoding="utf-8") + env.write_text('{"service": {"debug": true}}', encoding="utf-8") + + with patch("sys.stdout.write") as mock_write: + exit_code = cli_module.main(["merge", str(base), str(env), "--output", "yaml", "--write", str(output)]) + + assert exit_code == 0 + output_text = output.read_text(encoding="utf-8") + assert _stdout_text(mock_write) == f"{output_text}\n" + assert "debug: true" in output_text + + +def 
test_merge_requires_multiple_files(tmp_path) -> None: + """Merge should fail loudly instead of treating a single file as a workflow.""" + base = tmp_path / "base.json" + base.write_text('{"service": "api"}', encoding="utf-8") + + with patch("sys.stderr.write") as mock_write: + exit_code = cli_module.main(["merge", str(base)]) + + assert exit_code == 1 + assert "merge requires at least two files" in _stdout_text(mock_write) From cb53b682cfbac954d1db16f1d8a325a13981270e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:30:39 -0500 Subject: [PATCH 267/287] test: lock package CLI script boundaries --- tests/core/test_release_hygiene.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index fa11c5f..2f40d24 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -315,6 +315,17 @@ def test_project_scripts_point_to_callables() -> None: assert offenders == [] +def test_project_scripts_preserve_package_cli_boundaries() -> None: + """The broad CLI entrypoint should not regress to a connector-only module.""" + scripts = {str(name): str(target) for name, target in _pyproject()["project"]["scripts"].items()} + + assert scripts == { + "extended-data": "extended_data.cli:main", + "extended-data-mcp": "extended_data.connectors.mcp:main", + "meshy-mcp": "extended_data.connectors.meshy.mcp:main", + } + + def test_readme_package_shape_matches_public_subpackages() -> None: """The documented tier layout should match the actual top-level package directories.""" source_root = REPO_ROOT / "src" / "extended_data" From 4a6428f04cf9dd3cacfc99992d584b05ba334972 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:35:11 -0500 Subject: [PATCH 268/287] feat: expose datafile inspection in package CLI --- README.md | 1 + docs/package-surface.md | 1 + src/extended_data/cli.py | 20 ++++++++++++++++ tests/core/test_package_cli.py | 42 
++++++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+) diff --git a/README.md b/README.md index b299d53..e9f92ee 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,7 @@ catalog and call surface: ```bash extended-data decode '{"service": {"name": "api"}}' --suffix json extended-data decode --file config.yaml --output json +extended-data inspect --file config.yaml extended-data merge config/base.yaml config/dev.yaml --output yaml extended-data list extended-data list --category cloud diff --git a/docs/package-surface.md b/docs/package-surface.md index cd6306d..ba7af58 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -415,6 +415,7 @@ The installed CLI exposes the same discovery layer for shell automation: ```bash extended-data decode '{"service": {"name": "api"}}' --suffix json extended-data decode --file config.yaml --output json +extended-data inspect --file config.yaml extended-data merge config/base.yaml config/dev.yaml --output yaml extended-data list --json extended-data list --category cloud diff --git a/src/extended_data/cli.py b/src/extended_data/cli.py index 552851c..dc73dc0 100644 --- a/src/extended_data/cli.py +++ b/src/extended_data/cli.py @@ -52,6 +52,17 @@ def cmd_decode(args: argparse.Namespace) -> int: return 1 +def cmd_inspect(args: argparse.Namespace) -> int: + """Decode structured data and write its DataFile metadata.""" + try: + artifact = _decode_artifact(args) + _write_stdout(artifact.metadata.wrap_for_export(allow_encoding=args.output, **_json_format_opts(args))) + return 0 + except Exception as e: + _write_stderr(str(e)) + return 1 + + def _json_format_opts(args: argparse.Namespace) -> dict[str, Any]: """Return common JSON formatting options for CLI export commands.""" if args.output == "json" and not args.compact: @@ -108,6 +119,7 @@ def _build_parser() -> argparse.ArgumentParser: Examples: extended-data decode '{"service": {"name": "api"}}' --suffix json extended-data decode --file config.yaml --output 
json + extended-data inspect --file config.yaml extended-data merge base.yaml env.yaml --output yaml extended-data list --category cloud extended-data call github get_repository_file --path service.json --json @@ -123,6 +135,14 @@ def _build_parser() -> argparse.ArgumentParser: decode_parser.add_argument("--compact", action="store_true", help="Compact JSON output") decode_parser.set_defaults(func=cmd_decode) + inspect_parser = subparsers.add_parser("inspect", help="Decode data and print artifact metadata") + inspect_parser.add_argument("value", nargs="?", help="Inline payload to inspect") + inspect_parser.add_argument("--file", dest="file_path", help="File path or URL to inspect") + inspect_parser.add_argument("--suffix", help="Input format override") + inspect_parser.add_argument("--output", choices=OUTPUT_ENCODINGS, default="json", help="Output encoding") + inspect_parser.add_argument("--compact", action="store_true", help="Compact JSON output") + inspect_parser.set_defaults(func=cmd_inspect) + merge_parser = subparsers.add_parser("merge", help="Deep merge structured files") merge_parser.add_argument("file_paths", nargs="+", help="Structured files to merge in order") merge_parser.add_argument("--suffix", help="Input format override for all files") diff --git a/tests/core/test_package_cli.py b/tests/core/test_package_cli.py index ae36369..c0236d4 100644 --- a/tests/core/test_package_cli.py +++ b/tests/core/test_package_cli.py @@ -58,6 +58,48 @@ def test_decode_rejects_ambiguous_input_sources(tmp_path) -> None: assert "pass either VALUE or --file" in _stdout_text(mock_write) +def test_inspect_file_exports_datafile_metadata(tmp_path) -> None: + """Inspect should expose the same promoted metadata DataFile carries.""" + config = tmp_path / "service.yaml" + config.write_text("service:\n name: api\n", encoding="utf-8") + + with patch("sys.stdout.write") as mock_write: + exit_code = cli_module.main(["inspect", "--file", str(config)]) + + assert exit_code == 0 + metadata 
= json.loads(_stdout_text(mock_write)) + assert metadata["source"] == str(config) + assert metadata["encoding"] == "yaml" + assert metadata["path"] == str(config.resolve()) + assert metadata["is_url"] is False + assert metadata["data_type"] == "ExtendedDict" + + +def test_inspect_inline_payload_reports_memory_source() -> None: + """Inline inspect keeps in-memory payload provenance explicit.""" + with patch("sys.stdout.write") as mock_write: + exit_code = cli_module.main(["inspect", '{"service": "api"}', "--suffix", "json"]) + + assert exit_code == 0 + metadata = json.loads(_stdout_text(mock_write)) + assert metadata["source"] == "memory" + assert metadata["encoding"] == "json" + assert metadata["path"] is None + assert metadata["data_type"] == "ExtendedDict" + + +def test_inspect_rejects_ambiguous_input_sources(tmp_path) -> None: + """Inspect should share decode's explicit source selection behavior.""" + config = tmp_path / "service.json" + config.write_text('{"service": "api"}', encoding="utf-8") + + with patch("sys.stderr.write") as mock_write: + exit_code = cli_module.main(["inspect", "{}", "--file", str(config)]) + + assert exit_code == 1 + assert "pass either VALUE or --file" in _stdout_text(mock_write) + + def test_connector_commands_delegate_to_connector_cli() -> None: """Existing connector commands remain available from the package entrypoint.""" with patch("extended_data.connectors.cli.main", return_value=7) as mock_main: From 11c7639663d4cfa1976ed0408d61159133349bd3 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:42:32 -0500 Subject: [PATCH 269/287] feat: expose data transforms in package CLI --- README.md | 1 + docs/package-surface.md | 1 + src/extended_data/cli.py | 80 ++++++++++++++++++++++++ tests/core/test_package_cli.py | 111 +++++++++++++++++++++++++++++++++ 4 files changed, 193 insertions(+) diff --git a/README.md b/README.md index e9f92ee..0124042 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,7 @@ extended-data decode 
'{"service": {"name": "api"}}' --suffix json extended-data decode --file config.yaml --output json extended-data inspect --file config.yaml extended-data merge config/base.yaml config/dev.yaml --output yaml +extended-data transform --file payload.json --step reconstruct --step unhump extended-data list extended-data list --category cloud extended-data list --capability repositories --json diff --git a/docs/package-surface.md b/docs/package-surface.md index ba7af58..0ce64d3 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -417,6 +417,7 @@ extended-data decode '{"service": {"name": "api"}}' --suffix json extended-data decode --file config.yaml --output json extended-data inspect --file config.yaml extended-data merge config/base.yaml config/dev.yaml --output yaml +extended-data transform --file payload.json --step reconstruct --step unhump extended-data list --json extended-data list --category cloud extended-data list --capability repositories --json diff --git a/src/extended_data/cli.py b/src/extended_data/cli.py index dc73dc0..6e54253 100644 --- a/src/extended_data/cli.py +++ b/src/extended_data/cli.py @@ -15,6 +15,20 @@ CONNECTOR_COMMANDS = frozenset({"call", "info", "list", "mcp", "methods"}) OUTPUT_ENCODINGS = ("json", "yaml", "toml", "hcl", "raw") +TRANSFORM_METHODS = { + "compact": "compact", + "deduplicate": "deduplicate", + "flatten": "flatten", + "humanize": "humanize", + "reconstruct": "reconstruct_special_types", + "titleize": "titleize", + "to-camel-case": "to_camel_case", + "to-kebab-case": "to_kebab_case", + "to-pascal-case": "to_pascal_case", + "to-snake-case": "to_snake_case", + "unhump": "unhump", + "unique": "unique", +} def _write_stdout(message: str) -> None: @@ -109,6 +123,54 @@ def cmd_merge(args: argparse.Namespace) -> int: return 1 +def _transform_workflow(args: argparse.Namespace) -> DataWorkflow: + """Build a workflow that applies named Tier 2 transforms.""" + steps = args.steps or [] + if not steps: + raise 
ValueError("transform requires at least one --step") + + workflow = _decode_artifact(args).workflow() + for step in steps: + workflow = workflow.then((f"transform:{step}", _transform_action(step))) + return workflow + + +def _transform_action(step: str) -> WorkflowAction: + """Return a typed workflow action for one supported transform step.""" + method_name = TRANSFORM_METHODS[step] + + def transform(data: Any) -> Any: + method = _transform_method(data, step, method_name) + if not callable(method): + raise TypeError(f"transform {step!r} is not available for {type(data).__name__}") + return method() + + return transform + + +def _transform_method(data: Any, step: str, method_name: str) -> Any: + """Return the best method for a transform step on the current data shape.""" + if step == "reconstruct": + return getattr(data, "reconstruct_special_types", None) or getattr(data, "reconstruct_special_type", None) + return getattr(data, method_name, None) + + +def cmd_transform(args: argparse.Namespace) -> int: + """Apply named Tier 2 transforms through DataWorkflow.""" + try: + workflow = _transform_workflow(args) + result: WorkflowResult + if args.write: + result = workflow.write(args.write, encoding=args.output, allow_empty=args.allow_empty) + else: + result = workflow.result() + _write_stdout(result.wrap_for_export(allow_encoding=args.output, **_json_format_opts(args))) + return 0 + except Exception as e: + _write_stderr(str(e)) + return 1 + + def _build_parser() -> argparse.ArgumentParser: """Build the top-level Extended Data argument parser.""" parser = argparse.ArgumentParser( @@ -121,6 +183,7 @@ def _build_parser() -> argparse.ArgumentParser: extended-data decode --file config.yaml --output json extended-data inspect --file config.yaml extended-data merge base.yaml env.yaml --output yaml + extended-data transform --file payload.json --step reconstruct --step unhump extended-data list --category cloud extended-data call github get_repository_file --path service.json 
--json """, @@ -152,6 +215,23 @@ def _build_parser() -> argparse.ArgumentParser: merge_parser.add_argument("--allow-empty", action="store_true", help="Allow writing empty merged output") merge_parser.set_defaults(func=cmd_merge) + transform_parser = subparsers.add_parser("transform", help="Apply named Extended Data transforms") + transform_parser.add_argument("value", nargs="?", help="Inline payload to transform") + transform_parser.add_argument("--file", dest="file_path", help="File path or URL to transform") + transform_parser.add_argument("--suffix", help="Input format override") + transform_parser.add_argument( + "--step", + dest="steps", + action="append", + choices=sorted(TRANSFORM_METHODS), + help="Transform step to apply in order", + ) + transform_parser.add_argument("--output", choices=OUTPUT_ENCODINGS, default="json", help="Output encoding") + transform_parser.add_argument("--compact", action="store_true", help="Compact JSON output") + transform_parser.add_argument("--write", help="Write transformed output to this file") + transform_parser.add_argument("--allow-empty", action="store_true", help="Allow writing empty transformed output") + transform_parser.set_defaults(func=cmd_transform) + return parser diff --git a/tests/core/test_package_cli.py b/tests/core/test_package_cli.py index c0236d4..baacbd5 100644 --- a/tests/core/test_package_cli.py +++ b/tests/core/test_package_cli.py @@ -153,3 +153,114 @@ def test_merge_requires_multiple_files(tmp_path) -> None: assert exit_code == 1 assert "merge requires at least two files" in _stdout_text(mock_write) + + +def test_transform_file_applies_ordered_tier2_steps(tmp_path) -> None: + """Transform should expose common Tier 2 operations through DataWorkflow.""" + payload = tmp_path / "payload.json" + payload.write_text( + '{"HTTPResponseCode": "200", "SelectedServices": ["api", "api", "worker"], "EmptyValue": ""}', + encoding="utf-8", + ) + + with patch("sys.stdout.write") as mock_write: + exit_code = 
cli_module.main( + [ + "transform", + "--file", + str(payload), + "--step", + "reconstruct", + "--step", + "unhump", + "--step", + "deduplicate", + "--step", + "compact", + ] + ) + + assert exit_code == 0 + assert json.loads(_stdout_text(mock_write)) == { + "http_response_code": 200, + "selected_services": ["api", "worker"], + } + + +def test_transform_inline_string_applies_string_primitives() -> None: + """String-specific transforms should be available from the package CLI.""" + with patch("sys.stdout.write") as mock_write: + exit_code = cli_module.main( + [ + "transform", + "API Response Value", + "--suffix", + "raw", + "--step", + "to-snake-case", + "--output", + "raw", + ] + ) + + assert exit_code == 0 + assert _stdout_text(mock_write) == "api_response_value\n" + + +def test_transform_inline_string_can_reconstruct_scalar() -> None: + """Scalar reconstruction should use the string primitive when needed.""" + with patch("sys.stdout.write") as mock_write: + exit_code = cli_module.main(["transform", "200", "--suffix", "raw", "--step", "reconstruct"]) + + assert exit_code == 0 + assert json.loads(_stdout_text(mock_write)) == 200 + + +def test_transform_requires_at_least_one_step(tmp_path) -> None: + """Transform should fail clearly when no primitive/container step is requested.""" + payload = tmp_path / "payload.json" + payload.write_text('{"service": "api"}', encoding="utf-8") + + with patch("sys.stderr.write") as mock_write: + exit_code = cli_module.main(["transform", "--file", str(payload)]) + + assert exit_code == 1 + assert "transform requires at least one --step" in _stdout_text(mock_write) + + +def test_transform_reports_step_that_does_not_match_data_shape() -> None: + """Shape-specific transforms should fail loudly instead of silently lowering data.""" + with patch("sys.stderr.write") as mock_write: + exit_code = cli_module.main(["transform", '["api"]', "--suffix", "json", "--step", "unhump"]) + + assert exit_code == 1 + assert "transform 'unhump' is not 
available for ExtendedList" in _stdout_text(mock_write) + + +def test_transform_can_write_output_artifact(tmp_path) -> None: + """Transformed workflow output can be written through the shared file boundary.""" + payload = tmp_path / "payload.json" + output = tmp_path / "build" / "payload.yaml" + payload.write_text('{"HTTPResponseCode": "200"}', encoding="utf-8") + + with patch("sys.stdout.write") as mock_write: + exit_code = cli_module.main( + [ + "transform", + "--file", + str(payload), + "--step", + "reconstruct", + "--step", + "unhump", + "--output", + "yaml", + "--write", + str(output), + ] + ) + + assert exit_code == 0 + output_text = output.read_text(encoding="utf-8") + assert _stdout_text(mock_write) == f"{output_text}\n" + assert "http_response_code: 200" in output_text From 12098087de00a4cab37ba7ce2b3de4dd9e34fb9f Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:48:38 -0500 Subject: [PATCH 270/287] feat: add workflow named transforms --- README.md | 8 +++- docs/package-surface.md | 35 ++++++++++---- src/extended_data/__init__.py | 14 +++++- src/extended_data/cli.py | 43 ++--------------- src/extended_data/workflows/__init__.py | 59 +++++++++++++++++++++++ tests/core/test_package_surface.py | 4 ++ tests/core/test_workflows.py | 64 +++++++++++++++++++++++++ 7 files changed, 174 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 0124042..9edd40f 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ data = decode_json('{"status": "ok"}') payload = ExtendedDict(data).deep_merge({"source": "example"}) decoded_file = decode_file('{"service": {"name": "api"}}', suffix="json") artifact = DataFile.decode('{"service": {"name": "api"}}', suffix="json") -workflow = DataWorkflow.from_value(payload).then(("normalize", lambda data: data.unhump())).result() +workflow = DataWorkflow.from_value(payload).transform("unhump").result() print(encode_yaml(payload)) print(decoded_file["service"]["name"].upper_first()) @@ -296,7 +296,11 @@ 
transformations, write an output artifact, and keep the step trail in a transformations, lowering/promoting, and writes, so file and API provenance can stay with the result. Completed workflow results expose detached promoted views with `as_extended()` plus direct `to_export_safe()` and `wrap_for_export()` -helpers. Missing file inputs and empty writes fail loudly. +helpers. `DataWorkflow.transform()` applies the same named Tier 2 transform +catalog exposed by the package CLI, including `reconstruct`, `unhump`, +`deduplicate`, `compact`, and string case transforms. Missing file inputs, +unknown transform names, shape-incompatible transforms, and empty writes fail +loudly. `InputProvider` stores its active, frozen, and merged input snapshots as `ExtendedDict` values, so direct input-data access can use Tier 2 container methods. `snapshot_inputs()` returns detached active or frozen snapshots, and diff --git a/docs/package-surface.md b/docs/package-surface.md index 0ce64d3..ee67347 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -26,6 +26,7 @@ from extended_data import ( SecretsConnector, SlackConnector, SyncOptions, + list_data_transform_steps, extend_data, to_builtin, ) @@ -180,14 +181,18 @@ metadata cannot override the sanitized core `source` and `path` fields. `DataWorkflow` is the Tier 3 composition surface for higher-order data processing. It reads or decodes structured data through the file and format processors, accepts `DataFile` artifacts with `from_data_file()`, promotes -values into Tier 2 containers by default, applies named transformation steps, -writes output artifacts, and returns a `WorkflowResult` with the completed -value, output path, step trail, and promoted metadata. Workflow metadata is -preserved across `then()`, `run()`, `as_builtin()`, `as_extended()`, and -`write()`, so file and API provenance from `DataFile` artifacts remains attached -to the result. 
`WorkflowResult.as_extended()` returns a detached promoted view -of the completed value, and result-level `to_export_safe()` / -`wrap_for_export()` expose the same export boundary used by Tier 2 containers. +values into Tier 2 containers by default, applies reusable `WorkflowStep` +functions or named transform steps, writes output artifacts, and returns a +`WorkflowResult` with the completed value, output path, step trail, and +promoted metadata. `DataWorkflow.transform()` applies the same named Tier 2 +transform catalog exposed by the CLI, including `reconstruct`, `unhump`, +`deduplicate`, `compact`, and string case transforms. Workflow metadata is +preserved across `then()`, `run()`, `transform()`, `as_builtin()`, +`as_extended()`, and `write()`, so file and API provenance from `DataFile` +artifacts remains attached to the result. `WorkflowResult.as_extended()` returns +a detached promoted view of the completed value, and result-level +`to_export_safe()` / `wrap_for_export()` expose the same export boundary used by +Tier 2 containers. ```python from extended_data import DataWorkflow @@ -196,17 +201,27 @@ env_data = DataWorkflow.from_file("config/dev.yaml").value result = ( DataWorkflow.from_file("config/base.yaml") .then(("merge-env", lambda data: data.deep_merge(env_data))) + .transform("reconstruct", "unhump") .write("build/config.yaml") ) -assert result.steps == ("read:config/base.yaml", "merge-env", "write:build/config.yaml") +assert result.steps == ( + "read:config/base.yaml", + "merge-env", + "transform:reconstruct", + "transform:unhump", + "write:build/config.yaml", +) assert result.metadata["source"] == "config/base.yaml" assert result.as_extended()["service"]["name"].upper_first() == "Api" assert result.to_export_safe()["service"]["name"] == "api" +assert "unhump" in list_data_transform_steps() ``` Missing workflow input files raise `FileNotFoundError`, and empty workflow -writes raise `ValueError` unless `allow_empty=True` is passed. 
+writes raise `ValueError` unless `allow_empty=True` is passed. Unknown transform +names and transforms that do not match the current data shape raise instead of +silently preserving stale workflow state. `InputProvider` loads input data from explicit mappings, environment variables, and stdin, then decodes or coerces values through the shared primitive and diff --git a/src/extended_data/__init__.py b/src/extended_data/__init__.py index 7a53178..a92226b 100644 --- a/src/extended_data/__init__.py +++ b/src/extended_data/__init__.py @@ -46,7 +46,16 @@ ) from extended_data.io.importers import unwrap_raw_data_from_import from extended_data.primitives.formats.errors import DataDecodeError -from extended_data.workflows import DataWorkflow, StepLike, WorkflowAction, WorkflowResult, WorkflowStep +from extended_data.workflows import ( + DATA_TRANSFORM_STEPS, + DataWorkflow, + StepLike, + WorkflowAction, + WorkflowResult, + WorkflowStep, + data_transform_action, + list_data_transform_steps, +) if TYPE_CHECKING: @@ -130,6 +139,7 @@ def __getattr__(name: str) -> Any: __all__ = [ + "DATA_TRANSFORM_STEPS", "AWSConnector", "AnthropicConnector", "ConnectorBase", @@ -169,6 +179,7 @@ def __getattr__(name: str) -> Any: "base64_decode", "base64_encode", "clone_repository_to_temp", + "data_transform_action", "decode_file", "delete_file", "directed_inputs", @@ -191,6 +202,7 @@ def __getattr__(name: str) -> Any: "list_connectors", "list_connectors_by_capability", "list_connectors_by_category", + "list_data_transform_steps", "make_raw_data_export_safe", "match_file_extensions", "read_data_file", diff --git a/src/extended_data/cli.py b/src/extended_data/cli.py index 6e54253..388ed9d 100644 --- a/src/extended_data/cli.py +++ b/src/extended_data/cli.py @@ -10,25 +10,11 @@ from extended_data.io import DataFile from extended_data.primitives.redaction import redact_sensitive_text -from extended_data.workflows import DataWorkflow, WorkflowAction, WorkflowResult +from extended_data.workflows 
import DataWorkflow, WorkflowAction, WorkflowResult, list_data_transform_steps CONNECTOR_COMMANDS = frozenset({"call", "info", "list", "mcp", "methods"}) OUTPUT_ENCODINGS = ("json", "yaml", "toml", "hcl", "raw") -TRANSFORM_METHODS = { - "compact": "compact", - "deduplicate": "deduplicate", - "flatten": "flatten", - "humanize": "humanize", - "reconstruct": "reconstruct_special_types", - "titleize": "titleize", - "to-camel-case": "to_camel_case", - "to-kebab-case": "to_kebab_case", - "to-pascal-case": "to_pascal_case", - "to-snake-case": "to_snake_case", - "unhump": "unhump", - "unique": "unique", -} def _write_stdout(message: str) -> None: @@ -129,30 +115,7 @@ def _transform_workflow(args: argparse.Namespace) -> DataWorkflow: if not steps: raise ValueError("transform requires at least one --step") - workflow = _decode_artifact(args).workflow() - for step in steps: - workflow = workflow.then((f"transform:{step}", _transform_action(step))) - return workflow - - -def _transform_action(step: str) -> WorkflowAction: - """Return a typed workflow action for one supported transform step.""" - method_name = TRANSFORM_METHODS[step] - - def transform(data: Any) -> Any: - method = _transform_method(data, step, method_name) - if not callable(method): - raise TypeError(f"transform {step!r} is not available for {type(data).__name__}") - return method() - - return transform - - -def _transform_method(data: Any, step: str, method_name: str) -> Any: - """Return the best method for a transform step on the current data shape.""" - if step == "reconstruct": - return getattr(data, "reconstruct_special_types", None) or getattr(data, "reconstruct_special_type", None) - return getattr(data, method_name, None) + return _decode_artifact(args).workflow().transform(*steps) def cmd_transform(args: argparse.Namespace) -> int: @@ -223,7 +186,7 @@ def _build_parser() -> argparse.ArgumentParser: "--step", dest="steps", action="append", - choices=sorted(TRANSFORM_METHODS), + 
choices=list_data_transform_steps(), help="Transform step to apply in order", ) transform_parser.add_argument("--output", choices=OUTPUT_ENCODINGS, default="json", help="Output encoding") diff --git a/src/extended_data/workflows/__init__.py b/src/extended_data/workflows/__init__.py index c37a979..38711b5 100644 --- a/src/extended_data/workflows/__init__.py +++ b/src/extended_data/workflows/__init__.py @@ -6,6 +6,7 @@ from copy import deepcopy from dataclasses import dataclass, field from pathlib import Path +from types import MappingProxyType from typing import Any, TypeAlias from extended_data.containers import ExtendedDict, extend_data, to_builtin @@ -15,6 +16,51 @@ WorkflowAction: TypeAlias = Callable[[Any], Any] StepLike: TypeAlias = "WorkflowStep | tuple[str, WorkflowAction] | WorkflowAction" +DATA_TRANSFORM_STEPS: Mapping[str, str] = MappingProxyType( + { + "compact": "compact", + "deduplicate": "deduplicate", + "flatten": "flatten", + "humanize": "humanize", + "reconstruct": "reconstruct_special_types", + "titleize": "titleize", + "to-camel-case": "to_camel_case", + "to-kebab-case": "to_kebab_case", + "to-pascal-case": "to_pascal_case", + "to-snake-case": "to_snake_case", + "unhump": "unhump", + "unique": "unique", + } +) + + +def list_data_transform_steps() -> tuple[str, ...]: + """Return the named transform steps supported by DataWorkflow.""" + return tuple(sorted(DATA_TRANSFORM_STEPS)) + + +def data_transform_action(step: str) -> WorkflowAction: + """Return a workflow action for one named Tier 2 transform step.""" + try: + method_name = DATA_TRANSFORM_STEPS[step] + except KeyError as exc: + expected = ", ".join(list_data_transform_steps()) + raise ValueError(f"unknown data transform {step!r}; expected one of: {expected}") from exc + + def transform(data: Any) -> Any: + method = _data_transform_method(data, step, method_name) + if not callable(method): + raise TypeError(f"transform {step!r} is not available for {type(data).__name__}") + return method() + + 
return transform + + +def _data_transform_method(data: Any, step: str, method_name: str) -> Any: + """Return the best method for a transform step on the current data shape.""" + if step == "reconstruct": + return getattr(data, "reconstruct_special_types", None) or getattr(data, "reconstruct_special_type", None) + return getattr(data, method_name, None) @dataclass(frozen=True, slots=True) @@ -206,6 +252,16 @@ def run(self, *steps: StepLike, as_extended: bool | None = None) -> DataWorkflow workflow = workflow.then(step, as_extended=as_extended) return workflow + def transform(self, *steps: str, as_extended: bool | None = None) -> DataWorkflow: + """Apply named Tier 2 transform steps in order.""" + if not steps: + raise ValueError("DataWorkflow.transform requires at least one step") + + workflow = self + for step in steps: + workflow = workflow.then((f"transform:{step}", data_transform_action(step)), as_extended=as_extended) + return workflow + def as_builtin(self) -> DataWorkflow: """Return the next workflow state with built-in Python containers.""" return DataWorkflow( @@ -287,9 +343,12 @@ def _decode_step_name(*, file_path: FilePath | None, suffix: str | None) -> str: __all__ = [ + "DATA_TRANSFORM_STEPS", "DataWorkflow", "StepLike", "WorkflowAction", "WorkflowResult", "WorkflowStep", + "data_transform_action", + "list_data_transform_steps", ] diff --git a/tests/core/test_package_surface.py b/tests/core/test_package_surface.py index cc69120..d029434 100644 --- a/tests/core/test_package_surface.py +++ b/tests/core/test_package_surface.py @@ -156,6 +156,10 @@ def test_root_exports_first_class_integrated_surfaces() -> None: assert extended_data.ConnectorInfo.__name__ == "ConnectorInfo" assert extended_data.WorkflowResult.__name__ == "WorkflowResult" assert extended_data.WorkflowStep.__name__ == "WorkflowStep" + assert callable(extended_data.data_transform_action) + assert callable(extended_data.list_data_transform_steps) + assert "unhump" in 
extended_data.DATA_TRANSFORM_STEPS + assert "reconstruct" in extended_data.list_data_transform_steps() assert extended_data.SecretsConnector is secrets.SecretsConnector assert extended_data.SyncOptions is secrets.SyncOptions assert extended_data.SyncResult is secrets.SyncResult diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index e39f45f..1e5572a 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -19,6 +19,8 @@ WorkflowStep, base64_decode, base64_encode, + data_transform_action, + list_data_transform_steps, read_data_file, write_file, ) @@ -88,6 +90,68 @@ def select_services(data: ExtendedDict) -> ExtendedDict: } +def test_data_workflow_applies_shared_named_transforms() -> None: + """DataWorkflow exposes common Tier 2 transforms without ad hoc lambdas.""" + raw_payload = { + "HTTPResponseCode": "200", + "SelectedServices": ["api", "api", "worker"], + "EmptyValue": "", + } + + workflow = DataWorkflow.from_value(raw_payload).transform( + "reconstruct", + "unhump", + "deduplicate", + "compact", + ) + result = workflow.result() + + assert workflow.steps == ( + "value", + "transform:reconstruct", + "transform:unhump", + "transform:deduplicate", + "transform:compact", + ) + assert result.as_builtin() == { + "http_response_code": 200, + "selected_services": ["api", "worker"], + } + + +def test_data_workflow_reconstruct_transform_handles_scalars() -> None: + """Named reconstruct should use the scalar string primitive when needed.""" + result = DataWorkflow.from_value("200").transform("reconstruct").result() + + assert result.value == 200 + + +def test_data_transform_action_reports_unknown_steps() -> None: + """Unknown named transforms should fail at the workflow boundary.""" + with pytest.raises(ValueError, match="unknown data transform 'missing'"): + data_transform_action("missing") + + +def test_data_workflow_transform_requires_steps() -> None: + """Transform calls should not silently preserve the old workflow 
value.""" + with pytest.raises(ValueError, match=r"DataWorkflow\.transform requires at least one step"): + DataWorkflow.from_value({"service": "api"}).transform() + + +def test_data_workflow_transform_reports_shape_mismatch() -> None: + """Shape-specific named transforms should fail when applied to incompatible data.""" + with pytest.raises(TypeError, match="transform 'unhump' is not available for ExtendedList"): + DataWorkflow.from_value(["api"]).transform("unhump") + + +def test_list_data_transform_steps_is_sorted_catalog() -> None: + """The transform catalog should be deterministic for CLIs and docs.""" + steps = list_data_transform_steps() + + assert steps == tuple(sorted(steps)) + assert {"compact", "reconstruct", "to-snake-case", "unhump"} <= set(steps) + + def test_data_workflow_starts_from_data_file_artifact() -> None: """DataFile artifacts can start named workflows without manual .data plumbing.""" artifact = DataFile.decode('{"service": {"name": "api"}}', suffix="json", metadata={"status_code": 200}) From 7a9fee651497cd479be8d0a75bb6b5a48adb4dfd Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:52:12 -0500 Subject: [PATCH 271/287] docs: exercise workflow transforms in examples --- examples/core/README.md | 2 +- examples/core/composed_workflows.py | 27 ++++++++++++++++++--------- tests/examples/test_safe_examples.py | 8 ++++++++ 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/examples/core/README.md b/examples/core/README.md index 78c6d3a..525e670 100644 --- a/examples/core/README.md +++ b/examples/core/README.md @@ -10,7 +10,7 @@ surface docs, so treat them as part of the documented contract. 
### Basic Usage - [`basic_usage.py`](basic_usage.py) - Common state helpers plus first-class `ExtendedString`, `ExtendedList`, and `ExtendedDict` operations -- [`composed_workflows.py`](composed_workflows.py) - Layered config, Terraform-style HCL, YAML-native tags, and payload pipelines +- [`composed_workflows.py`](composed_workflows.py) - Layered config, named workflow transforms, Terraform-style HCL, YAML-native tags, and payload pipelines - [`serialization.py`](serialization.py) - YAML, JSON, TOML, HCL, and Base64 encoding/decoding - [`file_operations.py`](file_operations.py) - File path utilities and Git repository helpers - [`string_transformations.py`](string_transformations.py) - Case conversion and string manipulation diff --git a/examples/core/composed_workflows.py b/examples/core/composed_workflows.py index a49e175..ce3cfbb 100644 --- a/examples/core/composed_workflows.py +++ b/examples/core/composed_workflows.py @@ -12,10 +12,10 @@ from extended_data import ( DataWorkflow, - ExtendedDict, ExtendedList, base64_decode, base64_encode, + list_data_transform_steps, read_data_file, read_file, write_file, @@ -89,15 +89,22 @@ def demonstrate_api_payload_workflow() -> None: """Normalize and serialize an API-style payload.""" print("\n=== API Payload Workflow ===\n") - payload = ExtendedDict( - { - "HTTPResponseCode": 200, - "SelectedServices": ExtendedList(["api", "worker", "db"]).filter_values(denylist=["db"]), - "Tags": ["api", "api", "docs"], - } - ) + payload = { + "HTTPResponseCode": "200", + "SelectedServices": ["api", "worker", "db", "api"], + "Tags": ["api", "api", "docs"], + "EmptyValue": "", + } - normalized = payload.deduplicate().unhump() + def select_services(data): + return data | {"SelectedServices": ExtendedList(data["SelectedServices"]).filter_values(denylist=["db"])} + + workflow = ( + DataWorkflow.from_value(payload) + .then(("select-services", select_services)) + .transform("reconstruct", "deduplicate", "compact", "unhump") + ) + normalized = 
workflow.result().value with TemporaryDirectory() as tmpdir: tld = Path(tmpdir) @@ -105,6 +112,8 @@ def demonstrate_api_payload_workflow() -> None: payload_text = read_file("build/payload.json", tld=tld) print(payload_text) + print(f"Steps: {', '.join(workflow.steps)}") + print(f"Known transforms: {', '.join(list_data_transform_steps())}") def demonstrate_yaml_native_workflow() -> None: diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index cad6a0b..200fa38 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -156,6 +156,14 @@ def test_basic_core_example_uses_container_first_operations() -> None: assert offenders == [] +def test_composed_workflow_example_uses_named_transforms() -> None: + """The workflow example should exercise the public named-transform API.""" + text = (REPO_ROOT / "examples/core/composed_workflows.py").read_text(encoding="utf-8") + + assert ".transform(" in text + assert "list_data_transform_steps" in text + + def test_examples_do_not_import_tier1_utilities_from_root() -> None: """Examples should import pure Tier 1 utilities from extended_data.primitives.""" offenders: list[str] = [] From ba186e4099cb04f8af3174f2cdf0ce6215560fc5 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Wed, 10 Jun 2026 23:58:01 -0500 Subject: [PATCH 272/287] feat: add workflow merge helpers --- README.md | 12 ++++--- docs/package-surface.md | 22 +++++++----- examples/core/composed_workflows.py | 2 +- src/extended_data/cli.py | 15 ++------ src/extended_data/workflows/__init__.py | 40 +++++++++++++++++++++ tests/core/test_workflows.py | 46 ++++++++++++++++++++++++- tests/examples/test_safe_examples.py | 3 +- 7 files changed, 110 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 9edd40f..747a26c 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ data = decode_json('{"status": "ok"}') payload = ExtendedDict(data).deep_merge({"source": "example"}) decoded_file = 
decode_file('{"service": {"name": "api"}}', suffix="json") artifact = DataFile.decode('{"service": {"name": "api"}}', suffix="json") -workflow = DataWorkflow.from_value(payload).transform("unhump").result() +workflow = DataWorkflow.from_value(payload).merge({"region": "us-east-1"}).transform("unhump").result() print(encode_yaml(payload)) print(decoded_file["service"]["name"].upper_first()) @@ -290,17 +290,19 @@ first-class with promoted data, promoted source metadata, detached for artifact-first processing. DataFile source labels and metadata use the shared Tier 1 redaction policy before they enter workflow steps or result metadata. `DataWorkflow` makes multi-step compositions first-class: read, -decode, or accept a `DataFile` artifact, apply named +decode, or accept a `DataFile` artifact, deep-merge mapping layers, apply named transformations, write an output artifact, and keep the step trail in a -`WorkflowResult`. Workflow metadata is promoted and preserved across +`WorkflowResult`. `DataWorkflow.merge_file()` reads a structured file through +the same `DataFile` boundary before merging it. Workflow metadata is promoted +and preserved across transformations, lowering/promoting, and writes, so file and API provenance can stay with the result. Completed workflow results expose detached promoted views with `as_extended()` plus direct `to_export_safe()` and `wrap_for_export()` helpers. `DataWorkflow.transform()` applies the same named Tier 2 transform catalog exposed by the package CLI, including `reconstruct`, `unhump`, `deduplicate`, `compact`, and string case transforms. Missing file inputs, -unknown transform names, shape-incompatible transforms, and empty writes fail -loudly. +missing merge layers, unknown transform names, shape-incompatible transforms, +and empty writes fail loudly. `InputProvider` stores its active, frozen, and merged input snapshots as `ExtendedDict` values, so direct input-data access can use Tier 2 container methods. 
`snapshot_inputs()` returns detached active or frozen snapshots, and diff --git a/docs/package-surface.md b/docs/package-surface.md index ee67347..8565244 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -181,13 +181,17 @@ metadata cannot override the sanitized core `source` and `path` fields. `DataWorkflow` is the Tier 3 composition surface for higher-order data processing. It reads or decodes structured data through the file and format processors, accepts `DataFile` artifacts with `from_data_file()`, promotes -values into Tier 2 containers by default, applies reusable `WorkflowStep` -functions or named transform steps, writes output artifacts, and returns a -`WorkflowResult` with the completed value, output path, step trail, and -promoted metadata. `DataWorkflow.transform()` applies the same named Tier 2 +values into Tier 2 containers by default, deep-merges in-memory or file-backed +mapping layers, applies reusable `WorkflowStep` functions or named transform +steps, writes output artifacts, and returns a `WorkflowResult` with the +completed value, output path, step trail, and promoted metadata. +`DataWorkflow.merge()` deep-merges mapping values through the Tier 2 +`ExtendedDict` primitive, and `merge_file()` decodes structured file layers +through `DataFile` before merging. `DataWorkflow.transform()` applies the same +named Tier 2 transform catalog exposed by the CLI, including `reconstruct`, `unhump`, `deduplicate`, `compact`, and string case transforms. Workflow metadata is -preserved across `then()`, `run()`, `transform()`, `as_builtin()`, +preserved across `then()`, `run()`, `merge()`, `merge_file()`, `transform()`, `as_builtin()`, `as_extended()`, and `write()`, so file and API provenance from `DataFile` artifacts remains attached to the result. 
`WorkflowResult.as_extended()` returns a detached promoted view of the completed value, and result-level @@ -200,7 +204,7 @@ from extended_data import DataWorkflow env_data = DataWorkflow.from_file("config/dev.yaml").value result = ( DataWorkflow.from_file("config/base.yaml") - .then(("merge-env", lambda data: data.deep_merge(env_data))) + .merge(env_data, name="merge-env") .transform("reconstruct", "unhump") .write("build/config.yaml") ) @@ -219,9 +223,9 @@ assert "unhump" in list_data_transform_steps() ``` Missing workflow input files raise `FileNotFoundError`, and empty workflow -writes raise `ValueError` unless `allow_empty=True` is passed. Unknown transform -names and transforms that do not match the current data shape raise instead of -silently preserving stale workflow state. +writes raise `ValueError` unless `allow_empty=True` is passed. Missing merge +layers, unknown transform names, and operations that do not match the current +data shape raise instead of silently preserving stale workflow state. 
`InputProvider` loads input data from explicit mappings, environment variables, and stdin, then decodes or coerces values through the shared primitive and diff --git a/examples/core/composed_workflows.py b/examples/core/composed_workflows.py index ce3cfbb..c63925a 100644 --- a/examples/core/composed_workflows.py +++ b/examples/core/composed_workflows.py @@ -47,7 +47,7 @@ def demonstrate_layered_config_workflow() -> None: env_data = DataWorkflow.from_file("config/dev.yaml", tld=tld).value result = ( DataWorkflow.from_file("config/base.yaml", tld=tld) - .then(("merge-env", lambda data: data.deep_merge(env_data))) + .merge(env_data, name="merge-env") .write("build/config.yaml", tld=tld) ) result.to_export_safe() diff --git a/src/extended_data/cli.py b/src/extended_data/cli.py index 388ed9d..c763e4d 100644 --- a/src/extended_data/cli.py +++ b/src/extended_data/cli.py @@ -10,7 +10,7 @@ from extended_data.io import DataFile from extended_data.primitives.redaction import redact_sensitive_text -from extended_data.workflows import DataWorkflow, WorkflowAction, WorkflowResult, list_data_transform_steps +from extended_data.workflows import DataWorkflow, WorkflowResult, list_data_transform_steps CONNECTOR_COMMANDS = frozenset({"call", "info", "list", "mcp", "methods"}) @@ -78,21 +78,10 @@ def _merge_workflow(args: argparse.Namespace) -> DataWorkflow: workflow = DataWorkflow.from_file(file_paths[0], suffix=args.suffix) for file_path in file_paths[1:]: - artifact = DataFile.read(file_path, suffix=args.suffix) - merge_value = artifact.as_extended() - workflow = workflow.then((f"merge:{file_path}", _deep_merge_action(merge_value))) + workflow = workflow.merge_file(file_path, suffix=args.suffix) return workflow -def _deep_merge_action(value: Any) -> WorkflowAction: - """Return a typed workflow action that deep-merges one value.""" - - def merge(data: Any) -> Any: - return data.deep_merge(value) - - return merge - - def cmd_merge(args: argparse.Namespace) -> int: """Merge structured 
files through DataWorkflow and write or print the result.""" try: diff --git a/src/extended_data/workflows/__init__.py b/src/extended_data/workflows/__init__.py index 38711b5..18da550 100644 --- a/src/extended_data/workflows/__init__.py +++ b/src/extended_data/workflows/__init__.py @@ -262,6 +262,33 @@ def transform(self, *steps: str, as_extended: bool | None = None) -> DataWorkflo workflow = workflow.then((f"transform:{step}", data_transform_action(step)), as_extended=as_extended) return workflow + def merge( + self, + *mappings: Mapping[str, Any], + name: str = "merge", + as_extended: bool | None = None, + ) -> DataWorkflow: + """Deep-merge mappings into the current workflow value.""" + if not mappings: + raise ValueError("DataWorkflow.merge requires at least one mapping") + + return self.then((name, _deep_merge_action(*mappings)), as_extended=as_extended) + + def merge_file( + self, + file_path: FilePath, + *, + suffix: str | None = None, + charset: str = "utf-8", + errors: str = "strict", + tld: Path | None = None, + name: str | None = None, + as_extended: bool | None = None, + ) -> DataWorkflow: + """Read a structured file and deep-merge it into the workflow value.""" + artifact = DataFile.read(file_path, suffix=suffix, charset=charset, errors=errors, tld=tld) + return self.merge(artifact.as_extended(), name=name or f"merge:{file_path}", as_extended=as_extended) + def as_builtin(self) -> DataWorkflow: """Return the next workflow state with built-in Python containers.""" return DataWorkflow( @@ -315,6 +342,19 @@ def write( ) +def _deep_merge_action(*mappings: Mapping[str, Any]) -> WorkflowAction: + """Return a typed workflow action that deep-merges mapping values.""" + merge_values = tuple(extend_data(deepcopy(to_builtin(mapping))) for mapping in mappings) + + def merge(data: Any) -> Any: + method = getattr(data, "deep_merge", None) + if not callable(method): + raise TypeError(f"merge is not available for {type(data).__name__}") + return method(*merge_values) + 
+ return merge + + def _coerce_step(step: StepLike, *, name: str | None = None) -> WorkflowStep: """Normalize supported step declarations to WorkflowStep.""" if isinstance(step, WorkflowStep): diff --git a/tests/core/test_workflows.py b/tests/core/test_workflows.py index 1e5572a..01d6db3 100644 --- a/tests/core/test_workflows.py +++ b/tests/core/test_workflows.py @@ -47,7 +47,7 @@ def test_data_workflow_layered_config_round_trip(tmp_path: Path) -> None: env_data = DataWorkflow.from_file("config/dev.yaml", tld=tmp_path).value result = ( DataWorkflow.from_file("config/base.yaml", tld=tmp_path) - .then(("merge-env", lambda data: data.deep_merge(env_data))) + .merge(env_data, name="merge-env") .write("build/config.yaml", tld=tmp_path) ) @@ -90,6 +90,50 @@ def select_services(data: ExtendedDict) -> ExtendedDict: } +def test_data_workflow_deep_merges_mapping_values() -> None: + """DataWorkflow should expose deep merge without ad hoc lambda steps.""" + workflow = DataWorkflow.from_value({"service": {"name": "api"}, "ports": [8080]}).merge( + {"service": {"debug": True}, "ports": [8081]}, + name="merge-env", + ) + result = workflow.result() + + assert workflow.steps == ("value", "merge-env") + assert isinstance(workflow.value, ExtendedDict) + assert result.as_builtin() == { + "service": {"name": "api", "debug": True}, + "ports": [8080, 8081], + } + + +def test_data_workflow_merge_file_reads_and_merges_layer(tmp_path: Path) -> None: + """File-backed merge should use the same decoded DataFile boundary as reads.""" + write_file("base.yaml", {"service": {"name": "api"}, "ports": [8080]}, tld=tmp_path) + write_file("env.yaml", {"service": {"debug": "true"}, "ports": [8081]}, tld=tmp_path) + + workflow = DataWorkflow.from_file("base.yaml", tld=tmp_path).merge_file("env.yaml", tld=tmp_path) + result = workflow.transform("reconstruct").result() + + assert workflow.steps == ("read:base.yaml", "merge:env.yaml") + assert result.steps == ("read:base.yaml", "merge:env.yaml", 
"transform:reconstruct") + assert result.as_builtin() == { + "service": {"name": "api", "debug": True}, + "ports": [8080, 8081], + } + + +def test_data_workflow_merge_requires_mapping_values() -> None: + """Merge calls should fail loudly when no layer is provided.""" + with pytest.raises(ValueError, match=r"DataWorkflow\.merge requires at least one mapping"): + DataWorkflow.from_value({"service": "api"}).merge() + + +def test_data_workflow_merge_reports_shape_mismatch() -> None: + """Deep merge should fail when the current workflow value is not mapping-shaped.""" + with pytest.raises(TypeError, match="merge is not available for ExtendedList"): + DataWorkflow.from_value(["api"]).merge({"service": "api"}) + + def test_data_workflow_applies_shared_named_transforms() -> None: """DataWorkflow exposes common Tier 2 transforms without ad hoc lambdas.""" raw_payload = { diff --git a/tests/examples/test_safe_examples.py b/tests/examples/test_safe_examples.py index 200fa38..5019ccb 100644 --- a/tests/examples/test_safe_examples.py +++ b/tests/examples/test_safe_examples.py @@ -157,9 +157,10 @@ def test_basic_core_example_uses_container_first_operations() -> None: def test_composed_workflow_example_uses_named_transforms() -> None: - """The workflow example should exercise the public named-transform API.""" + """The workflow example should exercise public merge and transform APIs.""" text = (REPO_ROOT / "examples/core/composed_workflows.py").read_text(encoding="utf-8") + assert ".merge(" in text assert ".transform(" in text assert "list_data_transform_steps" in text From e9fefa226553ac9a2186dff188eb8161c05f120d Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 00:02:28 -0500 Subject: [PATCH 273/287] feat: bridge connector responses into workflows --- README.md | 4 ++- docs/package-surface.md | 4 ++- src/extended_data/connectors/base.py | 21 +++++++++++++++ tests/connectors/test_base.py | 27 +++++++++++++++++++ .../test_connector_payload_contracts.py | 2 ++ 5 
files changed, 56 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 747a26c..23dd233 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,9 @@ decoding primitives instead of bypassing the boundary with transport-specific JSON helpers. Use `request_data_file()` when a connector workflow needs API response data and non-secret provenance such as source URL, HTTP status, content type, method, -and endpoint in one `DataFile` artifact. +and endpoint in one `DataFile` artifact. Use `request_workflow()` when that +decoded response should immediately enter a `DataWorkflow` for merging, +transforming, writing, and provenance-preserving result handling. Data-returning AI tool wrappers expose the same `ExtendedDict`/`ExtendedList` payload contract; framework factory functions still return framework tool objects. diff --git a/docs/package-surface.md b/docs/package-surface.md index 8565244..dad2527 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -345,7 +345,9 @@ through these shared data primitives and lower to built-in values only at model validation or redaction boundaries. Use `request_data_file()` when an API workflow needs the decoded data plus non-secret response provenance such as source URL, HTTP status, content type, -method, and endpoint in a `DataFile` artifact. +method, and endpoint in a `DataFile` artifact. Use `request_workflow()` when +that response should immediately become a `DataWorkflow` with the same promoted +metadata, named transforms, merge helpers, and export/write boundary. 
Connector methods that return external data payloads should call `extend_result()` at the return boundary, making SDK-shaped dictionaries, lists, decoded repository files, GraphQL results, and workflow-builder output diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index a191cd2..72125ed 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -65,6 +65,7 @@ def my_operation(self) -> ExtendedDict: from extended_data.containers import ExtendedDict, ExtendedList from extended_data.io import DataFile + from extended_data.workflows import DataWorkflow class RateLimitError(Exception): @@ -491,6 +492,26 @@ def request_data_file( metadata={"method": method.upper(), "endpoint": endpoint}, ) + def request_workflow( + self, + method: str, + endpoint: str, + *, + headers: dict[str, str] | None = None, + suffix: str | None = None, + as_extended: bool = True, + **kwargs: Any, + ) -> DataWorkflow: + """Make an HTTP request and return a workflow over the decoded response artifact.""" + return self.request_data_file( + method, + endpoint, + headers=headers, + suffix=suffix, + as_extended=as_extended, + **kwargs, + ).workflow(as_extended=as_extended) + def get(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP GET request.""" return self.request("GET", endpoint, **kwargs) diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py index 72a6390..fdac35d 100644 --- a/tests/connectors/test_base.py +++ b/tests/connectors/test_base.py @@ -15,6 +15,7 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString from extended_data.io import DataFile from extended_data.logging import Logging +from extended_data.workflows import DataWorkflow class ExampleConnector(ConnectorBase): @@ -181,6 +182,32 @@ def test_request_data_file_adds_request_provenance() -> None: mock_client.request.assert_called_once() +def test_request_workflow_starts_from_response_artifact() -> 
None: + """request_workflow should hand API data directly to DataWorkflow with provenance.""" + connector = _connector() + mock_client = MagicMock() + mock_client.request.return_value = httpx.Response( + 200, + content=b'{"HTTPResponseCode":"200","SelectedServices":["api","api","worker"]}', + headers={"content-type": "application/json"}, + ) + connector._client = mock_client + + workflow = connector.request_workflow("GET", "/status") + result = workflow.transform("reconstruct", "unhump", "deduplicate").result() + + assert isinstance(workflow, DataWorkflow) + assert workflow.steps == ("data_file:https://api.example.com/status",) + assert workflow.metadata["method"] == "GET" + assert workflow.metadata["endpoint"] == "/status" + assert result.metadata["status_code"] == 200 + assert result.as_builtin() == { + "http_response_code": 200, + "selected_services": ["api", "worker"], + } + mock_client.request.assert_called_once() + + def test_extend_result_promotes_connector_payloads() -> None: """Connector data payloads cross into the Tier 2 container layer explicitly.""" connector = _connector() diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index 15aa6f6..a6ff19e 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -236,6 +236,7 @@ "request", "request_data", "request_data_file", + "request_workflow", "snapshot_inputs", } @@ -260,6 +261,7 @@ ConnectorBase.request, ConnectorBase.request_data, ConnectorBase.request_data_file, + ConnectorBase.request_workflow, InputProvider.freeze_inputs, InputProvider.get_input, InputProvider.merge_inputs, From 6eb81ad473bed9c79761a075d41880b119fa4dd6 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 00:07:11 -0500 Subject: [PATCH 274/287] feat: add connector workflow verb helpers --- README.md | 4 +- docs/package-surface.md | 5 +- src/extended_data/connectors/base.py | 55 
+++++++++++++++++++ tests/connectors/test_base.py | 32 +++++++++++ .../test_connector_payload_contracts.py | 10 ++++ 5 files changed, 104 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 23dd233..7623b16 100644 --- a/README.md +++ b/README.md @@ -182,7 +182,9 @@ Use `request_data_file()` when a connector workflow needs API response data and non-secret provenance such as source URL, HTTP status, content type, method, and endpoint in one `DataFile` artifact. Use `request_workflow()` when that decoded response should immediately enter a `DataWorkflow` for merging, -transforming, writing, and provenance-preserving result handling. +transforming, writing, and provenance-preserving result handling; the +`get_workflow()`, `post_workflow()`, `put_workflow()`, `patch_workflow()`, and +`delete_workflow()` helpers provide verb-specific shortcuts. Data-returning AI tool wrappers expose the same `ExtendedDict`/`ExtendedList` payload contract; framework factory functions still return framework tool objects. diff --git a/docs/package-surface.md b/docs/package-surface.md index dad2527..215b121 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -347,7 +347,10 @@ validation or redaction boundaries. Use non-secret response provenance such as source URL, HTTP status, content type, method, and endpoint in a `DataFile` artifact. Use `request_workflow()` when that response should immediately become a `DataWorkflow` with the same promoted -metadata, named transforms, merge helpers, and export/write boundary. +metadata, named transforms, merge helpers, and export/write boundary. The +`get_workflow()`, `post_workflow()`, `put_workflow()`, `patch_workflow()`, and +`delete_workflow()` helpers mirror the decoded-data verb helpers for common API +workflows. 
Connector methods that return external data payloads should call `extend_result()` at the return boundary, making SDK-shaped dictionaries, lists, decoded repository files, GraphQL results, and workflow-builder output diff --git a/src/extended_data/connectors/base.py b/src/extended_data/connectors/base.py index 72125ed..12f6a37 100644 --- a/src/extended_data/connectors/base.py +++ b/src/extended_data/connectors/base.py @@ -520,6 +520,17 @@ def get_data(self, endpoint: str, *, suffix: str | None = None, as_extended: boo """HTTP GET request returning decoded response data.""" return self.request_data("GET", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def get_workflow( + self, + endpoint: str, + *, + suffix: str | None = None, + as_extended: bool = True, + **kwargs: Any, + ) -> DataWorkflow: + """HTTP GET request returning a workflow over decoded response data.""" + return self.request_workflow("GET", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def post(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP POST request.""" return self.request("POST", endpoint, **kwargs) @@ -528,6 +539,17 @@ def post_data(self, endpoint: str, *, suffix: str | None = None, as_extended: bo """HTTP POST request returning decoded response data.""" return self.request_data("POST", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def post_workflow( + self, + endpoint: str, + *, + suffix: str | None = None, + as_extended: bool = True, + **kwargs: Any, + ) -> DataWorkflow: + """HTTP POST request returning a workflow over decoded response data.""" + return self.request_workflow("POST", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def put(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP PUT request.""" return self.request("PUT", endpoint, **kwargs) @@ -536,6 +558,17 @@ def put_data(self, endpoint: str, *, suffix: str | None = None, as_extended: boo """HTTP PUT request returning decoded response data.""" 
return self.request_data("PUT", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def put_workflow( + self, + endpoint: str, + *, + suffix: str | None = None, + as_extended: bool = True, + **kwargs: Any, + ) -> DataWorkflow: + """HTTP PUT request returning a workflow over decoded response data.""" + return self.request_workflow("PUT", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def delete(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP DELETE request.""" return self.request("DELETE", endpoint, **kwargs) @@ -544,6 +577,17 @@ def delete_data(self, endpoint: str, *, suffix: str | None = None, as_extended: """HTTP DELETE request returning decoded response data.""" return self.request_data("DELETE", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def delete_workflow( + self, + endpoint: str, + *, + suffix: str | None = None, + as_extended: bool = True, + **kwargs: Any, + ) -> DataWorkflow: + """HTTP DELETE request returning a workflow over decoded response data.""" + return self.request_workflow("DELETE", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def patch(self, endpoint: str, **kwargs: Any) -> httpx.Response: """HTTP PATCH request.""" return self.request("PATCH", endpoint, **kwargs) @@ -552,6 +596,17 @@ def patch_data(self, endpoint: str, *, suffix: str | None = None, as_extended: b """HTTP PATCH request returning decoded response data.""" return self.request_data("PATCH", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + def patch_workflow( + self, + endpoint: str, + *, + suffix: str | None = None, + as_extended: bool = True, + **kwargs: Any, + ) -> DataWorkflow: + """HTTP PATCH request returning a workflow over decoded response data.""" + return self.request_workflow("PATCH", endpoint, suffix=suffix, as_extended=as_extended, **kwargs) + # ------------------------------------------------------------------------- # File Downloads # 
------------------------------------------------------------------------- diff --git a/tests/connectors/test_base.py b/tests/connectors/test_base.py index fdac35d..7f70cda 100644 --- a/tests/connectors/test_base.py +++ b/tests/connectors/test_base.py @@ -208,6 +208,38 @@ def test_request_workflow_starts_from_response_artifact() -> None: mock_client.request.assert_called_once() +@pytest.mark.parametrize( + ("helper_name", "expected_method"), + [ + ("get_workflow", "GET"), + ("post_workflow", "POST"), + ("put_workflow", "PUT"), + ("patch_workflow", "PATCH"), + ("delete_workflow", "DELETE"), + ], +) +def test_http_verb_workflow_helpers_start_response_workflows(helper_name: str, expected_method: str) -> None: + """Verb-specific workflow helpers should mirror decoded data helpers.""" + connector = _connector() + mock_client = MagicMock() + mock_client.request.return_value = httpx.Response( + 200, + content=b'{"ok":true}', + headers={"content-type": "application/json"}, + ) + connector._client = mock_client + + workflow = getattr(connector, helper_name)("/status") + + assert isinstance(workflow, DataWorkflow) + assert workflow.metadata["method"] == expected_method + assert workflow.metadata["endpoint"] == "/status" + assert workflow.result().as_builtin() == {"ok": True} + mock_client.request.assert_called_once() + assert mock_client.request.call_args.args[0] == expected_method + assert mock_client.request.call_args.args[1] == "https://api.example.com/status" + + def test_extend_result_promotes_connector_payloads() -> None: """Connector data payloads cross into the Tier 2 container layer explicitly.""" connector = _connector() diff --git a/tests/connectors/test_connector_payload_contracts.py b/tests/connectors/test_connector_payload_contracts.py index a6ff19e..cc9c430 100644 --- a/tests/connectors/test_connector_payload_contracts.py +++ b/tests/connectors/test_connector_payload_contracts.py @@ -216,6 +216,7 @@ "decode_response_file", "delete", "delete_data", + 
"delete_workflow", "download", "extend_result", "freeze_inputs", @@ -224,14 +225,18 @@ "get_data", "get_input", "get_tools", + "get_workflow", "handle_ai_tool_call", "merge_inputs", "patch", "patch_data", + "patch_workflow", "post", "post_data", + "post_workflow", "put", "put_data", + "put_workflow", "replace_inputs", "request", "request_data", @@ -245,19 +250,24 @@ ConnectorBase.decode_response_file, ConnectorBase.delete, ConnectorBase.delete_data, + ConnectorBase.delete_workflow, ConnectorBase.download, ConnectorBase.extend_result, ConnectorBase.get, ConnectorBase.get_ai_tool_definitions, ConnectorBase.get_data, ConnectorBase.get_tools, + ConnectorBase.get_workflow, ConnectorBase.handle_ai_tool_call, ConnectorBase.patch, ConnectorBase.patch_data, + ConnectorBase.patch_workflow, ConnectorBase.post, ConnectorBase.post_data, + ConnectorBase.post_workflow, ConnectorBase.put, ConnectorBase.put_data, + ConnectorBase.put_workflow, ConnectorBase.request, ConnectorBase.request_data, ConnectorBase.request_data_file, From a59ce2c4e9c64b33dbffd7171f24a1ee143358ec Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 00:13:39 -0500 Subject: [PATCH 275/287] chore: remove unpatched torch dependency path --- README.md | 3 + docs/package-surface.md | 5 +- pyproject.toml | 2 - src/extended_data/connectors/_optional.py | 5 +- src/extended_data/connectors/meshy/README.md | 5 + .../meshy/persistence/vector_store.py | 4 +- .../connectors/test_optional_dependencies.py | 20 + tests/core/test_release_hygiene.py | 1 + uv.lock | 909 ------------------ 9 files changed, 40 insertions(+), 914 deletions(-) diff --git a/README.md b/README.md index 7623b16..60460e1 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,9 @@ Published runtime extras are `anthropic`, `aws`, `cursor`, `github`, `google`, CrewAI adapters remain available when `crewai` is installed independently, but `extended-data` intentionally does not publish a CrewAI extra while current CrewAI releases pull vulnerable 
`chromadb` versions transitively. +The `vector` extra installs `sqlite-vec` for local vector search; embedding +model packages such as `sentence-transformers` are user-managed while current +releases pull vulnerable `torch` versions transitively. ## Usage diff --git a/docs/package-surface.md b/docs/package-surface.md index 215b121..6be69ea 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -484,7 +484,7 @@ Published runtime extras: | `extended-data[slack]` | Slack connector operations | | `extended-data[strands]` | Strands tool adapters | | `extended-data[vault]` | Vault connector operations | -| `extended-data[vector]` | Local vector search for generated asset metadata | +| `extended-data[vector]` | SQLite vector search for generated asset metadata | | `extended-data[webhooks]` | Webhook listener support | | `extended-data[zoom]` | Zoom connector operations | | `extended-data[ai]` | Aggregate LangChain, MCP, and Strands install target | @@ -492,6 +492,9 @@ Published runtime extras: CrewAI tool adapters are still importable when users install `crewai` directly, but `extended-data` does not expose a CrewAI extra while current CrewAI dependency trees pull vulnerable `chromadb` releases. +The `vector` extra installs `sqlite-vec` for local vector search; embedding +model packages such as `sentence-transformers` remain user-managed while +current releases pull vulnerable `torch` versions. All built-in CrewAI tool adapters use `extended_data.connectors._optional.get_crewai_tool_decorator()` so missing or incompatible CrewAI installs fail with the same user-managed install guidance. 
diff --git a/pyproject.toml b/pyproject.toml index dd16739..80b5653 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,7 +103,6 @@ webhooks = [ "uvicorn>=0.45.0", ] vector = [ - "sentence-transformers>=5.4.1", "sqlite-vec>=0.1.9", ] tests = [ @@ -144,7 +143,6 @@ all = [ "python-graphql-client>=0.4.3", "pyyaml>=6.0.3", "rich>=13.7.0,<15.0.0", - "sentence-transformers>=5.4.1", "slack-sdk>=3.41.0", "sqlite-vec>=0.1.9", "strands-agents>=1.36.0", diff --git a/src/extended_data/connectors/_optional.py b/src/extended_data/connectors/_optional.py index 97fe29c..b24ad31 100644 --- a/src/extended_data/connectors/_optional.py +++ b/src/extended_data/connectors/_optional.py @@ -47,7 +47,6 @@ "fastapi": "webhooks", "uvicorn": "webhooks", "sqlite_vec": "vector", - "sentence_transformers": "vector", } # Cache for import checks @@ -58,6 +57,10 @@ "Install CrewAI separately after reviewing its dependency tree; extended-data does not publish a " "CrewAI extra while current CrewAI releases pull vulnerable chromadb versions." ), + "sentence_transformers": ( + "Install sentence-transformers separately after reviewing its dependency tree; extended-data does not " + "include it in the vector extra while current releases pull vulnerable torch versions." + ), } CREWAI_TOOLS_IMPORT_ERROR = f"crewai is required for CrewAI tools.\n{PACKAGE_INSTALL_HINTS['crewai']}" diff --git a/src/extended_data/connectors/meshy/README.md b/src/extended_data/connectors/meshy/README.md index e18bf38..99f3b57 100644 --- a/src/extended_data/connectors/meshy/README.md +++ b/src/extended_data/connectors/meshy/README.md @@ -18,6 +18,11 @@ asset metadata: pip install "extended-data[meshy,vector]" ``` +The `vector` extra installs `sqlite-vec` for local similarity search. +Embedding generation through `get_embedding()` uses `sentence-transformers` +only when users install it independently after reviewing its `torch` dependency +tree. 
+ ## Functional API ```python diff --git a/src/extended_data/connectors/meshy/persistence/vector_store.py b/src/extended_data/connectors/meshy/persistence/vector_store.py index 471df14..b43e7c8 100644 --- a/src/extended_data/connectors/meshy/persistence/vector_store.py +++ b/src/extended_data/connectors/meshy/persistence/vector_store.py @@ -31,7 +31,9 @@ The vector extra includes: - sqlite-vec (vector similarity extension) - - Optional: sentence-transformers for embeddings + + get_embedding() uses sentence-transformers only when users install it + independently after reviewing its torch dependency tree. """ from __future__ import annotations diff --git a/tests/connectors/test_optional_dependencies.py b/tests/connectors/test_optional_dependencies.py index 056d8b0..87d365a 100644 --- a/tests/connectors/test_optional_dependencies.py +++ b/tests/connectors/test_optional_dependencies.py @@ -105,6 +105,26 @@ def fake_import_module(name: str) -> object: assert "extended-data[crewai]" not in message +def test_sentence_transformers_explains_user_managed_install(monkeypatch) -> None: + """Missing sentence-transformers reports the deliberate no-extra install policy.""" + + def fake_import_module(name: str) -> object: + if name == "sentence_transformers": + raise ImportError("No module named 'sentence_transformers'") + pytest.fail(f"unexpected import: {name}") + + monkeypatch.setattr(_optional.importlib, "import_module", fake_import_module) + + with pytest.raises(ImportError) as exc_info: + _optional.require_extra("sentence_transformers") + + message = str(exc_info.value) + assert "sentence-transformers separately" in message + assert "torch" in message + assert "extended-data[vector]" not in message + assert _optional.get_extra_for_package("sentence_transformers") is None + + def test_get_crewai_tool_decorator_returns_tool_decorator(monkeypatch) -> None: """Installed CrewAI tool support is returned directly.""" sentinel = object() diff --git 
a/tests/core/test_release_hygiene.py b/tests/core/test_release_hygiene.py index 2f40d24..1173259 100644 --- a/tests/core/test_release_hygiene.py +++ b/tests/core/test_release_hygiene.py @@ -59,6 +59,7 @@ PACKAGE_SHAPE_RE = re.compile(r"^ ([a-z_]+)/\s+") UNPATCHED_RUNTIME_VULNERABILITIES = { "chromadb": "GHSA-f4j7-r4q5-qw2c", + "torch": "GHSA-rrmf-rvhw-rf47", } diff --git a/uv.lock b/uv.lock index 67bd16b..e7f536c 100644 --- a/uv.lock +++ b/uv.lock @@ -709,76 +709,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5d/8c/ce3823c06c2804f194f9e64f0d67fa3f4094a39f2bb1a990cd03603af8fc/cryptography-48.0.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6184ca7b174f28d7c703f1290d4b297217c45355f77a98f67e9b7f14549ac54a", size = 3742204, upload-time = "2026-06-09T22:31:34.773Z" }, ] -[[package]] -name = "cuda-bindings" -version = "13.3.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cuda-pathfinder" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/21/8464d133752951c154feafb3b65c297e7d80f301183d220bec4c830f1441/cuda_bindings-13.3.1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:120fcc53d57903df529c3486962c56528cba5b7d6c57c99537320ed9922c8b86", size = 6073403, upload-time = "2026-05-29T23:11:36.22Z" }, - { url = "https://files.pythonhosted.org/packages/a8/1f/5ef51f5fbaa5d4d3201bb3d7555af028ec1aa4416275ccbf73c9e34e3d2d/cuda_bindings-13.3.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9851b0caa8bfd3bc6fa054eaf57bea7c8e9c3a62db2d2621224677f49f3c53d0", size = 6675244, upload-time = "2026-05-29T23:11:38.664Z" }, - { url = "https://files.pythonhosted.org/packages/51/6b/457ca12dad3ee9bfcc9a545cfd6b64b359ba49de40f776f6e028e678f262/cuda_bindings-13.3.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c5879712accf6e14bb01aa5e67440eb84998b8d104b509cc7a6dc0b8f656a474", size = 6053539, upload-time = "2026-05-29T23:11:43.19Z" 
}, - { url = "https://files.pythonhosted.org/packages/95/7a/c5e3c34a409b148f5c0f5a4ea374158f95d488862c1dffedf9aa5c639df9/cuda_bindings-13.3.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04436a9364059c84b8f9636f359eccda1cf814341f5b670c71d80d2f79dbc708", size = 6674166, upload-time = "2026-05-29T23:11:45.478Z" }, - { url = "https://files.pythonhosted.org/packages/ce/67/5e7dba1ba576dd73da5dee894ca076ca5e959450dfff66d6d510a255d1f7/cuda_bindings-13.3.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7855c4868aabc0cfae28abbe83d56734bdfbd08f08fc234ac1912a12858bf49", size = 6025351, upload-time = "2026-05-29T23:11:49.685Z" }, - { url = "https://files.pythonhosted.org/packages/39/2a/6d2e9047d1fb243dbaa364b01e0297534b9ed7fd27dba1c9f361519cf69b/cuda_bindings-13.3.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e32d08f71ebcdf00f0f41eab2eb37e8da94c8ed411cc9f7f7a019ce6b34abe3a", size = 6657965, upload-time = "2026-05-29T23:11:52.227Z" }, - { url = "https://files.pythonhosted.org/packages/cc/6e/2394f8163360f8391f8f1b7e72d300a82724edb81a7b7084c799fbd4c91f/cuda_bindings-13.3.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9efb21c1ee64981e184b9e0ba5eb3179e5ba3d4b51665a6cb52b8ef3d01a7cbf", size = 5920504, upload-time = "2026-05-29T23:11:56.883Z" }, - { url = "https://files.pythonhosted.org/packages/34/c2/ef9b6a63f7dc432712a462c816662e662e00d38caa9b861c8c2588195d03/cuda_bindings-13.3.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2732904099e0a4d4db774a5fc6d91ee95fae065b4d2ecabb4968c5fe2406c9d7", size = 6476660, upload-time = "2026-05-29T23:11:59.188Z" }, - { url = "https://files.pythonhosted.org/packages/b1/81/bff68ce829999c1e4209c761bbf903b1c06ec570416ddb25020864ad5907/cuda_bindings-13.3.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:1ab2f74ed65bfef4163ba07a8db16f1085e0729291db12a2423aff84ee8278b8", size = 6013639, upload-time = "2026-05-29T23:12:03.509Z" }, - { url = "https://files.pythonhosted.org/packages/d4/e0/c8a1f0c8f9ffdea4f5fe6dbab89b326cef4d85caf489dad39e209da89416/cuda_bindings-13.3.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:efd4c814d311ec08c981f6dded1dbe7d4b371067ee4f6c14cccec4bde9590f80", size = 6534419, upload-time = "2026-05-29T23:12:05.633Z" }, - { url = "https://files.pythonhosted.org/packages/52/b8/83b1f563925b290f2d11a01a77a84013ba56052fe3653a5bef3ccfbb43d6/cuda_bindings-13.3.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3c772dfff49681541d59630c90f858e173ac926b9c593a2b7123f2a1043cc76", size = 5809771, upload-time = "2026-05-29T23:12:10.422Z" }, - { url = "https://files.pythonhosted.org/packages/12/20/e79b4bfe98f075195afb6343d41c498f9dbd2d161d7021d4d28bceb83581/cuda_bindings-13.3.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:36febb7c1079d68a981dbbd8d5a67235b399802b82075c9388624719607e52b9", size = 6358584, upload-time = "2026-05-29T23:12:12.767Z" }, -] - -[[package]] -name = "cuda-pathfinder" -version = "1.5.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/11/c8/26f2e4aae92f11522a96043892ba39a90eac610d5242523aa863212bc1c7/cuda_pathfinder-1.5.5-py3-none-any.whl", hash = "sha256:0228c023f95d1480f143ef5c8922d27a2ab052087a942e81dc289c9eb8f91689", size = 51671, upload-time = "2026-05-27T01:21:25.413Z" }, -] - -[[package]] -name = "cuda-toolkit" -version = "13.0.2" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/57/b2/453099f5f3b698d7d0eab38916aac44c7f76229f451709e2eb9db6615dcd/cuda_toolkit-13.0.2-py2.py3-none-any.whl", hash = "sha256:b198824cf2f54003f50d64ada3a0f184b42ca0846c1c94192fa269ecd97a66eb", size = 2364, upload-time = 
"2025-12-19T23:24:07.328Z" }, -] - -[package.optional-dependencies] -cudart = [ - { name = "nvidia-cuda-runtime", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, -] -cufft = [ - { name = "nvidia-cufft", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, -] -cufile = [ - { name = "nvidia-cufile", marker = "sys_platform == 'linux'" }, -] -cupti = [ - { name = "nvidia-cuda-cupti", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, -] -curand = [ - { name = "nvidia-curand", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, -] -cusolver = [ - { name = "nvidia-cusolver", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, -] -cusparse = [ - { name = "nvidia-cusparse", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, -] -nvjitlink = [ - { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, -] -nvrtc = [ - { name = "nvidia-cuda-nvrtc", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, -] -nvtx = [ - { name = "nvidia-nvtx", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, -] - [[package]] name = "deepmerge" version = "2.0" @@ -885,7 +815,6 @@ all = [ { name = "python-graphql-client" }, { name = "pyyaml" }, { name = "rich" }, - { name = "sentence-transformers" }, { name = "slack-sdk" }, { name = "sqlite-vec" }, { name = "strands-agents" }, @@ -965,7 +894,6 @@ vault = [ { name = "hvac" }, ] vector = [ - { name = "sentence-transformers" }, { name = "sqlite-vec" }, ] webhooks = [ @@ -1037,8 +965,6 @@ requires-dist = [ { name = "rich", marker = "extra == 'meshy'", specifier = ">=13.7.0,<15.0.0" }, { name = "ruamel-yaml", specifier = ">=0.18.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8.0" }, - { name = "sentence-transformers", marker = "extra == 'all'", specifier = ">=5.4.1" }, - { name = "sentence-transformers", marker = "extra == 'vector'", specifier = ">=5.4.1" }, { name = "slack-sdk", marker = 
"extra == 'all'", specifier = ">=3.41.0" }, { name = "slack-sdk", marker = "extra == 'slack'", specifier = ">=3.41.0" }, { name = "sortedcontainers", specifier = ">=2.4.0" }, @@ -1208,15 +1134,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" }, ] -[[package]] -name = "fsspec" -version = "2026.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d5/8d/1c51c094345df128ca4a990d633fe1a0ff28726c9e6b3c41ba65087bba1d/fsspec-2026.4.0.tar.gz", hash = "sha256:301d8ac70ae90ef3ad05dcf94d6c3754a097f9b5fe4667d2787aa359ec7df7e4", size = 312760, upload-time = "2026-04-29T20:42:38.635Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/0c/043d5e551459da400957a1395e0febbf771446ff34291afcbe3d8be2a279/fsspec-2026.4.0-py3-none-any.whl", hash = "sha256:11ef7bb35dab8a394fde6e608221d5cf3e8499401c249bebaeaad760a1a8dec2", size = 203402, upload-time = "2026-04-29T20:42:36.842Z" }, -] - [[package]] name = "gitdb" version = "4.0.12" @@ -1454,38 +1371,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] -[[package]] -name = "hf-xet" -version = "1.5.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4b/2d/57fd21d84d93efb4bd0b962383790e19dd1bc053501b4264c97903b4e83e/hf_xet-1.5.1.tar.gz", hash = "sha256:51ef4500dab3764b41135ee1381a4b62ce56fc54d4c92b719b59e597d6df5bf6", size = 876636, upload-time = "2026-06-08T23:02:53.897Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/64/ee/dd9ba7beae1005e54131b7d45263cc74c8a066d47d354e6d58ae9445a388/hf_xet-1.5.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:dbf48c0d02cf0b2e568944330c60d9120c272dabe013bd892d48e25bc6797577", size = 4069485, upload-time = "2026-06-08T23:02:13.193Z" }, - { url = "https://files.pythonhosted.org/packages/b6/bc/9cae6cfeb4e03070874e73e5c97c66eb90369d3206b6a2b1ef5f96520888/hf_xet-1.5.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78e4e5192ad2b674c2e1160b651cb9134db974f8ae1835bdfbfb0166b894a43", size = 3838493, upload-time = "2026-06-08T23:02:15.282Z" }, - { url = "https://files.pythonhosted.org/packages/ba/b4/d5c01e0eb6d9f2ca2dacd84d0d1b71e6cfbb2ef3208c968528e010e9b3d7/hf_xet-1.5.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6f7a04a8ad962422e225bc49fbbac99dc1806764b1f3e54dbd154bffa7593947", size = 4505658, upload-time = "2026-06-08T23:02:17.196Z" }, - { url = "https://files.pythonhosted.org/packages/76/c5/29a7598c0c6383c523dc22186d577f4e04267a626cd95ae60f67c00bfe66/hf_xet-1.5.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d48199c2bf4f8df0adc55d31d1368b6ec0e4d4f45bc86b08038089c23db0bed8", size = 4292822, upload-time = "2026-06-08T23:02:18.608Z" }, - { url = "https://files.pythonhosted.org/packages/04/9a/dceaf6ca69390126b86ea825fb354b93d01163199070b7bd849225de9468/hf_xet-1.5.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:97f212a88d14bbf573619a74b7fecb238de77d08fc702e54dec6f78276ca3283", size = 4491255, upload-time = "2026-06-08T23:02:20.124Z" }, - { url = "https://files.pythonhosted.org/packages/48/a7/e5a7afaacf6c1791fdbeeac42951fb81c3d2bc482992b115dedcc86d963e/hf_xet-1.5.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f61e3665892a6c8c5e765395838b8ddf36185da835253d4bc4509a81e49fb342", size = 4711062, upload-time = "2026-06-08T23:02:21.863Z" }, - { url = 
"https://files.pythonhosted.org/packages/53/49/2802f8433c9742ce281bddc1e65c02c32268ca3098d66828b05e12e45ee2/hf_xet-1.5.1-cp313-cp313t-win_amd64.whl", hash = "sha256:f4ad3ebd4c32dd2b27099d69dc7b2df821e30767e46fb6ee6a0713778243b8ff", size = 4017205, upload-time = "2026-06-08T23:02:23.495Z" }, - { url = "https://files.pythonhosted.org/packages/9e/5a/50c71195b9fb883659f596e7252faf4c18c58e753a9013bdbf9bac5d2250/hf_xet-1.5.1-cp313-cp313t-win_arm64.whl", hash = "sha256:8298485c1e36e7e67cbd01eeb1376619b7af43d4f1ec245caae306f890a8a32d", size = 3845426, upload-time = "2026-06-08T23:02:25.124Z" }, - { url = "https://files.pythonhosted.org/packages/05/24/5e0c28f80371c17d49fed004597d9d132cb75c1f6f53db2cb95f459d2312/hf_xet-1.5.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:3474760d10e3bb6f92ff3f024fcb00c0b3e4001e9b035c7483e49a5dd17aa70f", size = 4069676, upload-time = "2026-06-08T23:02:26.759Z" }, - { url = "https://files.pythonhosted.org/packages/d2/17/261ba565b6a4d960fb478f61fdf919c0be5824645aaf1c319eca660c1611/hf_xet-1.5.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6762d89b9e3267dfd502b29b2a327b4525f33b17e7b509a78d94e2151a30ce30", size = 3838509, upload-time = "2026-06-08T23:02:28.573Z" }, - { url = "https://files.pythonhosted.org/packages/4e/44/7ffdc2e184b0d41fc0f683ba3936ef669ab63cf242cf36ef50e57d683668/hf_xet-1.5.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bf67e6ed10260cef62e852789dc91ebb03f382d5bdc4b1dbeb64763ea275e7d6", size = 4505881, upload-time = "2026-06-08T23:02:30.257Z" }, - { url = "https://files.pythonhosted.org/packages/63/b6/788060d5aa4d5e671f1a31bf69624c314eb2d8babab3aa562f9e5d53444e/hf_xet-1.5.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c6b6cd08ca095058780b50b8ce4d6cbf6787bcf27841705d58a9d32246e3e47a", size = 4292995, upload-time = "2026-06-08T23:02:31.993Z" }, - { url = 
"https://files.pythonhosted.org/packages/22/93/c5540cbd6b55529b7dc42f6734e88cebee21aefbea34128b66229df56c57/hf_xet-1.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e1af0de8ca6f190d4294a28b88023db64a1e2d1d719cab044baf75bec569e7a9", size = 4491570, upload-time = "2026-06-08T23:02:33.86Z" }, - { url = "https://files.pythonhosted.org/packages/03/f3/9d8ceab30f44f36c1679b1b8683054c71a0dadc787dbf07421891742d3ca/hf_xet-1.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4f561cbbb92f80960772059864b7fb07eae879adde1b2e781ec6f86f6ac26c59", size = 4711565, upload-time = "2026-06-08T23:02:35.454Z" }, - { url = "https://files.pythonhosted.org/packages/cd/54/27ed9a5e2cc583b4df82f75a03a4df8dbf55f5a9fa1f47f1fadfb20dbeac/hf_xet-1.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:e7dbb40617410f432182d918e37c12303fe6700fd6aa6c5964e30a535a4461d6", size = 4017343, upload-time = "2026-06-08T23:02:37.14Z" }, - { url = "https://files.pythonhosted.org/packages/ae/12/ecb2fc8d45e767580e3a37faa97cb895608b614965567efb4f18cff67e27/hf_xet-1.5.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6071d5ccb4d8d2cbd5fea5cc798da4f0ba3f44e25369591c4e89a4987050e61d", size = 3845716, upload-time = "2026-06-08T23:02:39.073Z" }, - { url = "https://files.pythonhosted.org/packages/7a/d8/5e54cf37434759d1f4f2ba9b66077ff9d4c4e1f37b6bd7975da5c40d94ab/hf_xet-1.5.1-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6abd35c3221eff63836618ddfb954dcf84798603f71d8e33e3ed7b04acfdbe6e", size = 4077794, upload-time = "2026-06-08T23:02:40.656Z" }, - { url = "https://files.pythonhosted.org/packages/35/94/4b2ecfbad8f8b04701a23aefb62f540b9137d058b7e1dbef16a32676f0e9/hf_xet-1.5.1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:94e761bbd266bf4c03cee73753916062665ce8365aa40ed321f45afcb934b41e", size = 3845354, upload-time = "2026-06-08T23:02:42.702Z" }, - { url = 
"https://files.pythonhosted.org/packages/de/cc/f99f4bc7295023d7bd9ebbfd51f75cc530ca262c1227666268b8208f4b77/hf_xet-1.5.1-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:892e3a3a3aecc12aded8b93cf4f9cd059282c7de0732f7d55026f3abdf474350", size = 4514864, upload-time = "2026-06-08T23:02:44.497Z" }, - { url = "https://files.pythonhosted.org/packages/cd/6e/21f7e5a2381278bd3b7b7a5a4d90038518bb6308a0c1daf5d9f8268bb178/hf_xet-1.5.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:a93df2039190502835b1db8cd7e178b0b7b889fe9ab51299d5ced26e0dd879a4", size = 4303784, upload-time = "2026-06-08T23:02:46.203Z" }, - { url = "https://files.pythonhosted.org/packages/35/0e/f992bb6927ac1cb30ef74e62268f551f338bc32b2191f7c96a44c6f7283e/hf_xet-1.5.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0c97106032ef70467b4f6bc2d0ccc266d7613ee076afc56516c502f87ce1c4a6", size = 4500703, upload-time = "2026-06-08T23:02:47.628Z" }, - { url = "https://files.pythonhosted.org/packages/fb/d1/90a498d05447980b977b1669246eeeeae4cfb0ea3e7a286eaba627f91bf9/hf_xet-1.5.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6208adb15d192b90e4c2ad2a27ed864359b2cb0f2494eb6d7c7f3699ac02e2bf", size = 4719498, upload-time = "2026-06-08T23:02:49.268Z" }, - { url = "https://files.pythonhosted.org/packages/6d/b6/20f99cfe97cc663a711f7b33cc21d4793e51968e9a26125b4afcd77315ba/hf_xet-1.5.1-cp37-abi3-win_amd64.whl", hash = "sha256:f7b3002f95d1c13e24bcb4537baa8f0eb3838957067c91bb4959bc004a6435f5", size = 4026419, upload-time = "2026-06-08T23:02:50.829Z" }, - { url = "https://files.pythonhosted.org/packages/f9/fa/77453694888f03e5a8c8852d1514a0894d8e81c622d39edbaf308ea0dcf4/hf_xet-1.5.1-cp37-abi3-win_arm64.whl", hash = "sha256:93d090b57b211133f6c0dab0205ef5cb6d89162979ba75a74845045cc3063b8e", size = 3855178, upload-time = "2026-06-08T23:02:52.452Z" }, -] - [[package]] name = "httpcore" version = "1.0.9" @@ -1535,26 +1420,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, ] -[[package]] -name = "huggingface-hub" -version = "1.16.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "filelock" }, - { name = "fsspec" }, - { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, - { name = "httpx" }, - { name = "packaging" }, - { name = "pyyaml" }, - { name = "tqdm" }, - { name = "typer" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/48/0f/ed994dbade67a54407c28cab96ef845e0e6d25500be56aca6394f8bfc9dd/huggingface_hub-1.16.1.tar.gz", hash = "sha256:7f1dc4c5ec21aed69be630ad0c3378616be16f3de1a47b141c0e812965d9c832", size = 792534, upload-time = "2026-05-21T18:40:00.908Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/49/79/621a7dbb80c70974f73a597275351ebe03ce5bc65cb5f8f4acb5859252bc/huggingface_hub-1.16.1-py3-none-any.whl", hash = "sha256:64340de934b9ce37857ef85a82de72f5629e8a270f9119eabb12bf495eb53c22", size = 668176, upload-time = "2026-05-21T18:39:58.596Z" }, -] - [[package]] name = "hvac" version = "2.4.0" @@ -1619,18 +1484,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] -[[package]] -name = "jinja2" -version = "3.1.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markupsafe" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, -] - [[package]] name = "jiter" version = "0.13.0" @@ -1737,15 +1590,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, ] -[[package]] -name = "joblib" -version = "1.5.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, -] - [[package]] name = "jsonpatch" version = "1.33" @@ -1954,91 +1798,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/81/4da04ced5a082363ecfa159c010d200ecbd959ae410c10c0264a38cac0f5/markdown_it_py-4.2.0-py3-none-any.whl", hash = "sha256:9f7ebbcd14fe59494226453aed97c1070d83f8d24b6fc3a3bcf9a38092641c4a", size = 91687, upload-time = 
"2026-05-07T12:08:27.182Z" }, ] -[[package]] -name = "markupsafe" -version = "3.0.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e8/4b/3541d44f3937ba468b75da9eebcae497dcf67adb65caa16760b0a6807ebb/markupsafe-3.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559", size = 11631, upload-time = "2025-09-27T18:36:05.558Z" }, - { url = "https://files.pythonhosted.org/packages/98/1b/fbd8eed11021cabd9226c37342fa6ca4e8a98d8188a8d9b66740494960e4/markupsafe-3.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419", size = 12057, upload-time = "2025-09-27T18:36:07.165Z" }, - { url = "https://files.pythonhosted.org/packages/40/01/e560d658dc0bb8ab762670ece35281dec7b6c1b33f5fbc09ebb57a185519/markupsafe-3.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ba88449deb3de88bd40044603fafffb7bc2b055d626a330323a9ed736661695", size = 22050, upload-time = "2025-09-27T18:36:08.005Z" }, - { url = "https://files.pythonhosted.org/packages/af/cd/ce6e848bbf2c32314c9b237839119c5a564a59725b53157c856e90937b7a/markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591", size = 20681, upload-time = "2025-09-27T18:36:08.881Z" }, - { url = "https://files.pythonhosted.org/packages/c9/2a/b5c12c809f1c3045c4d580b035a743d12fcde53cf685dbc44660826308da/markupsafe-3.0.3-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:c0c0b3ade1c0b13b936d7970b1d37a57acde9199dc2aecc4c336773e1d86049c", size = 20705, upload-time = "2025-09-27T18:36:10.131Z" }, - { url = "https://files.pythonhosted.org/packages/cf/e3/9427a68c82728d0a88c50f890d0fc072a1484de2f3ac1ad0bfc1a7214fd5/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f", size = 21524, upload-time = "2025-09-27T18:36:11.324Z" }, - { url = "https://files.pythonhosted.org/packages/bc/36/23578f29e9e582a4d0278e009b38081dbe363c5e7165113fad546918a232/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:d2ee202e79d8ed691ceebae8e0486bd9a2cd4794cec4824e1c99b6f5009502f6", size = 20282, upload-time = "2025-09-27T18:36:12.573Z" }, - { url = "https://files.pythonhosted.org/packages/56/21/dca11354e756ebd03e036bd8ad58d6d7168c80ce1fe5e75218e4945cbab7/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:177b5253b2834fe3678cb4a5f0059808258584c559193998be2601324fdeafb1", size = 20745, upload-time = "2025-09-27T18:36:13.504Z" }, - { url = "https://files.pythonhosted.org/packages/87/99/faba9369a7ad6e4d10b6a5fbf71fa2a188fe4a593b15f0963b73859a1bbd/markupsafe-3.0.3-cp310-cp310-win32.whl", hash = "sha256:2a15a08b17dd94c53a1da0438822d70ebcd13f8c3a95abe3a9ef9f11a94830aa", size = 14571, upload-time = "2025-09-27T18:36:14.779Z" }, - { url = "https://files.pythonhosted.org/packages/d6/25/55dc3ab959917602c96985cb1253efaa4ff42f71194bddeb61eb7278b8be/markupsafe-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:c4ffb7ebf07cfe8931028e3e4c85f0357459a3f9f9490886198848f4fa002ec8", size = 15056, upload-time = "2025-09-27T18:36:16.125Z" }, - { url = "https://files.pythonhosted.org/packages/d0/9e/0a02226640c255d1da0b8d12e24ac2aa6734da68bff14c05dd53b94a0fc3/markupsafe-3.0.3-cp310-cp310-win_arm64.whl", hash = "sha256:e2103a929dfa2fcaf9bb4e7c091983a49c9ac3b19c9061b6d5427dd7d14d81a1", size = 13932, upload-time = "2025-09-27T18:36:17.311Z" }, - { url = 
"https://files.pythonhosted.org/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631, upload-time = "2025-09-27T18:36:18.185Z" }, - { url = "https://files.pythonhosted.org/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058, upload-time = "2025-09-27T18:36:19.444Z" }, - { url = "https://files.pythonhosted.org/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287, upload-time = "2025-09-27T18:36:20.768Z" }, - { url = "https://files.pythonhosted.org/packages/30/ac/0273f6fcb5f42e314c6d8cd99effae6a5354604d461b8d392b5ec9530a54/markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf", size = 22940, upload-time = "2025-09-27T18:36:22.249Z" }, - { url = "https://files.pythonhosted.org/packages/19/ae/31c1be199ef767124c042c6c3e904da327a2f7f0cd63a0337e1eca2967a8/markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f", size = 21887, upload-time = "2025-09-27T18:36:23.535Z" }, - { url = "https://files.pythonhosted.org/packages/b2/76/7edcab99d5349a4532a459e1fe64f0b0467a3365056ae550d3bcf3f79e1e/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a", size = 23692, upload-time = "2025-09-27T18:36:24.823Z" }, - { url = 
"https://files.pythonhosted.org/packages/a4/28/6e74cdd26d7514849143d69f0bf2399f929c37dc2b31e6829fd2045b2765/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115", size = 21471, upload-time = "2025-09-27T18:36:25.95Z" }, - { url = "https://files.pythonhosted.org/packages/62/7e/a145f36a5c2945673e590850a6f8014318d5577ed7e5920a4b3448e0865d/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a", size = 22923, upload-time = "2025-09-27T18:36:27.109Z" }, - { url = "https://files.pythonhosted.org/packages/0f/62/d9c46a7f5c9adbeeeda52f5b8d802e1094e9717705a645efc71b0913a0a8/markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19", size = 14572, upload-time = "2025-09-27T18:36:28.045Z" }, - { url = "https://files.pythonhosted.org/packages/83/8a/4414c03d3f891739326e1783338e48fb49781cc915b2e0ee052aa490d586/markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01", size = 15077, upload-time = "2025-09-27T18:36:29.025Z" }, - { url = "https://files.pythonhosted.org/packages/35/73/893072b42e6862f319b5207adc9ae06070f095b358655f077f69a35601f0/markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c", size = 13876, upload-time = "2025-09-27T18:36:29.954Z" }, - { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, - { url = 
"https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, - { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, - { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, - { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, - { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, - { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, - { url = 
"https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, - { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, - { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, - { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, - { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, - { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, - { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, - { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, - { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, - { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, - { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, - { url 
= "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, - { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, - { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, - { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, - { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, - { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, - { url = 
"https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, - { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, - { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, - { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, - { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, - { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, - { url = 
"https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, - { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, - { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, - { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, - { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, - { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, - { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, - { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, - { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, - { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, - { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, - { url = 
"https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, - { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, - { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, - { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, - { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, - { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, - { url = 
"https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, - { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, - { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, - { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, - { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, - { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, -] - [[package]] name = "mcp" version = "1.26.0" @@ -2082,15 +1841,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/e8/3d/1087453384dbde46a8c7f9356eead2c58be8a7bf156bca40243377c85715/more_itertools-11.1.0-py3-none-any.whl", hash = "sha256:4b65538ae22f6fed0ce4874efd317463a7489796a0939fa66824dd542125a192", size = 72226, upload-time = "2026-05-22T14:14:28.824Z" }, ] -[[package]] -name = "mpmath" -version = "1.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, -] - [[package]] name = "multidict" version = "6.7.1" @@ -2297,42 +2047,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] -[[package]] -name = "narwhals" -version = "2.22.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/62/3c/c4ef2164a71c1a63d7f1ae411c4082c5fa872405106db60a4b7114989ad7/narwhals-2.22.1.tar.gz", hash = "sha256:d62920805a0a43b7ff8b54b0c0d3142d796f8a9301836ada37e573d6a33cbcd9", size = 647493, upload-time = "2026-06-05T12:34:34.051Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/48/ca/36339329c4604adbcc99c899b7eb1ce1a555c499b6a6860757dc9bfed36d/narwhals-2.22.1-py3-none-any.whl", hash = 
"sha256:60567d774edf77db53906f89d9fbd164e66e56d66d388e1e6990f17ac33cfb53", size = 454815, upload-time = "2026-06-05T12:34:32.289Z" }, -] - -[[package]] -name = "networkx" -version = "3.4.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.11'", -] -sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload-time = "2024-10-21T12:39:38.695Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263, upload-time = "2024-10-21T12:39:36.247Z" }, -] - -[[package]] -name = "networkx" -version = "3.6.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.15'", - "python_full_version == '3.14.*'", - "python_full_version == '3.13.*'", - "python_full_version >= '3.11' and python_full_version < '3.13'", -] -sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, -] - [[package]] name = "num2words" version = "0.5.14" @@ -2495,158 +2209,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/15/ce/e5ec180bc41812edcd8daeb8639d205622c0e8c02259d8ab25a0201b3c2a/numpy-2.4.6-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:2803abfebfc990042cd494d8ce2d5f82e9d847af6d35ec486923aa19dbad5e73", size = 12504263, upload-time = "2026-05-18T23:37:09.715Z" }, ] -[[package]] -name = "nvidia-cublas" -version = "13.1.1.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cuda-nvrtc" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/a1/0bd24ee8c8d03adac032fd2909426a00c88f8c57961b1277ded97f91119f/nvidia_cublas-13.1.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b7a210458267ac818974c53038fbec2e969d5c99f305ab15c72522fa9f001dd5", size = 542848918, upload-time = "2026-04-08T18:46:22.985Z" }, - { url = "https://files.pythonhosted.org/packages/3b/cd/154ca20c38269e05eff77c1464e6c1da89f50a6390b565e9d82e06bc11e1/nvidia_cublas-13.1.1.3-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:37936a16db8fe4ac1f065c2139360608a543a09275cb1a1af612e08cfa065436", size = 423138758, upload-time = "2026-04-08T18:46:58.655Z" }, -] - -[[package]] -name = "nvidia-cuda-cupti" -version = "13.0.85" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/2a/80353b103fc20ce05ef51e928daed4b6015db4aaa9162ed0997090fe2250/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_aarch64.whl", hash = "sha256:796bd679890ee55fb14a94629b698b6db54bcfd833d391d5e94017dd9d7d3151", size = 10310827, upload-time = "2025-09-04T08:26:42.012Z" }, - { url = "https://files.pythonhosted.org/packages/33/6d/737d164b4837a9bbd202f5ae3078975f0525a55730fe871d8ed4e3b952b0/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_x86_64.whl", hash = "sha256:4eb01c08e859bf924d222250d2e8f8b8ff6d3db4721288cf35d14252a4d933c8", size = 10715597, upload-time = "2025-09-04T08:26:51.312Z" }, -] - -[[package]] -name = "nvidia-cuda-nvrtc" -version = "13.0.88" -source = { registry = 
"https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/68/483a78f5e8f31b08fb1bb671559968c0ca3a065ac7acabfc7cee55214fd6/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:ad9b6d2ead2435f11cbb6868809d2adeeee302e9bb94bcf0539c7a40d80e8575", size = 90215200, upload-time = "2025-09-04T08:28:44.204Z" }, - { url = "https://files.pythonhosted.org/packages/b7/dc/6bb80850e0b7edd6588d560758f17e0550893a1feaf436807d64d2da040f/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d27f20a0ca67a4bb34268a5e951033496c5b74870b868bacd046b1b8e0c3267b", size = 43015449, upload-time = "2025-09-04T08:28:20.239Z" }, -] - -[[package]] -name = "nvidia-cuda-runtime" -version = "13.0.96" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/87/4f/17d7b9b8e285199c58ce28e31b5c5bbaa4d8271af06a89b6405258245de2/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ef9bcbe90493a2b9d810e43d249adb3d02e98dd30200d86607d8d02687c43f55", size = 2261060, upload-time = "2025-10-09T08:55:15.78Z" }, - { url = "https://files.pythonhosted.org/packages/2e/24/d1558f3b68b1d26e706813b1d10aa1d785e4698c425af8db8edc3dced472/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f82250d7782aa23b6cfe765ecc7db554bd3c2870c43f3d1821f1d18aebf0548", size = 2243632, upload-time = "2025-10-09T08:55:36.117Z" }, -] - -[[package]] -name = "nvidia-cudnn-cu13" -version = "9.20.0.48" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/56/c5/83384d846b2fd17c44bd499b36c75a45ed4f095fbbb2252294e89cea5c5c/nvidia_cudnn_cu13-9.20.0.48-py3-none-manylinux_2_27_aarch64.whl", hash = 
"sha256:e31454ae00094b0c55319d9d15b6fa2fc50a9e1c0f5c8c80fb75258234e731e1", size = 444574296, upload-time = "2026-03-09T19:28:27.751Z" }, - { url = "https://files.pythonhosted.org/packages/6e/5e/edb9c0ae051602c3ccaffe424256463636d639e27d7f302dde9975ef9e7a/nvidia_cudnn_cu13-9.20.0.48-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:0c45dd8eeb50b603f07995b1b300c62ffe6a1980482b82b3bcf94a4ca9d49304", size = 366173588, upload-time = "2026-03-09T19:29:34.474Z" }, -] - -[[package]] -name = "nvidia-cufft" -version = "12.0.0.61" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/ae/f417a75c0259e85c1d2f83ca4e960289a5f814ed0cea74d18c353d3e989d/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2708c852ef8cd89d1d2068bdbece0aa188813a0c934db3779b9b1faa8442e5f5", size = 214053554, upload-time = "2025-09-04T08:31:38.196Z" }, - { url = "https://files.pythonhosted.org/packages/a8/2f/7b57e29836ea8714f81e9898409196f47d772d5ddedddf1592eadb8ab743/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6c44f692dce8fd5ffd3e3df134b6cdb9c2f72d99cf40b62c32dde45eea9ddad3", size = 214085489, upload-time = "2025-09-04T08:31:56.044Z" }, -] - -[[package]] -name = "nvidia-cufile" -version = "1.15.1.6" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/70/4f193de89a48b71714e74602ee14d04e4019ad36a5a9f20c425776e72cd6/nvidia_cufile-1.15.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08a3ecefae5a01c7f5117351c64f17c7c62efa5fffdbe24fc7d298da19cd0b44", size = 1223672, upload-time = "2025-09-04T08:32:22.779Z" }, - { url = "https://files.pythonhosted.org/packages/ab/73/cc4a14c9813a8a0d509417cf5f4bdaba76e924d58beb9864f5a7baceefbf/nvidia_cufile-1.15.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = 
"sha256:bdc0deedc61f548bddf7733bdc216456c2fdb101d020e1ab4b88d232d5e2f6d1", size = 1136992, upload-time = "2025-09-04T08:32:14.119Z" }, -] - -[[package]] -name = "nvidia-curand" -version = "10.4.0.35" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/72/7c2ae24fb6b63a32e6ae5d241cc65263ea18d08802aaae087d9f013335a2/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:133df5a7509c3e292aaa2b477afd0194f06ce4ea24d714d616ff36439cee349a", size = 61962106, upload-time = "2025-08-04T10:21:41.128Z" }, - { url = "https://files.pythonhosted.org/packages/a5/9f/be0a41ca4a4917abf5cb9ae0daff1a6060cc5de950aec0396de9f3b52bc5/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:1aee33a5da6e1db083fe2b90082def8915f30f3248d5896bcec36a579d941bfc", size = 59544258, upload-time = "2025-08-04T10:22:03.992Z" }, -] - -[[package]] -name = "nvidia-cusolver" -version = "12.0.4.66" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas" }, - { name = "nvidia-cusparse" }, - { name = "nvidia-nvjitlink" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/c3/b30c9e935fc01e3da443ec0116ed1b2a009bb867f5324d3f2d7e533e776b/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:02c2457eaa9e39de20f880f4bd8820e6a1cfb9f9a34f820eb12a155aa5bc92d2", size = 223467760, upload-time = "2025-09-04T08:33:04.222Z" }, - { url = "https://files.pythonhosted.org/packages/5f/67/cba3777620cdacb99102da4042883709c41c709f4b6323c10781a9c3aa34/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:0a759da5dea5c0ea10fd307de75cdeb59e7ea4fcb8add0924859b944babf1112", size = 200941980, upload-time = "2025-09-04T08:33:22.767Z" }, -] - -[[package]] -name = "nvidia-cusparse" -version = "12.6.3.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink" }, -] -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/f8/94/5c26f33738ae35276672f12615a64bd008ed5be6d1ebcb23579285d960a9/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:80bcc4662f23f1054ee334a15c72b8940402975e0eab63178fc7e670aa59472c", size = 162155568, upload-time = "2025-09-04T08:33:42.864Z" }, - { url = "https://files.pythonhosted.org/packages/fa/18/623c77619c31d62efd55302939756966f3ecc8d724a14dab2b75f1508850/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b3c89c88d01ee0e477cb7f82ef60a11a4bcd57b6b87c33f789350b59759360b", size = 145942937, upload-time = "2025-09-04T08:33:58.029Z" }, -] - -[[package]] -name = "nvidia-cusparselt-cu13" -version = "0.8.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/46/e1/cdc1797eadf82d3a9a575a19b33fdc871a97edbec42c00b5b5e914f4aff4/nvidia_cusparselt_cu13-0.8.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4dca476c50bf4780d46cd0bfbd82e2bc10a08e4fef7950917ce8d7578d22a23f", size = 221051344, upload-time = "2025-09-05T18:49:51.289Z" }, - { url = "https://files.pythonhosted.org/packages/34/7d/2661f2fb3ac4302f3a246f5fc030213ac60c1fe0bce84f9783dbd831dbb7/nvidia_cusparselt_cu13-0.8.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:786ce87568c303fadb5afcc7102d454cd3040d75f6f8626f5db460d1871f4dd0", size = 170148586, upload-time = "2025-09-05T18:50:50.248Z" }, -] - -[[package]] -name = "nvidia-nccl-cu13" -version = "2.29.7" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/72/0d/daf50d44177ee0cbc7ff0a0c91eb5ff676c82be42f9a970bc7597f440c3a/nvidia_nccl_cu13-2.29.7-py3-none-manylinux_2_18_aarch64.whl", hash = "sha256:674a12383e3c38a1bcccae7d4f3633b37852230b6047883cb2f4c2d1b36d9bf5", size = 206014712, upload-time = "2026-03-03T05:34:20.843Z" }, - { url = 
"https://files.pythonhosted.org/packages/67/f4/58e4e91b6919367c7aafb8e36fce9aad1a3047e536bf7e2fd560927d3a4c/nvidia_nccl_cu13-2.29.7-py3-none-manylinux_2_18_x86_64.whl", hash = "sha256:edd81538446786ec3b73972543e53bb43bcaf0bfc8ef76cb679fcc390ffe136d", size = 205976000, upload-time = "2026-03-03T05:36:24.472Z" }, -] - -[[package]] -name = "nvidia-nvjitlink" -version = "13.0.88" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/56/7a/123e033aaff487c77107195fa5a2b8686795ca537935a24efae476c41f05/nvidia_nvjitlink-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:13a74f429e23b921c1109976abefacc69835f2f433ebd323d3946e11d804e47b", size = 40713933, upload-time = "2025-09-04T08:35:43.553Z" }, - { url = "https://files.pythonhosted.org/packages/ab/2c/93c5250e64df4f894f1cbb397c6fd71f79813f9fd79d7cd61de3f97b3c2d/nvidia_nvjitlink-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e931536ccc7d467a98ba1d8b89ff7fa7f1fa3b13f2b0069118cd7f47bff07d0c", size = 38768748, upload-time = "2025-09-04T08:35:20.008Z" }, -] - -[[package]] -name = "nvidia-nvshmem-cu13" -version = "3.4.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/dc/0f/05cc9c720236dcd2db9c1ab97fff629e96821be2e63103569da0c9b72f19/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dc2a197f38e5d0376ad52cd1a2a3617d3cdc150fd5966f4aee9bcebb1d68fe9", size = 60215947, upload-time = "2025-09-06T00:32:20.022Z" }, - { url = "https://files.pythonhosted.org/packages/3c/35/a9bf80a609e74e3b000fef598933235c908fcefcef9026042b8e6dfde2a9/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:290f0a2ee94c9f3687a02502f3b9299a9f9fe826e6d0287ee18482e78d495b80", size = 60412546, upload-time = "2025-09-06T00:32:41.564Z" }, -] - -[[package]] -name = "nvidia-nvtx" 
-version = "13.0.85" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/f3/d86c845465a2723ad7e1e5c36dcd75ddb82898b3f53be47ebd429fb2fa5d/nvidia_nvtx-13.0.85-py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4936d1d6780fbe68db454f5e72a42ff64d1fd6397df9f363ae786930fd5c1cd4", size = 148047, upload-time = "2025-09-04T08:29:01.761Z" }, - { url = "https://files.pythonhosted.org/packages/a8/64/3708a90d1ebe202ffdeb7185f878a3c84d15c2b2c31858da2ce0583e2def/nvidia_nvtx-13.0.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb7780edb6b14107373c835bf8b72e7a178bac7367e23da7acb108f973f157a6", size = 148878, upload-time = "2025-09-04T08:28:53.627Z" }, -] - [[package]] name = "opentelemetry-api" version = "1.34.1" @@ -3941,304 +3503,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2b/58/a58fc997655386daa2e25784e30c288aa3e3819e401f77029ee4899fb55a/s3transfer-0.18.0-py3-none-any.whl", hash = "sha256:239c13b09e65ad0346e1be7348b8a202dcad44ac7ea7c6eb858fc881dce739b6", size = 88572, upload-time = "2026-05-28T19:39:07.999Z" }, ] -[[package]] -name = "safetensors" -version = "0.8.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/45/06/f955dbbb1859e3bd23c8ac6141af5106e7ad5fedec4a3a6e3d60f94b7001/safetensors-0.8.0.tar.gz", hash = "sha256:fabaf3e0f18a6618d9b36560682562157f77c2b71fcffc7b432be2baed9d753d", size = 325846, upload-time = "2026-06-09T07:52:25.563Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/39/a0/f718cda65b05407d228f97602cf60dca269c979867aa5beb25410de26cd3/safetensors-0.8.0-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c554f85858e05226d3c2828e32395e677434685d6d94594a41643361c5e837f0", size = 473568, upload-time = "2026-06-09T07:52:18.829Z" }, - { url = 
"https://files.pythonhosted.org/packages/f5/b1/fa7c600e7dceae12e9606c7578cbc9ff1e1ed55844883ee5c92205e86226/safetensors-0.8.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:c80201d22cbf405b80647a60ada77bba06c8fba2da2743ba1e89cdcc39a81f25", size = 484562, upload-time = "2026-06-09T07:52:17.518Z" }, - { url = "https://files.pythonhosted.org/packages/09/7d/65a7de0af421317bb36a067241e4235fff194eed60b961ed6d3f59a3fc60/safetensors-0.8.0-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a46e5ff292c356d6991e60942ba7f79817682d3a2cef0702136448cb9c4d235", size = 502844, upload-time = "2026-06-09T07:52:07.624Z" }, - { url = "https://files.pythonhosted.org/packages/91/4f/3175c9d75634e0e0dda0082794193521035edd7c70a6f212bf33ca06ddf4/safetensors-0.8.0-cp310-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4124502b78f03534117c848f87a39b8f31e577b15eff423bf8bfb95f2a8c30d0", size = 511823, upload-time = "2026-06-09T07:52:09.565Z" }, - { url = "https://files.pythonhosted.org/packages/20/87/846c289e7aa2299eff406335717cf43ce8777194ece8aad75772e0411615/safetensors-0.8.0-cp310-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bc0a787ba8a35be368ee3574edfa2b1ad389eebd0a72e482ae275490e3f6c98", size = 633461, upload-time = "2026-06-09T07:52:11.128Z" }, - { url = "https://files.pythonhosted.org/packages/76/22/8d64d9df2c45d5ded401df889d0ad90882804ca172d79ec4f0df8f727fe0/safetensors-0.8.0-cp310-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:040070828e36dc8e122178bbbd5830ff9e97920affb84cbe0f46442497bed358", size = 545148, upload-time = "2026-06-09T07:52:13.603Z" }, - { url = "https://files.pythonhosted.org/packages/28/50/f203ff3a3ddfe19308efc83c5a3a29ed02bf786732ec35e68bf9162f3365/safetensors-0.8.0-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd6f3f93c9a0a7cc2788ee63fb763353d4bd2e89b0751bc78fcf7dda00bea774", size = 516040, upload-time = "2026-06-09T07:52:16.29Z" }, - { url = 
"https://files.pythonhosted.org/packages/46/fb/cdaed17ceb2948784fd9c36b6fd3e951b608547cea81a48e8ee6f8cfdfcb/safetensors-0.8.0-cp310-abi3-manylinux_2_31_riscv64.whl", hash = "sha256:fcdd41ec4628fee5799f807c73c353629130fbd942aa23d83c623dd6c9d52d78", size = 513832, upload-time = "2026-06-09T07:52:12.37Z" }, - { url = "https://files.pythonhosted.org/packages/0d/49/1e15de264dcc3b77943d2d0c56a95809956883b1c2d6d585c792523f180b/safetensors-0.8.0-cp310-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8e9f537aa183a38ace122d27303dcd986b26bd2a7591f9181d7f0c396f4677ca", size = 559930, upload-time = "2026-06-09T07:52:14.743Z" }, - { url = "https://files.pythonhosted.org/packages/2a/43/bf38443278eab4b1be1fce2931e2b012ad9cb7df52ada751d0aab8f7659a/safetensors-0.8.0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:87eec7ffed2b809f05a398a8becb7d013f19f7837cd15d9748580d6cf30dbaf4", size = 678670, upload-time = "2026-06-09T07:52:20.032Z" }, - { url = "https://files.pythonhosted.org/packages/72/e3/68cd3fa5b48488e84add63e04cb12f3bc28ae4638c06d4508c6e88823d0e/safetensors-0.8.0-cp310-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:4a95ae2b05d7726d751da4ebf626a2ca782b706e101bd894c95bc2450b1cffcc", size = 786679, upload-time = "2026-06-09T07:52:21.322Z" }, - { url = "https://files.pythonhosted.org/packages/29/4b/1c19c509d56e01f4fbb3d0a2e597450f6cc04d1d56cf52defb0a62dfd715/safetensors-0.8.0-cp310-abi3-musllinux_1_2_i686.whl", hash = "sha256:3ae091f16662658bdc019a4ff6cb4c085bb7d725eb5978b183ffd265863b6d2d", size = 765683, upload-time = "2026-06-09T07:52:22.594Z" }, - { url = "https://files.pythonhosted.org/packages/27/43/41c1621732edd934d868a00d1b891584c892a7b62a9aab82ea5a0a5623ee/safetensors-0.8.0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8e080062fcde23be189565e1c3305d16751a218ecf9412c8601e64204eb6f846", size = 722361, upload-time = "2026-06-09T07:52:23.924Z" }, - { url = 
"https://files.pythonhosted.org/packages/8e/3f/73ccf82579412b4a71c4ca673f10b5f1f888d7cf5af7fe24f27d30307be4/safetensors-0.8.0-cp310-abi3-win32.whl", hash = "sha256:2ddf52eac562eda224f99acfa7889d02968c1fd59a5b011ae7d8137c37e9c02d", size = 342401, upload-time = "2026-06-09T07:52:28.895Z" }, - { url = "https://files.pythonhosted.org/packages/1b/6d/3fba214c1e5e0f69991677ec3bc17023f0421776975e1de0c682dca475e2/safetensors-0.8.0-cp310-abi3-win_amd64.whl", hash = "sha256:096ec1a98435df7beb08853bb5aa9081a84f23d0adc67ed1a0a10550f608373f", size = 355540, upload-time = "2026-06-09T07:52:27.832Z" }, - { url = "https://files.pythonhosted.org/packages/8d/fc/7eedc3510d97878876e32774eebbeb61c43f148a96e915c84229a3e967aa/safetensors-0.8.0-cp310-abi3-win_arm64.whl", hash = "sha256:f7838e5135a406ad3e02efdcb8cf2e5397d368b0154537c4fec682dbc544d452", size = 340500, upload-time = "2026-06-09T07:52:26.745Z" }, -] - -[[package]] -name = "scikit-learn" -version = "1.7.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.11'", -] -dependencies = [ - { name = "joblib", marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "threadpoolctl", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/98/c2/a7855e41c9d285dfe86dc50b250978105dce513d6e459ea66a6aeb0e1e0c/scikit_learn-1.7.2.tar.gz", hash = "sha256:20e9e49ecd130598f1ca38a1d85090e1a600147b9c02fa6f15d69cb53d968fda", size = 7193136, upload-time = "2025-09-09T08:21:29.075Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/3e/daed796fd69cce768b8788401cc464ea90b306fb196ae1ffed0b98182859/scikit_learn-1.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:6b33579c10a3081d076ab403df4a4190da4f4432d443521674637677dc91e61f", size = 9336221, upload-time = "2025-09-09T08:20:19.328Z" }, - { url = "https://files.pythonhosted.org/packages/1c/ce/af9d99533b24c55ff4e18d9b7b4d9919bbc6cd8f22fe7a7be01519a347d5/scikit_learn-1.7.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:36749fb62b3d961b1ce4fedf08fa57a1986cd409eff2d783bca5d4b9b5fce51c", size = 8653834, upload-time = "2025-09-09T08:20:22.073Z" }, - { url = "https://files.pythonhosted.org/packages/58/0e/8c2a03d518fb6bd0b6b0d4b114c63d5f1db01ff0f9925d8eb10960d01c01/scikit_learn-1.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7a58814265dfc52b3295b1900cfb5701589d30a8bb026c7540f1e9d3499d5ec8", size = 9660938, upload-time = "2025-09-09T08:20:24.327Z" }, - { url = "https://files.pythonhosted.org/packages/2b/75/4311605069b5d220e7cf5adabb38535bd96f0079313cdbb04b291479b22a/scikit_learn-1.7.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a847fea807e278f821a0406ca01e387f97653e284ecbd9750e3ee7c90347f18", size = 9477818, upload-time = "2025-09-09T08:20:26.845Z" }, - { url = "https://files.pythonhosted.org/packages/7f/9b/87961813c34adbca21a6b3f6b2bea344c43b30217a6d24cc437c6147f3e8/scikit_learn-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:ca250e6836d10e6f402436d6463d6c0e4d8e0234cfb6a9a47835bd392b852ce5", size = 8886969, upload-time = "2025-09-09T08:20:29.329Z" }, - { url = "https://files.pythonhosted.org/packages/43/83/564e141eef908a5863a54da8ca342a137f45a0bfb71d1d79704c9894c9d1/scikit_learn-1.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7509693451651cd7361d30ce4e86a1347493554f172b1c72a39300fa2aea79e", size = 9331967, upload-time = "2025-09-09T08:20:32.421Z" }, - { url = "https://files.pythonhosted.org/packages/18/d6/ba863a4171ac9d7314c4d3fc251f015704a2caeee41ced89f321c049ed83/scikit_learn-1.7.2-cp311-cp311-macosx_12_0_arm64.whl", hash = 
"sha256:0486c8f827c2e7b64837c731c8feff72c0bd2b998067a8a9cbc10643c31f0fe1", size = 8648645, upload-time = "2025-09-09T08:20:34.436Z" }, - { url = "https://files.pythonhosted.org/packages/ef/0e/97dbca66347b8cf0ea8b529e6bb9367e337ba2e8be0ef5c1a545232abfde/scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89877e19a80c7b11a2891a27c21c4894fb18e2c2e077815bcade10d34287b20d", size = 9715424, upload-time = "2025-09-09T08:20:36.776Z" }, - { url = "https://files.pythonhosted.org/packages/f7/32/1f3b22e3207e1d2c883a7e09abb956362e7d1bd2f14458c7de258a26ac15/scikit_learn-1.7.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8da8bf89d4d79aaec192d2bda62f9b56ae4e5b4ef93b6a56b5de4977e375c1f1", size = 9509234, upload-time = "2025-09-09T08:20:38.957Z" }, - { url = "https://files.pythonhosted.org/packages/9f/71/34ddbd21f1da67c7a768146968b4d0220ee6831e4bcbad3e03dd3eae88b6/scikit_learn-1.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:9b7ed8d58725030568523e937c43e56bc01cadb478fc43c042a9aca1dacb3ba1", size = 8894244, upload-time = "2025-09-09T08:20:41.166Z" }, - { url = "https://files.pythonhosted.org/packages/a7/aa/3996e2196075689afb9fce0410ebdb4a09099d7964d061d7213700204409/scikit_learn-1.7.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d91a97fa2b706943822398ab943cde71858a50245e31bc71dba62aab1d60a96", size = 9259818, upload-time = "2025-09-09T08:20:43.19Z" }, - { url = "https://files.pythonhosted.org/packages/43/5d/779320063e88af9c4a7c2cf463ff11c21ac9c8bd730c4a294b0000b666c9/scikit_learn-1.7.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:acbc0f5fd2edd3432a22c69bed78e837c70cf896cd7993d71d51ba6708507476", size = 8636997, upload-time = "2025-09-09T08:20:45.468Z" }, - { url = "https://files.pythonhosted.org/packages/5c/d0/0c577d9325b05594fdd33aa970bf53fb673f051a45496842caee13cfd7fe/scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:e5bf3d930aee75a65478df91ac1225ff89cd28e9ac7bd1196853a9229b6adb0b", size = 9478381, upload-time = "2025-09-09T08:20:47.982Z" }, - { url = "https://files.pythonhosted.org/packages/82/70/8bf44b933837ba8494ca0fc9a9ab60f1c13b062ad0197f60a56e2fc4c43e/scikit_learn-1.7.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4d6e9deed1a47aca9fe2f267ab8e8fe82ee20b4526b2c0cd9e135cea10feb44", size = 9300296, upload-time = "2025-09-09T08:20:50.366Z" }, - { url = "https://files.pythonhosted.org/packages/c6/99/ed35197a158f1fdc2fe7c3680e9c70d0128f662e1fee4ed495f4b5e13db0/scikit_learn-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:6088aa475f0785e01bcf8529f55280a3d7d298679f50c0bb70a2364a82d0b290", size = 8731256, upload-time = "2025-09-09T08:20:52.627Z" }, - { url = "https://files.pythonhosted.org/packages/ae/93/a3038cb0293037fd335f77f31fe053b89c72f17b1c8908c576c29d953e84/scikit_learn-1.7.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b7dacaa05e5d76759fb071558a8b5130f4845166d88654a0f9bdf3eb57851b7", size = 9212382, upload-time = "2025-09-09T08:20:54.731Z" }, - { url = "https://files.pythonhosted.org/packages/40/dd/9a88879b0c1104259136146e4742026b52df8540c39fec21a6383f8292c7/scikit_learn-1.7.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:abebbd61ad9e1deed54cca45caea8ad5f79e1b93173dece40bb8e0c658dbe6fe", size = 8592042, upload-time = "2025-09-09T08:20:57.313Z" }, - { url = "https://files.pythonhosted.org/packages/46/af/c5e286471b7d10871b811b72ae794ac5fe2989c0a2df07f0ec723030f5f5/scikit_learn-1.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:502c18e39849c0ea1a5d681af1dbcf15f6cce601aebb657aabbfe84133c1907f", size = 9434180, upload-time = "2025-09-09T08:20:59.671Z" }, - { url = "https://files.pythonhosted.org/packages/f1/fd/df59faa53312d585023b2da27e866524ffb8faf87a68516c23896c718320/scikit_learn-1.7.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:7a4c328a71785382fe3fe676a9ecf2c86189249beff90bf85e22bdb7efaf9ae0", size = 9283660, upload-time = "2025-09-09T08:21:01.71Z" }, - { url = "https://files.pythonhosted.org/packages/a7/c7/03000262759d7b6f38c836ff9d512f438a70d8a8ddae68ee80de72dcfb63/scikit_learn-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:63a9afd6f7b229aad94618c01c252ce9e6fa97918c5ca19c9a17a087d819440c", size = 8702057, upload-time = "2025-09-09T08:21:04.234Z" }, - { url = "https://files.pythonhosted.org/packages/55/87/ef5eb1f267084532c8e4aef98a28b6ffe7425acbfd64b5e2f2e066bc29b3/scikit_learn-1.7.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9acb6c5e867447b4e1390930e3944a005e2cb115922e693c08a323421a6966e8", size = 9558731, upload-time = "2025-09-09T08:21:06.381Z" }, - { url = "https://files.pythonhosted.org/packages/93/f8/6c1e3fc14b10118068d7938878a9f3f4e6d7b74a8ddb1e5bed65159ccda8/scikit_learn-1.7.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:2a41e2a0ef45063e654152ec9d8bcfc39f7afce35b08902bfe290c2498a67a6a", size = 9038852, upload-time = "2025-09-09T08:21:08.628Z" }, - { url = "https://files.pythonhosted.org/packages/83/87/066cafc896ee540c34becf95d30375fe5cbe93c3b75a0ee9aa852cd60021/scikit_learn-1.7.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98335fb98509b73385b3ab2bd0639b1f610541d3988ee675c670371d6a87aa7c", size = 9527094, upload-time = "2025-09-09T08:21:11.486Z" }, - { url = "https://files.pythonhosted.org/packages/9c/2b/4903e1ccafa1f6453b1ab78413938c8800633988c838aa0be386cbb33072/scikit_learn-1.7.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:191e5550980d45449126e23ed1d5e9e24b2c68329ee1f691a3987476e115e09c", size = 9367436, upload-time = "2025-09-09T08:21:13.602Z" }, - { url = "https://files.pythonhosted.org/packages/b5/aa/8444be3cfb10451617ff9d177b3c190288f4563e6c50ff02728be67ad094/scikit_learn-1.7.2-cp313-cp313t-win_amd64.whl", hash = 
"sha256:57dc4deb1d3762c75d685507fbd0bc17160144b2f2ba4ccea5dc285ab0d0e973", size = 9275749, upload-time = "2025-09-09T08:21:15.96Z" }, - { url = "https://files.pythonhosted.org/packages/d9/82/dee5acf66837852e8e68df6d8d3a6cb22d3df997b733b032f513d95205b7/scikit_learn-1.7.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fa8f63940e29c82d1e67a45d5297bdebbcb585f5a5a50c4914cc2e852ab77f33", size = 9208906, upload-time = "2025-09-09T08:21:18.557Z" }, - { url = "https://files.pythonhosted.org/packages/3c/30/9029e54e17b87cb7d50d51a5926429c683d5b4c1732f0507a6c3bed9bf65/scikit_learn-1.7.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:f95dc55b7902b91331fa4e5845dd5bde0580c9cd9612b1b2791b7e80c3d32615", size = 8627836, upload-time = "2025-09-09T08:21:20.695Z" }, - { url = "https://files.pythonhosted.org/packages/60/18/4a52c635c71b536879f4b971c2cedf32c35ee78f48367885ed8025d1f7ee/scikit_learn-1.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9656e4a53e54578ad10a434dc1f993330568cfee176dff07112b8785fb413106", size = 9426236, upload-time = "2025-09-09T08:21:22.645Z" }, - { url = "https://files.pythonhosted.org/packages/99/7e/290362f6ab582128c53445458a5befd471ed1ea37953d5bcf80604619250/scikit_learn-1.7.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96dc05a854add0e50d3f47a1ef21a10a595016da5b007c7d9cd9d0bffd1fcc61", size = 9312593, upload-time = "2025-09-09T08:21:24.65Z" }, - { url = "https://files.pythonhosted.org/packages/8e/87/24f541b6d62b1794939ae6422f8023703bbf6900378b2b34e0b4384dfefd/scikit_learn-1.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:bb24510ed3f9f61476181e4db51ce801e2ba37541def12dc9333b946fc7a9cf8", size = 8820007, upload-time = "2025-09-09T08:21:26.713Z" }, -] - -[[package]] -name = "scikit-learn" -version = "1.9.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.15'", - "python_full_version == '3.14.*'", - "python_full_version == '3.13.*'", 
- "python_full_version >= '3.11' and python_full_version < '3.13'", -] -dependencies = [ - { name = "joblib", marker = "python_full_version >= '3.11'" }, - { name = "narwhals", marker = "python_full_version >= '3.11'" }, - { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "threadpoolctl", marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fa/6f/37092bdb25f712817231799fc5674d8e704066a8a70c1d2d40517e18b4ab/scikit_learn-1.9.0.tar.gz", hash = "sha256:8833266989d3a5110178a9fae30783675460724d0e1efb13b14901d2c660c557", size = 7750767, upload-time = "2026-06-02T11:54:32.706Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f5/be/e844fd9586e66540a15b71924d17a6cbc1bb749e81ddd0a796bcdba4c055/scikit_learn-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9db6f4d34e68c8899e4cab27fdf8eafe6ed21f2ba52ceb25ea250cd237f8e47b", size = 8789686, upload-time = "2026-06-02T11:53:05.439Z" }, - { url = "https://files.pythonhosted.org/packages/42/e2/ff880f62677a17d035817d543cb0fc8727d01eccbee81c5f7fc733a9d856/scikit_learn-1.9.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f401448645a3e7bc115aa3c094097865155b34bff1cba8101857d9104e99074c", size = 8256782, upload-time = "2026-06-02T11:53:08.904Z" }, - { url = "https://files.pythonhosted.org/packages/25/64/eb40435e1a508ab1b4e284ce43ae80f6a162e5be5e38ed5a6fab467a9ea4/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd3a8ef0c758555a3b23c03adaa858af32f7736785ded50ad5991f59c4ed03fa", size = 8992419, upload-time = "2026-06-02T11:53:11.551Z" }, - { url = 
"https://files.pythonhosted.org/packages/8d/da/4810a28e473185429e45a57eebcc91fc991b33d889cc0676063e671db03d/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7e254636164090da847715a27f8e5478feb98c40a9e0ee90cbd277de9e5ceb8", size = 9281411, upload-time = "2026-06-02T11:53:15.063Z" }, - { url = "https://files.pythonhosted.org/packages/3b/67/be3d369f40d8178ba3bd86635d132e08cb5329b023e4669d9426d84bc007/scikit_learn-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:5dc1818c77575d149e25fce9ef82dd7b7263ae372f03494158668ad632a69759", size = 8272736, upload-time = "2026-06-02T11:53:18.108Z" }, - { url = "https://files.pythonhosted.org/packages/37/79/a733f02dc2118da7e77a134b34f39f40201a353311b011d20859d2db3556/scikit_learn-1.9.0-cp311-cp311-win_arm64.whl", hash = "sha256:366652351f092b219c248f1e72821e841960a63d8f358f1dcfd54dc1cbdbbc28", size = 7919564, upload-time = "2026-06-02T11:53:21.2Z" }, - { url = "https://files.pythonhosted.org/packages/ac/20/75f915ff375d6249e6550ac740fdbbd66159a068fd3af1400ff62036b07a/scikit_learn-1.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2bd41b0d201bc81575531b96b713d3eb5e5f50fb0b82101ff0f92294fdc236ac", size = 8741122, upload-time = "2026-06-02T11:53:24.08Z" }, - { url = "https://files.pythonhosted.org/packages/cc/d5/2b5148f2279196775e1db2aeb85d14b70ac80e7e32b3b28e7ebeafb0901d/scikit_learn-1.9.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5be45aa4a42a68a533913a6ed736cf309de2226411c79ef8d609a5456f1939b1", size = 8261512, upload-time = "2026-06-02T11:53:27.183Z" }, - { url = "https://files.pythonhosted.org/packages/a0/ee/5adbc77656b71f9456a2f5a7a9fdb4bcf9207a6b962889f1c2f9323afa4e/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e50ed4da51974e86e940690e9a3d82e729b62b5a49f7c9bac534d515d39d86f", size = 8837603, upload-time = "2026-06-02T11:53:30.328Z" }, - { url = 
"https://files.pythonhosted.org/packages/6c/c2/63fdda36c56437eeb44aaf9493c8bcd62ce230ab1598924fc626ffbfa943/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:056c92bb67ad4c28463c2f2653d9701449201e7e7a9e94e321be0f71c4fef2b8", size = 9132097, upload-time = "2026-06-02T11:53:33.456Z" }, - { url = "https://files.pythonhosted.org/packages/83/a4/c8e67227c680e2259c8864ae72ff48b06e16a6f51253a22167aa02a8aa4e/scikit_learn-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4306775fad04cc4b472a1b15af1ae9cede1540fbfcc17fbce3767cd8dc7ae283", size = 8211173, upload-time = "2026-06-02T11:53:36.602Z" }, - { url = "https://files.pythonhosted.org/packages/cf/fd/3c0863792e98e67e9184aa4029288a175935eb65443afcd30d4f143450cf/scikit_learn-1.9.0-cp312-cp312-win_arm64.whl", hash = "sha256:26e22435f63bcdcf396b574273f29f13dd531f5ea035801f5be10ba1540a4e60", size = 7867451, upload-time = "2026-06-02T11:53:39.075Z" }, - { url = "https://files.pythonhosted.org/packages/3c/01/cf3310626b6d48d3e9be69a1223f9180360b5e6edb045f50fade723ce494/scikit_learn-1.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:80746d63bd4b6eaca54d36fe5feaf4d28bb38dc6f9470f81c7cad7c40155f119", size = 8705188, upload-time = "2026-06-02T11:53:41.964Z" }, - { url = "https://files.pythonhosted.org/packages/3e/04/5acd7ae280c5f93b6ac5ef6cdec14eef4c8d1cd91d85b3292989c94d96b1/scikit_learn-1.9.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:5b934c45c252844a91d69fda3a34cff5e7307e1db10d77cb10a3980312c74713", size = 8228299, upload-time = "2026-06-02T11:53:44.817Z" }, - { url = "https://files.pythonhosted.org/packages/0c/39/ffe829a5b8ecb40a518724a997794657fdc354ada5e8fe8e64d998c0bac9/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38c3dcb9a1ffb85505ec53d54c7b4aea0cff70050425a7760c2af661ac85df05", size = 8789690, upload-time = "2026-06-02T11:53:47.461Z" }, - { url = 
"https://files.pythonhosted.org/packages/1f/88/8dab5de10c638c083772a6be83a3d8106ced492f74a928c8693638e5bb50/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da76d09304a4706db7cc1e3ebaa3b6b98a67365cc11d2996c4f1e58ba47df714", size = 9087723, upload-time = "2026-06-02T11:53:50.702Z" }, - { url = "https://files.pythonhosted.org/packages/20/3f/7917ca72464038f6240ec70c29f94862d08a34a74291ae4d4ec5eb8186a0/scikit_learn-1.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5808d98f15c6bf6d9d96d2348c1997392a5888ce7097e664105f930c4bca1277", size = 8184330, upload-time = "2026-06-02T11:53:53.396Z" }, - { url = "https://files.pythonhosted.org/packages/78/c7/15739eb2f61fda3c54639e9942414e5a19ad8a8d1f5a3266afad7cb7df80/scikit_learn-1.9.0-cp313-cp313-win_arm64.whl", hash = "sha256:d77f54c017633791bc0225a43e2f8d03745fdcfe4880268fcc4df15f505dec2e", size = 7840653, upload-time = "2026-06-02T11:53:56.035Z" }, - { url = "https://files.pythonhosted.org/packages/f4/7d/c9a35cf59b20a86fec24d306f1547b78dec194b08d367ce2a3e4854169d9/scikit_learn-1.9.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9656acd4e93f74e0b66c8a36c88830a99252dfa900044d36bc2212ae89a47162", size = 8713289, upload-time = "2026-06-02T11:53:58.788Z" }, - { url = "https://files.pythonhosted.org/packages/3c/a7/552a7821597c632b907f7bfe8f36f9f572777af8ef8a48353041cf8e091a/scikit_learn-1.9.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:24360002ae845e7866522b0a5bbf690802e7bc388cac8663502e78aa98598aa2", size = 8245141, upload-time = "2026-06-02T11:54:01.694Z" }, - { url = "https://files.pythonhosted.org/packages/7d/79/f4a0c4fe9711154cddabf913471153af79056382ddc612cfe5ee0ff4b72e/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5162ad10a418c8a282dde04c9aa06965de3e9a65f33c1440c0ae69bb1a09d913", size = 8847671, upload-time = "2026-06-02T11:54:04.448Z" }, - { url = 
"https://files.pythonhosted.org/packages/f0/af/4d72d9e475ac83719160c662619e4bf7b95c19507cd582e7d0167a3c3dae/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fea2cc5677ab49d6f5bade978c866da44957b712d92e9635e8b4f723013c3cb", size = 9118104, upload-time = "2026-06-02T11:54:07.205Z" }, - { url = "https://files.pythonhosted.org/packages/a2/d5/6a58eea2cb9abbb9b3f2bb8b2cfb3243d1152d69f442d256c7af71304769/scikit_learn-1.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:64fa347efc1c839c487433e40c5144d38c336e8a2b59c81aa8660373945c2673", size = 8290674, upload-time = "2026-06-02T11:54:10.087Z" }, - { url = "https://files.pythonhosted.org/packages/65/5b/d4c879cf358f1187141cf90ced473f087183489090244f50c124a2ee478b/scikit_learn-1.9.0-cp314-cp314-win_arm64.whl", hash = "sha256:1b944b6db288f6b926e3650026ddafb988929de95d11fc2cc5fa117773c9ba42", size = 7978807, upload-time = "2026-06-02T11:54:12.769Z" }, - { url = "https://files.pythonhosted.org/packages/8a/43/bfae3121ec67ae09150d453c442c7c1cc166e9aefe056e6ab3b7728a5cfc/scikit_learn-1.9.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4ccacf04ca5f4b492158a5f28afe0ace43f81b2571e4b9a66d34848b46128949", size = 9031941, upload-time = "2026-06-02T11:54:15.436Z" }, - { url = "https://files.pythonhosted.org/packages/75/b0/20a4546eb17f3b25d3c66df15810411c14ed5065bcfab50b53c96fb627b2/scikit_learn-1.9.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ee1a8db2c18c08e34c7412d4b10be1cac214cd4ea7dc9715a6a327eb49a37c96", size = 8613528, upload-time = "2026-06-02T11:54:18.842Z" }, - { url = "https://files.pythonhosted.org/packages/18/3c/e440e039bb82cd19004edaaad00acbde0fb9b461083c3ecf37941c557312/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:147e9329ef0e39f75d4cffa02b2aa48d827832684926cd5210d9a2cb5c57246b", size = 8855050, upload-time = "2026-06-02T11:54:21.699Z" }, - { url = 
"https://files.pythonhosted.org/packages/43/26/b341b8dab5998da6270a3a42c2152c578501354d36f944b5856757035ef8/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bad8f8b9950321b54c965fdcbac6c6c55e79e16646b49977bcf3668d3870a1a", size = 9097190, upload-time = "2026-06-02T11:54:24.454Z" }, - { url = "https://files.pythonhosted.org/packages/fb/de/b650b4d69b84468cfa2e28a3ff7b8103743029e6446ce1a97fe060ef688c/scikit_learn-1.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:78fc56eafd4edb9575d2d8950d1dd152061abb573341a1cb7e099fc40f6c6666", size = 8963204, upload-time = "2026-06-02T11:54:27.428Z" }, - { url = "https://files.pythonhosted.org/packages/ee/f3/ff83d76d7418112e5a61326443cdda87be3545dd8d6599c95b2481a4419e/scikit_learn-1.9.0-cp314-cp314t-win_arm64.whl", hash = "sha256:051075bda8b7aab87b1906ab3d4740a1e1224a19d7b3781a576736edc94e76aa", size = 8222661, upload-time = "2026-06-02T11:54:30.192Z" }, -] - -[[package]] -name = "scipy" -version = "1.15.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.11'", -] -dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214, upload-time = "2025-05-08T16:13:05.955Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/78/2f/4966032c5f8cc7e6a60f1b2e0ad686293b9474b65246b0c642e3ef3badd0/scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c", size = 38702770, upload-time = "2025-05-08T16:04:20.849Z" }, - { url = 
"https://files.pythonhosted.org/packages/a0/6e/0c3bf90fae0e910c274db43304ebe25a6b391327f3f10b5dcc638c090795/scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253", size = 30094511, upload-time = "2025-05-08T16:04:27.103Z" }, - { url = "https://files.pythonhosted.org/packages/ea/b1/4deb37252311c1acff7f101f6453f0440794f51b6eacb1aad4459a134081/scipy-1.15.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:aef683a9ae6eb00728a542b796f52a5477b78252edede72b8327a886ab63293f", size = 22368151, upload-time = "2025-05-08T16:04:31.731Z" }, - { url = "https://files.pythonhosted.org/packages/38/7d/f457626e3cd3c29b3a49ca115a304cebb8cc6f31b04678f03b216899d3c6/scipy-1.15.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:1c832e1bd78dea67d5c16f786681b28dd695a8cb1fb90af2e27580d3d0967e92", size = 25121732, upload-time = "2025-05-08T16:04:36.596Z" }, - { url = "https://files.pythonhosted.org/packages/db/0a/92b1de4a7adc7a15dcf5bddc6e191f6f29ee663b30511ce20467ef9b82e4/scipy-1.15.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:263961f658ce2165bbd7b99fa5135195c3a12d9bef045345016b8b50c315cb82", size = 35547617, upload-time = "2025-05-08T16:04:43.546Z" }, - { url = "https://files.pythonhosted.org/packages/8e/6d/41991e503e51fc1134502694c5fa7a1671501a17ffa12716a4a9151af3df/scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2abc762b0811e09a0d3258abee2d98e0c703eee49464ce0069590846f31d40", size = 37662964, upload-time = "2025-05-08T16:04:49.431Z" }, - { url = "https://files.pythonhosted.org/packages/25/e1/3df8f83cb15f3500478c889be8fb18700813b95e9e087328230b98d547ff/scipy-1.15.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed7284b21a7a0c8f1b6e5977ac05396c0d008b89e05498c8b7e8f4a1423bba0e", size = 37238749, upload-time = "2025-05-08T16:04:55.215Z" }, - { url = 
"https://files.pythonhosted.org/packages/93/3e/b3257cf446f2a3533ed7809757039016b74cd6f38271de91682aa844cfc5/scipy-1.15.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5380741e53df2c566f4d234b100a484b420af85deb39ea35a1cc1be84ff53a5c", size = 40022383, upload-time = "2025-05-08T16:05:01.914Z" }, - { url = "https://files.pythonhosted.org/packages/d1/84/55bc4881973d3f79b479a5a2e2df61c8c9a04fcb986a213ac9c02cfb659b/scipy-1.15.3-cp310-cp310-win_amd64.whl", hash = "sha256:9d61e97b186a57350f6d6fd72640f9e99d5a4a2b8fbf4b9ee9a841eab327dc13", size = 41259201, upload-time = "2025-05-08T16:05:08.166Z" }, - { url = "https://files.pythonhosted.org/packages/96/ab/5cc9f80f28f6a7dff646c5756e559823614a42b1939d86dd0ed550470210/scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b", size = 38714255, upload-time = "2025-05-08T16:05:14.596Z" }, - { url = "https://files.pythonhosted.org/packages/4a/4a/66ba30abe5ad1a3ad15bfb0b59d22174012e8056ff448cb1644deccbfed2/scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba", size = 30111035, upload-time = "2025-05-08T16:05:20.152Z" }, - { url = "https://files.pythonhosted.org/packages/4b/fa/a7e5b95afd80d24313307f03624acc65801846fa75599034f8ceb9e2cbf6/scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65", size = 22384499, upload-time = "2025-05-08T16:05:24.494Z" }, - { url = "https://files.pythonhosted.org/packages/17/99/f3aaddccf3588bb4aea70ba35328c204cadd89517a1612ecfda5b2dd9d7a/scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1", size = 25152602, upload-time = "2025-05-08T16:05:29.313Z" }, - { url = 
"https://files.pythonhosted.org/packages/56/c5/1032cdb565f146109212153339f9cb8b993701e9fe56b1c97699eee12586/scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889", size = 35503415, upload-time = "2025-05-08T16:05:34.699Z" }, - { url = "https://files.pythonhosted.org/packages/bd/37/89f19c8c05505d0601ed5650156e50eb881ae3918786c8fd7262b4ee66d3/scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982", size = 37652622, upload-time = "2025-05-08T16:05:40.762Z" }, - { url = "https://files.pythonhosted.org/packages/7e/31/be59513aa9695519b18e1851bb9e487de66f2d31f835201f1b42f5d4d475/scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9", size = 37244796, upload-time = "2025-05-08T16:05:48.119Z" }, - { url = "https://files.pythonhosted.org/packages/10/c0/4f5f3eeccc235632aab79b27a74a9130c6c35df358129f7ac8b29f562ac7/scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594", size = 40047684, upload-time = "2025-05-08T16:05:54.22Z" }, - { url = "https://files.pythonhosted.org/packages/ab/a7/0ddaf514ce8a8714f6ed243a2b391b41dbb65251affe21ee3077ec45ea9a/scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb", size = 41246504, upload-time = "2025-05-08T16:06:00.437Z" }, - { url = "https://files.pythonhosted.org/packages/37/4b/683aa044c4162e10ed7a7ea30527f2cbd92e6999c10a8ed8edb253836e9c/scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019", size = 38766735, upload-time = "2025-05-08T16:06:06.471Z" }, - { url = 
"https://files.pythonhosted.org/packages/7b/7e/f30be3d03de07f25dc0ec926d1681fed5c732d759ac8f51079708c79e680/scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6", size = 30173284, upload-time = "2025-05-08T16:06:11.686Z" }, - { url = "https://files.pythonhosted.org/packages/07/9c/0ddb0d0abdabe0d181c1793db51f02cd59e4901da6f9f7848e1f96759f0d/scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477", size = 22446958, upload-time = "2025-05-08T16:06:15.97Z" }, - { url = "https://files.pythonhosted.org/packages/af/43/0bce905a965f36c58ff80d8bea33f1f9351b05fad4beaad4eae34699b7a1/scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c", size = 25242454, upload-time = "2025-05-08T16:06:20.394Z" }, - { url = "https://files.pythonhosted.org/packages/56/30/a6f08f84ee5b7b28b4c597aca4cbe545535c39fe911845a96414700b64ba/scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45", size = 35210199, upload-time = "2025-05-08T16:06:26.159Z" }, - { url = "https://files.pythonhosted.org/packages/0b/1f/03f52c282437a168ee2c7c14a1a0d0781a9a4a8962d84ac05c06b4c5b555/scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49", size = 37309455, upload-time = "2025-05-08T16:06:32.778Z" }, - { url = "https://files.pythonhosted.org/packages/89/b1/fbb53137f42c4bf630b1ffdfc2151a62d1d1b903b249f030d2b1c0280af8/scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e", size = 36885140, upload-time = "2025-05-08T16:06:39.249Z" }, - { url = 
"https://files.pythonhosted.org/packages/2e/2e/025e39e339f5090df1ff266d021892694dbb7e63568edcfe43f892fa381d/scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539", size = 39710549, upload-time = "2025-05-08T16:06:45.729Z" }, - { url = "https://files.pythonhosted.org/packages/e6/eb/3bf6ea8ab7f1503dca3a10df2e4b9c3f6b3316df07f6c0ded94b281c7101/scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed", size = 40966184, upload-time = "2025-05-08T16:06:52.623Z" }, - { url = "https://files.pythonhosted.org/packages/73/18/ec27848c9baae6e0d6573eda6e01a602e5649ee72c27c3a8aad673ebecfd/scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759", size = 38728256, upload-time = "2025-05-08T16:06:58.696Z" }, - { url = "https://files.pythonhosted.org/packages/74/cd/1aef2184948728b4b6e21267d53b3339762c285a46a274ebb7863c9e4742/scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62", size = 30109540, upload-time = "2025-05-08T16:07:04.209Z" }, - { url = "https://files.pythonhosted.org/packages/5b/d8/59e452c0a255ec352bd0a833537a3bc1bfb679944c4938ab375b0a6b3a3e/scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb", size = 22383115, upload-time = "2025-05-08T16:07:08.998Z" }, - { url = "https://files.pythonhosted.org/packages/08/f5/456f56bbbfccf696263b47095291040655e3cbaf05d063bdc7c7517f32ac/scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730", size = 25163884, upload-time = "2025-05-08T16:07:14.091Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/66/a9618b6a435a0f0c0b8a6d0a2efb32d4ec5a85f023c2b79d39512040355b/scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825", size = 35174018, upload-time = "2025-05-08T16:07:19.427Z" }, - { url = "https://files.pythonhosted.org/packages/b5/09/c5b6734a50ad4882432b6bb7c02baf757f5b2f256041da5df242e2d7e6b6/scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7", size = 37269716, upload-time = "2025-05-08T16:07:25.712Z" }, - { url = "https://files.pythonhosted.org/packages/77/0a/eac00ff741f23bcabd352731ed9b8995a0a60ef57f5fd788d611d43d69a1/scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11", size = 36872342, upload-time = "2025-05-08T16:07:31.468Z" }, - { url = "https://files.pythonhosted.org/packages/fe/54/4379be86dd74b6ad81551689107360d9a3e18f24d20767a2d5b9253a3f0a/scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126", size = 39670869, upload-time = "2025-05-08T16:07:38.002Z" }, - { url = "https://files.pythonhosted.org/packages/87/2e/892ad2862ba54f084ffe8cc4a22667eaf9c2bcec6d2bff1d15713c6c0703/scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163", size = 40988851, upload-time = "2025-05-08T16:08:33.671Z" }, - { url = "https://files.pythonhosted.org/packages/1b/e9/7a879c137f7e55b30d75d90ce3eb468197646bc7b443ac036ae3fe109055/scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8", size = 38863011, upload-time = "2025-05-08T16:07:44.039Z" }, - { url = 
"https://files.pythonhosted.org/packages/51/d1/226a806bbd69f62ce5ef5f3ffadc35286e9fbc802f606a07eb83bf2359de/scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5", size = 30266407, upload-time = "2025-05-08T16:07:49.891Z" }, - { url = "https://files.pythonhosted.org/packages/e5/9b/f32d1d6093ab9eeabbd839b0f7619c62e46cc4b7b6dbf05b6e615bbd4400/scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e", size = 22540030, upload-time = "2025-05-08T16:07:54.121Z" }, - { url = "https://files.pythonhosted.org/packages/e7/29/c278f699b095c1a884f29fda126340fcc201461ee8bfea5c8bdb1c7c958b/scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb", size = 25218709, upload-time = "2025-05-08T16:07:58.506Z" }, - { url = "https://files.pythonhosted.org/packages/24/18/9e5374b617aba742a990581373cd6b68a2945d65cc588482749ef2e64467/scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723", size = 34809045, upload-time = "2025-05-08T16:08:03.929Z" }, - { url = "https://files.pythonhosted.org/packages/e1/fe/9c4361e7ba2927074360856db6135ef4904d505e9b3afbbcb073c4008328/scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb", size = 36703062, upload-time = "2025-05-08T16:08:09.558Z" }, - { url = "https://files.pythonhosted.org/packages/b7/8e/038ccfe29d272b30086b25a4960f757f97122cb2ec42e62b460d02fe98e9/scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4", size = 36393132, upload-time = "2025-05-08T16:08:15.34Z" }, - { url = 
"https://files.pythonhosted.org/packages/10/7e/5c12285452970be5bdbe8352c619250b97ebf7917d7a9a9e96b8a8140f17/scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5", size = 38979503, upload-time = "2025-05-08T16:08:21.513Z" }, - { url = "https://files.pythonhosted.org/packages/81/06/0a5e5349474e1cbc5757975b21bd4fad0e72ebf138c5592f191646154e06/scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca", size = 40308097, upload-time = "2025-05-08T16:08:27.627Z" }, -] - -[[package]] -name = "scipy" -version = "1.17.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.15'", - "python_full_version == '3.14.*'", - "python_full_version == '3.13.*'", - "python_full_version >= '3.11' and python_full_version < '3.13'", -] -dependencies = [ - { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/df/75/b4ce781849931fef6fd529afa6b63711d5a733065722d0c3e2724af9e40a/scipy-1.17.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:1f95b894f13729334fb990162e911c9e5dc1ab390c58aa6cbecb389c5b5e28ec", size = 31613675, upload-time = "2026-02-23T00:16:00.13Z" }, - { url = "https://files.pythonhosted.org/packages/f7/58/bccc2861b305abdd1b8663d6130c0b3d7cc22e8d86663edbc8401bfd40d4/scipy-1.17.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:e18f12c6b0bc5a592ed23d3f7b891f68fd7f8241d69b7883769eb5d5dfb52696", size = 28162057, upload-time = "2026-02-23T00:16:09.456Z" }, - { 
url = "https://files.pythonhosted.org/packages/6d/ee/18146b7757ed4976276b9c9819108adbc73c5aad636e5353e20746b73069/scipy-1.17.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a3472cfbca0a54177d0faa68f697d8ba4c80bbdc19908c3465556d9f7efce9ee", size = 20334032, upload-time = "2026-02-23T00:16:17.358Z" }, - { url = "https://files.pythonhosted.org/packages/ec/e6/cef1cf3557f0c54954198554a10016b6a03b2ec9e22a4e1df734936bd99c/scipy-1.17.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:766e0dc5a616d026a3a1cffa379af959671729083882f50307e18175797b3dfd", size = 22709533, upload-time = "2026-02-23T00:16:25.791Z" }, - { url = "https://files.pythonhosted.org/packages/4d/60/8804678875fc59362b0fb759ab3ecce1f09c10a735680318ac30da8cd76b/scipy-1.17.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:744b2bf3640d907b79f3fd7874efe432d1cf171ee721243e350f55234b4cec4c", size = 33062057, upload-time = "2026-02-23T00:16:36.931Z" }, - { url = "https://files.pythonhosted.org/packages/09/7d/af933f0f6e0767995b4e2d705a0665e454d1c19402aa7e895de3951ebb04/scipy-1.17.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43af8d1f3bea642559019edfe64e9b11192a8978efbd1539d7bc2aaa23d92de4", size = 35349300, upload-time = "2026-02-23T00:16:49.108Z" }, - { url = "https://files.pythonhosted.org/packages/b4/3d/7ccbbdcbb54c8fdc20d3b6930137c782a163fa626f0aef920349873421ba/scipy-1.17.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd96a1898c0a47be4520327e01f874acfd61fb48a9420f8aa9f6483412ffa444", size = 35127333, upload-time = "2026-02-23T00:17:01.293Z" }, - { url = "https://files.pythonhosted.org/packages/e8/19/f926cb11c42b15ba08e3a71e376d816ac08614f769b4f47e06c3580c836a/scipy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4eb6c25dd62ee8d5edf68a8e1c171dd71c292fdae95d8aeb3dd7d7de4c364082", size = 37741314, upload-time = "2026-02-23T00:17:12.576Z" }, - { url = 
"https://files.pythonhosted.org/packages/95/da/0d1df507cf574b3f224ccc3d45244c9a1d732c81dcb26b1e8a766ae271a8/scipy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:d30e57c72013c2a4fe441c2fcb8e77b14e152ad48b5464858e07e2ad9fbfceff", size = 36607512, upload-time = "2026-02-23T00:17:23.424Z" }, - { url = "https://files.pythonhosted.org/packages/68/7f/bdd79ceaad24b671543ffe0ef61ed8e659440eb683b66f033454dcee90eb/scipy-1.17.1-cp311-cp311-win_arm64.whl", hash = "sha256:9ecb4efb1cd6e8c4afea0daa91a87fbddbce1b99d2895d151596716c0b2e859d", size = 24599248, upload-time = "2026-02-23T00:17:34.561Z" }, - { url = "https://files.pythonhosted.org/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954, upload-time = "2026-02-23T00:17:49.855Z" }, - { url = "https://files.pythonhosted.org/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662, upload-time = "2026-02-23T00:18:01.64Z" }, - { url = "https://files.pythonhosted.org/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366, upload-time = "2026-02-23T00:18:12.015Z" }, - { url = "https://files.pythonhosted.org/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017, upload-time = "2026-02-23T00:18:21.502Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842, upload-time = "2026-02-23T00:18:35.367Z" }, - { url = "https://files.pythonhosted.org/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890, upload-time = "2026-02-23T00:18:49.188Z" }, - { url = "https://files.pythonhosted.org/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557, upload-time = "2026-02-23T00:18:54.74Z" }, - { url = "https://files.pythonhosted.org/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856, upload-time = "2026-02-23T00:19:00.307Z" }, - { url = "https://files.pythonhosted.org/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682, upload-time = "2026-02-23T00:19:07.67Z" }, - { url = "https://files.pythonhosted.org/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340, upload-time = "2026-02-23T00:19:12.024Z" }, - { url = 
"https://files.pythonhosted.org/packages/76/27/07ee1b57b65e92645f219b37148a7e7928b82e2b5dbeccecb4dff7c64f0b/scipy-1.17.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5e3c5c011904115f88a39308379c17f91546f77c1667cea98739fe0fccea804c", size = 31590199, upload-time = "2026-02-23T00:19:17.192Z" }, - { url = "https://files.pythonhosted.org/packages/ec/ae/db19f8ab842e9b724bf5dbb7db29302a91f1e55bc4d04b1025d6d605a2c5/scipy-1.17.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6fac755ca3d2c3edcb22f479fceaa241704111414831ddd3bc6056e18516892f", size = 28154001, upload-time = "2026-02-23T00:19:22.241Z" }, - { url = "https://files.pythonhosted.org/packages/5b/58/3ce96251560107b381cbd6e8413c483bbb1228a6b919fa8652b0d4090e7f/scipy-1.17.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ff200bf9d24f2e4d5dc6ee8c3ac64d739d3a89e2326ba68aaf6c4a2b838fd7d", size = 20325719, upload-time = "2026-02-23T00:19:26.329Z" }, - { url = "https://files.pythonhosted.org/packages/b2/83/15087d945e0e4d48ce2377498abf5ad171ae013232ae31d06f336e64c999/scipy-1.17.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4b400bdc6f79fa02a4d86640310dde87a21fba0c979efff5248908c6f15fad1b", size = 22683595, upload-time = "2026-02-23T00:19:30.304Z" }, - { url = "https://files.pythonhosted.org/packages/b4/e0/e58fbde4a1a594c8be8114eb4aac1a55bcd6587047efc18a61eb1f5c0d30/scipy-1.17.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b64ca7d4aee0102a97f3ba22124052b4bd2152522355073580bf4845e2550b6", size = 32896429, upload-time = "2026-02-23T00:19:35.536Z" }, - { url = "https://files.pythonhosted.org/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952, upload-time = "2026-02-23T00:19:42.259Z" }, - { url = 
"https://files.pythonhosted.org/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063, upload-time = "2026-02-23T00:19:47.547Z" }, - { url = "https://files.pythonhosted.org/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449, upload-time = "2026-02-23T00:19:53.238Z" }, - { url = "https://files.pythonhosted.org/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 36510943, upload-time = "2026-02-23T00:20:50.89Z" }, - { url = "https://files.pythonhosted.org/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621, upload-time = "2026-02-23T00:20:55.871Z" }, - { url = "https://files.pythonhosted.org/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708, upload-time = "2026-02-23T00:19:58.694Z" }, - { url = "https://files.pythonhosted.org/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135, upload-time = "2026-02-23T00:20:03.934Z" }, - { url = 
"https://files.pythonhosted.org/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977, upload-time = "2026-02-23T00:20:07.935Z" }, - { url = "https://files.pythonhosted.org/packages/d9/b8/8ccd9b766ad14c78386599708eb745f6b44f08400a5fd0ade7cf89b6fc93/scipy-1.17.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:844e165636711ef41f80b4103ed234181646b98a53c8f05da12ca5ca289134f6", size = 23029601, upload-time = "2026-02-23T00:20:12.161Z" }, - { url = "https://files.pythonhosted.org/packages/6d/a0/3cb6f4d2fb3e17428ad2880333cac878909ad1a89f678527b5328b93c1d4/scipy-1.17.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:158dd96d2207e21c966063e1635b1063cd7787b627b6f07305315dd73d9c679e", size = 33019667, upload-time = "2026-02-23T00:20:17.208Z" }, - { url = "https://files.pythonhosted.org/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159, upload-time = "2026-02-23T00:20:23.087Z" }, - { url = "https://files.pythonhosted.org/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771, upload-time = "2026-02-23T00:20:28.636Z" }, - { url = "https://files.pythonhosted.org/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910, upload-time = "2026-02-23T00:20:34.743Z" }, - { url = 
"https://files.pythonhosted.org/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980, upload-time = "2026-02-23T00:20:40.575Z" }, - { url = "https://files.pythonhosted.org/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543, upload-time = "2026-02-23T00:20:45.313Z" }, - { url = "https://files.pythonhosted.org/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510, upload-time = "2026-02-23T00:21:01.015Z" }, - { url = "https://files.pythonhosted.org/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131, upload-time = "2026-02-23T00:21:05.888Z" }, - { url = "https://files.pythonhosted.org/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032, upload-time = "2026-02-23T00:21:09.904Z" }, - { url = "https://files.pythonhosted.org/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766, upload-time = "2026-02-23T00:21:14.313Z" }, - { url = 
"https://files.pythonhosted.org/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007, upload-time = "2026-02-23T00:21:19.663Z" }, - { url = "https://files.pythonhosted.org/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333, upload-time = "2026-02-23T00:21:25.278Z" }, - { url = "https://files.pythonhosted.org/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066, upload-time = "2026-02-23T00:21:31.358Z" }, - { url = "https://files.pythonhosted.org/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763, upload-time = "2026-02-23T00:21:37.247Z" }, - { url = "https://files.pythonhosted.org/packages/4b/39/f0e8ea762a764a9dc52aa7dabcfad51a354819de1f0d4652b6a1122424d6/scipy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:3877ac408e14da24a6196de0ddcace62092bfc12a83823e92e49e40747e52c19", size = 37290984, upload-time = "2026-02-23T00:22:35.023Z" }, - { url = "https://files.pythonhosted.org/packages/7c/56/fe201e3b0f93d1a8bcf75d3379affd228a63d7e2d80ab45467a74b494947/scipy-1.17.1-cp314-cp314-win_arm64.whl", hash = "sha256:f8885db0bc2bffa59d5c1b72fad7a6a92d3e80e7257f967dd81abb553a90d293", size = 25192877, upload-time = "2026-02-23T00:22:39.798Z" }, - { url = 
"https://files.pythonhosted.org/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750, upload-time = "2026-02-23T00:21:42.289Z" }, - { url = "https://files.pythonhosted.org/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858, upload-time = "2026-02-23T00:21:47.706Z" }, - { url = "https://files.pythonhosted.org/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723, upload-time = "2026-02-23T00:21:52.039Z" }, - { url = "https://files.pythonhosted.org/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098, upload-time = "2026-02-23T00:21:56.185Z" }, - { url = "https://files.pythonhosted.org/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397, upload-time = "2026-02-23T00:22:01.404Z" }, - { url = "https://files.pythonhosted.org/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163, upload-time = "2026-02-23T00:22:07.024Z" }, - { url = 
"https://files.pythonhosted.org/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291, upload-time = "2026-02-23T00:22:12.585Z" }, - { url = "https://files.pythonhosted.org/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317, upload-time = "2026-02-23T00:22:18.513Z" }, - { url = "https://files.pythonhosted.org/packages/11/2f/b29eafe4a3fbc3d6de9662b36e028d5f039e72d345e05c250e121a230dd4/scipy-1.17.1-cp314-cp314t-win_amd64.whl", hash = "sha256:eb092099205ef62cd1782b006658db09e2fed75bffcae7cc0d44052d8aa0f484", size = 37345327, upload-time = "2026-02-23T00:22:24.442Z" }, - { url = "https://files.pythonhosted.org/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165, upload-time = "2026-02-23T00:22:29.563Z" }, -] - -[[package]] -name = "sentence-transformers" -version = "5.5.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "huggingface-hub" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scikit-learn", version = "1.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "scipy", version = "1.15.3", source = { 
registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "torch" }, - { name = "tqdm" }, - { name = "transformers" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/cf/d4/7ef93157485e978c016f49da05363c1e4e7237beb5343b64b5631101f0f1/sentence_transformers-5.5.1.tar.gz", hash = "sha256:02b7740dfc60bdbbcb6061625f5d97a5c1a4e2d3baac5f9391b912bb5eae2290", size = 445161, upload-time = "2026-05-20T07:37:44.465Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/03/ee99a6b030e7a2e056547729f8a4709dd93e13d9c6f07590f74c395c4017/sentence_transformers-5.5.1-py3-none-any.whl", hash = "sha256:4fe11d433badc5282d32f7fc08bc714216b7a5aca426f9df77a45a554756deb7", size = 588887, upload-time = "2026-05-20T07:37:43.004Z" }, -] - -[[package]] -name = "setuptools" -version = "81.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0d/1c/73e719955c59b8e424d015ab450f51c0af856ae46ea2da83eba51cc88de1/setuptools-81.0.0.tar.gz", hash = "sha256:487b53915f52501f0a79ccfd0c02c165ffe06631443a886740b91af4b7a5845a", size = 1198299, upload-time = "2026-02-06T21:10:39.601Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" }, -] - -[[package]] -name = "shellingham" -version = "1.5.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = 
"sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, -] - [[package]] name = "six" version = "1.17.0" @@ -4358,18 +3622,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/86/16/426f1f8c7d2678230b68755fcb3c0b8f59bc3a5591efe48343566b8e73c5/strands_agents-1.42.0-py3-none-any.whl", hash = "sha256:6cc04a32fc23a443a651d0e40198d946afc809888ae6fbc1fd07b2d5b6e354b3", size = 440999, upload-time = "2026-06-01T18:38:20.231Z" }, ] -[[package]] -name = "sympy" -version = "1.14.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mpmath" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, -] - [[package]] name = "tenacity" version = "8.5.0" @@ -4379,45 +3631,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687", size = 28165, upload-time = "2024-07-05T07:25:29.591Z" }, ] -[[package]] -name = "threadpoolctl" -version = "3.6.0" 
-source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, -] - -[[package]] -name = "tokenizers" -version = "0.22.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "huggingface-hub" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" }, - { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" }, - { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" }, - { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" }, - { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" }, - { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" }, - { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" }, - { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" }, - { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = 
"sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" }, - { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" }, - { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" }, - { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" }, - { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" }, - { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" }, - { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, - { url = 
"https://files.pythonhosted.org/packages/84/04/655b79dbcc9b3ac5f1479f18e931a344af67e5b7d3b251d2dcdcd7558592/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:753d47ebd4542742ef9261d9da92cd545b2cacbb48349a1225466745bb866ec4", size = 3282301, upload-time = "2026-01-05T10:40:34.858Z" }, - { url = "https://files.pythonhosted.org/packages/46/cd/e4851401f3d8f6f45d8480262ab6a5c8cb9c4302a790a35aa14eeed6d2fd/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e10bf9113d209be7cd046d40fbabbaf3278ff6d18eb4da4c500443185dc1896c", size = 3161308, upload-time = "2026-01-05T10:40:40.737Z" }, - { url = "https://files.pythonhosted.org/packages/6f/6e/55553992a89982cd12d4a66dddb5e02126c58677ea3931efcbe601d419db/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64d94e84f6660764e64e7e0b22baa72f6cd942279fdbb21d46abd70d179f0195", size = 3718964, upload-time = "2026-01-05T10:40:46.56Z" }, - { url = "https://files.pythonhosted.org/packages/59/8c/b1c87148aa15e099243ec9f0cf9d0e970cc2234c3257d558c25a2c5304e6/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f01a9c019878532f98927d2bacb79bbb404b43d3437455522a00a30718cdedb5", size = 3373542, upload-time = "2026-01-05T10:40:52.803Z" }, -] - [[package]] name = "tomli" version = "2.0.2" @@ -4436,128 +3649,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/43/8bd850ee71a191bf072e31302c73a66be413fecdd98fdcd111ecbcce13ca/tomlkit-0.15.0-py3-none-any.whl", hash = "sha256:4dbc8f0fc024412b57ced8757ac7461305126a648ff8c2c807fcb8e133a78738", size = 41328, upload-time = "2026-05-10T07:38:23.517Z" }, ] -[[package]] -name = "torch" -version = "2.12.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cuda-bindings", marker = "sys_platform == 'linux'" }, - { name = "cuda-toolkit", extra = ["cudart", "cufft", 
"cufile", "cupti", "curand", "cusolver", "cusparse", "nvjitlink", "nvrtc", "nvtx"], marker = "sys_platform == 'linux'" }, - { name = "filelock" }, - { name = "fsspec" }, - { name = "jinja2" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "nvidia-cublas", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu13", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu13", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu13", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu13", marker = "sys_platform == 'linux'" }, - { name = "setuptools" }, - { name = "sympy" }, - { name = "triton", marker = "sys_platform == 'linux'" }, - { name = "typing-extensions" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/b7/53fe0436586716ab7aecff41e26b9302d57c85ded481fd83a2cd741e6b4e/torch-2.12.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:1834bd984f8a2f4f16bdfbeecca9146184b220aa46276bf5756735b5dae12812", size = 87981887, upload-time = "2026-05-13T14:55:53.234Z" }, - { url = "https://files.pythonhosted.org/packages/34/60/d930eac44c30de06ed16f6d1ba4e785e1632532b50d8f0bf9bf699a4d0c7/torch-2.12.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d4d029801cb7b6df858804a2a21b00cc2aa0bf0ee5d2ab18d343c9e9e5681f35", size = 426355000, upload-time = "2026-05-13T14:54:31.944Z" }, - { url = "https://files.pythonhosted.org/packages/8e/0c/c76b6a087820bab55705b94dfc074e520de9ae91f5ef90da2ecbf2a3ef12/torch-2.12.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:d47e7dee68ac4cd7a068b26bcd6b989935427709fae1c8f7bd0019978f829e15", size = 532144998, upload-time = "2026-05-13T14:56:05.523Z" }, - { url = 
"https://files.pythonhosted.org/packages/4a/64/8a0d036e166a6aa85ee09bef072f3655d1ba5d5486a68d1b03b6813c01b3/torch-2.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:cf9839790285dd472e7a16aafcb4a4e6bf58ec1b494045044b0eefb0eb4bd1f2", size = 122949877, upload-time = "2026-05-13T14:55:46.841Z" }, - { url = "https://files.pythonhosted.org/packages/18/62/131124fb95df03811b8260d1d43dcc5ee85ea1a344b964613d7efe77fb08/torch-2.12.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:10802fd383bbfed646212e765a72c37d2185205d4f26eb197a254e8ac7ddcb25", size = 87990344, upload-time = "2026-05-13T14:55:42.154Z" }, - { url = "https://files.pythonhosted.org/packages/12/9c/dda0dbd547dc549839824135f223792fd0e725f28ed0715dda366b7acaa2/torch-2.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c12592630aef72feaf18bd3f197ef587bbfa21131b31c38b23ab2e55fce92e36", size = 426362932, upload-time = "2026-05-13T14:54:15.295Z" }, - { url = "https://files.pythonhosted.org/packages/e2/d2/a7dd5a3f9bdaa7842124e8e2359202b317c48d47d2fc5816fafdf2049adb/torch-2.12.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:415c1b8d0412f67551c8e89a2daca0fb3e56694af0281ba155eaa9da481f58b4", size = 532170085, upload-time = "2026-05-13T14:55:20.788Z" }, - { url = "https://files.pythonhosted.org/packages/12/1b/a61ce2004f9ab0ea8964a6e6168133a127795667639e2ff4f8f2bdb16a65/torch-2.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:dd37188ea325042cb1f6cafa56822b11ada2520c04791a52629b0af25bdfbfd9", size = 122953128, upload-time = "2026-05-13T14:54:52.744Z" }, - { url = "https://files.pythonhosted.org/packages/ef/bb/285d643f254731294c9b595a007eac39db4600a98682d7bca688f42ca164/torch-2.12.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:b41339df93d491435e790ff8bcbae1c0ce777175889bfd1281d119862793e6a2", size = 88010197, upload-time = "2026-05-13T14:55:35.414Z" }, - { url = 
"https://files.pythonhosted.org/packages/79/81/76debf1db1343bd929bbb5d74c89fb437c2ed88eb144712557e7bd3eea45/torch-2.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8fbef9f108a863e7722a73740998967e3b074742a834fc5be3a535a2befa7057", size = 426376751, upload-time = "2026-05-13T14:55:03.353Z" }, - { url = "https://files.pythonhosted.org/packages/de/f0/80026028b603c4650ff270fc3785bdef4bd6738765a9cc5a0f5a637d65a2/torch-2.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4b4f64c2c2b11f7510d93dd6412b87025ff6eddd6bb61c3b5a3d892ea20c4756", size = 532261691, upload-time = "2026-05-13T14:52:54.453Z" }, - { url = "https://files.pythonhosted.org/packages/b9/c2/64b06cbb7830fb3cd9be13e1158b31a3f36b68e6a209105ee3c9d9480be0/torch-2.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:8b958caff4a14d3a3b0b2dfc6a378f64dda9728a9dad28c08a0db9ce4dafb549", size = 122988114, upload-time = "2026-05-13T14:54:42.153Z" }, - { url = "https://files.pythonhosted.org/packages/86/ca/01896c80ba921676aa45886b2c5b8d774912de2a1f719de48169c6f755cd/torch-2.12.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:90dd587a5f61bfe1307148b581e2084fc5bc4a06e2b90a20e9a36b81087ff16b", size = 88009511, upload-time = "2026-05-13T14:54:47.411Z" }, - { url = "https://files.pythonhosted.org/packages/a5/04/52bdaf4787eab6ac7d7f5851dff934e4def0bc8ead9c8fd2b69b3e529699/torch-2.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:864392c73b7654f4d2b3ae712f607937d0dbb1101c4555fbb41848106b297f39", size = 426383231, upload-time = "2026-05-13T14:53:32.129Z" }, - { url = "https://files.pythonhosted.org/packages/49/8a/94bdecd13f5aaa90d45920b89789d9fe7c6f4af8c3cdd7ce01fcb59908fc/torch-2.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5d6b560dfa7d56291c07d615c3bb73e8d9943d9b6d87f76cd0d9d570c4797fa6", size = 532269288, upload-time = "2026-05-13T14:53:49.423Z" }, - { url = 
"https://files.pythonhosted.org/packages/3e/2f/bdbaaa267de519ef1b73054bf590d8c93c37a266c9a4e24a01bd38b6918f/torch-2.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:3fee918902090ade827643e758e98363278815de583c75d111fdd665ebffde9f", size = 122987706, upload-time = "2026-05-13T14:54:00.335Z" }, - { url = "https://files.pythonhosted.org/packages/9b/ad/e95e822f3538171e22640a7fbe839a1fdb666600bf6487025de2ff03b11a/torch-2.12.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:10ee1448a9f304d3b987eb4656f664ba6e4d7b410ca7a5a7c642199777a2cf88", size = 88319556, upload-time = "2026-05-13T14:54:05.574Z" }, - { url = "https://files.pythonhosted.org/packages/b7/07/055d06d985b445d67422d25b033c11cf55bbb81785d4c4e68e28bca5820e/torch-2.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:af68dbf403439cae9ceaeaaf92f8352b460787dcd27b92aa05c40dd4a19c0f1e", size = 426397656, upload-time = "2026-05-13T14:52:38.84Z" }, - { url = "https://files.pythonhosted.org/packages/43/94/b0b4fdc3014122e0a7302fb90086d352aa48f2576f0b252561ebb38c01a8/torch-2.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:a6a2eebb237d3b1d9ad3b378e86d9b9e0782afdea8b1e0eba6a13646b9b49c07", size = 532183124, upload-time = "2026-05-13T14:53:16.178Z" }, - { url = "https://files.pythonhosted.org/packages/d8/c8/052405e6ad05d3237bfe5a4df78f917773956f8e17813a2d44c059068b74/torch-2.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2140e373e9a51a3e22ef62e8d14366d0b470d18f0adf19fdc757368077133a34", size = 123232462, upload-time = "2026-05-13T14:52:27.26Z" }, - { url = "https://files.pythonhosted.org/packages/67/dc/ac069f8d6e8be701535921141055293b0d4819d3d7f224a4612cf157c7f9/torch-2.12.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f7dfae4a519197dfa050e98d8e36378a0fb5899625a875c2b54445005a2e404e", size = 88027282, upload-time = "2026-05-13T14:53:05.258Z" }, - { url = 
"https://files.pythonhosted.org/packages/33/c3/1c1eb00e34555b536dddf792676026a988d710ed36981aa00499b36b0620/torch-2.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:891c769072637c74e9a5a77a3bc782894696d8ffec83b938df8536dee7f0ba78", size = 426386961, upload-time = "2026-05-13T14:51:28.406Z" }, - { url = "https://files.pythonhosted.org/packages/cd/d4/7e730dba0c7032a4154dc9056b76cf9625515e030e269cfbf8098fcfee7d/torch-2.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:e2ad3eb85d39c3cab62dfa93ed5a73516e6a53c6713cb97d004004fe089f0f1f", size = 532272265, upload-time = "2026-05-13T14:51:59.308Z" }, - { url = "https://files.pythonhosted.org/packages/f1/b4/92c80d1bbfee1c0036c06d1d2155a3065bd2423134c83bf8a47e65cd6b9b/torch-2.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:c66696857e987efb8bc1777a37357ec4f60ab5e8af6250b83d6034437fa2d8f3", size = 122987138, upload-time = "2026-05-13T14:51:45.942Z" }, - { url = "https://files.pythonhosted.org/packages/7b/78/2e12b37ce50a19a037d7bc62d652a5a8f27385a7b05859d6bc9204f20cfe/torch-2.12.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:b4556715c8572758625d62b6e0ae3b1f76c440221913a6fb5e100f321fb4fb02", size = 88320100, upload-time = "2026-05-13T14:51:39.955Z" }, - { url = "https://files.pythonhosted.org/packages/56/5e/83c450ec7b0bb40a7b74611c1b5440f9260e33c54c90d556fd4a1f0fd955/torch-2.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a43ac605a5e13116c72b64c359644cce0229f213dde48d2ae0ae5eb5becf7feb", size = 426391871, upload-time = "2026-05-13T14:52:14.989Z" }, - { url = "https://files.pythonhosted.org/packages/c9/e9/1a0b575d98d0afedd8f157d23fa3d2759421483660448e60d0a4b10b6daa/torch-2.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6a7512adfdd7f6732e40de1c620831e3c75b39b98cef60b11d0c5f0a76473ec5", size = 532192241, upload-time = "2026-05-13T14:51:07.795Z" }, - { url = 
"https://files.pythonhosted.org/packages/88/21/afadd25ecd81b3cea1e11c73cf1ab41a983a50271548c3ec7ec3b9efc3e9/torch-2.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:5f96b63f8287f66a005dd1b5a6abba2920f11156c5e5c4d815f3e2050fd1aa16", size = 123231092, upload-time = "2026-05-13T14:51:18.854Z" }, -] - -[[package]] -name = "tqdm" -version = "4.68.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/85/05/0d5260f1f1ca784f4a4a0def9cbe6affe587f5b4025328d446c3d67765f4/tqdm-4.68.2.tar.gz", hash = "sha256:89c230e8dbc67c7615c142487111222f878c77427ea09549960f62389e258add", size = 171923, upload-time = "2026-06-09T13:26:42.539Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/75/1a0392bcc21c44dcdf87b3cf2d137e7829be2c083a1e38d44efca3d57a16/tqdm-4.68.2-py3-none-any.whl", hash = "sha256:d4240441fb5353290b87d6a85968c9decc131a99b8c7faa28269d829de669ede", size = 78578, upload-time = "2026-06-09T13:26:40.731Z" }, -] - -[[package]] -name = "transformers" -version = "5.10.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "huggingface-hub" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "packaging" }, - { name = "pyyaml" }, - { name = "regex" }, - { name = "safetensors" }, - { name = "tokenizers" }, - { name = "tqdm" }, - { name = "typer" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8d/38/d5f978bd5091019e89aef29b9a831f5cd70f2598963a3ead8b9570cab592/transformers-5.10.2.tar.gz", hash = "sha256:f9a44b9c8ca9ab1156b467f574d832ea066284299c2fd0ed84641ccb592751fc", size = 8799687, upload-time = "2026-06-04T18:43:49.119Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/73/6f/e1564b0cc182afa05e219a8e09a8e770ffaab879b6b824b56c819bd221da/transformers-5.10.2-py3-none-any.whl", hash = "sha256:8a669db546f82c7c3618cb46ceb0f0afd89292bc70f319c058f8332ec63e268d", size = 11003830, upload-time = "2026-06-04T18:43:45.303Z" }, -] - -[[package]] -name = "triton" -version = "3.7.0" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3e/97/dcd1f2a0f8336691bff74abc59b2ed9c69a0c0f8f65cd77109c49e05f068/triton-3.7.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223ac302091491436c248a34ee1e6c47a1026486579103c906ffd805be50cb89", size = 188367104, upload-time = "2026-05-07T19:04:56.68Z" }, - { url = "https://files.pythonhosted.org/packages/b2/c0/c2ac4fd2d8809b7579d4a820a0f9e5de62a9bc8a757ed4b3abf4f7ee964a/triton-3.7.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c631b65668d4951213b948a413c0564184305b77bb45cc9d686d3e1ecc4701a3", size = 201313191, upload-time = "2026-05-07T18:45:58.444Z" }, - { url = "https://files.pythonhosted.org/packages/b8/c1/5d842314bb6c78442cc60437928781701c6050b8d479bc2a1aed691d37ca/triton-3.7.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a9e71fc392675fac364e0ecf4ef3f76f85b7f5433a16f4c3c5fe5f05a52c85fe", size = 188480277, upload-time = "2026-05-07T19:05:03.231Z" }, - { url = "https://files.pythonhosted.org/packages/13/31/8315ea5f8dd18e60970b3022e3a8b93fd37e0b784fbbef86e10c8e6e5ca1/triton-3.7.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22bacffce443f54593dd20f05294d5a40622e0ea9ab632816f87154504356221", size = 201415942, upload-time = "2026-05-07T18:46:06.479Z" }, - { url = "https://files.pythonhosted.org/packages/f7/13/ec05adfcd87311d532ba61e3af143e8be59fcd26675884c4682841406a20/triton-3.7.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:a4bf49b00a7a377a68a6da603a876e797614e6455a80e9021669c476a953ad9a", size = 188505104, upload-time = "2026-05-07T19:05:09.843Z" }, - { url = "https://files.pythonhosted.org/packages/62/7b/468a576e35beef1426e0828e28e9ba9e65f5474d496f16ee126c15646324/triton-3.7.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8f111161d49bf903c0eaedde3962353a3d841c08a836839b7cc1025b8426efcf", size = 201457567, upload-time = "2026-05-07T18:46:13.505Z" }, - { url = "https://files.pythonhosted.org/packages/01/e1/a59a583de59b8f62c495d67c80ee3ea97d09e91ac80c4c6e76456ed8d8ac/triton-3.7.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:abdf6beaa89b1bcfb9a43cd990536ce66091a997841a4814b260b7bee4c88c3c", size = 188503209, upload-time = "2026-05-07T19:05:17.935Z" }, - { url = "https://files.pythonhosted.org/packages/30/b1/b7507bb9815d403927c8dd51d4158ed2e11751a92dbc118a044f247b6848/triton-3.7.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a35d7afe3f3f058e7ec49fcce09794049e0ffc5c59019ac25ec3413741b8c4e7", size = 201453566, upload-time = "2026-05-07T18:46:20.427Z" }, - { url = "https://files.pythonhosted.org/packages/a6/8f/0bea7a6a0c989315c9135a1d7fb37e41905cfb3a17cbc1f10044ebd4cc3a/triton-3.7.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc1d61c172d257db80ddf42595131fb196ad2e9bdd751e90fe2ef13531734e8b", size = 188612899, upload-time = "2026-05-07T19:05:24.955Z" }, - { url = "https://files.pythonhosted.org/packages/e1/02/d96f57828d0912aec733b9bc7e0e7dbfd2c6f079a8fa433ac25cb93d1a30/triton-3.7.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:70fb9bbdc9f400afc54bbf6eb2670af28829a6ae3996863317964783141daf56", size = 201553816, upload-time = "2026-05-07T18:46:27.49Z" }, - { url = 
"https://files.pythonhosted.org/packages/40/fb/82a802dac4689f2a2fb2e69302e6a138eecc3e175bbe976ba3cfc717683a/triton-3.7.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4a44a8476d0d3571eac4e4d1048e1ff75aad81a09ff4602ccfc56c6dea1672e", size = 188507879, upload-time = "2026-05-07T19:05:32.209Z" }, - { url = "https://files.pythonhosted.org/packages/8f/af/9904ec6d3c93d9b24e5ec360445bbdf758b7f00bfbeedb89cb0eb64eb8bb/triton-3.7.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b9b85e72968a9d8bba5ddb24e9b64aaabaf48affb042f2755cb7cfa92b7531ce", size = 201460637, upload-time = "2026-05-07T18:46:34.749Z" }, - { url = "https://files.pythonhosted.org/packages/a1/f9/4835a8ea746b88727d8899f4e3ccce4f9cacb38abfc3bb0a638266c53111/triton-3.7.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18a160de426fd99f92b0baf509045360afbd3bfaa0b4a5171dde800ec9f09684", size = 188608706, upload-time = "2026-05-07T19:05:39.218Z" }, - { url = "https://files.pythonhosted.org/packages/c1/68/fa86e5a39608000f645535b2c124920126327ab731f8c4fafd5b07ff8d4b/triton-3.7.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce061073102714b725f3660ec6939d94a1da7984b3aa99c921417cae273672f5", size = 201546766, upload-time = "2026-05-07T18:46:42.088Z" }, -] - -[[package]] -name = "typer" -version = "0.26.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "annotated-doc" }, - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "rich" }, - { name = "shellingham" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5e/ed/ef06584ccdd5c410df0837951ecd7e15d9a6144ea1bd4c73cecab1a89891/typer-0.26.7.tar.gz", hash = "sha256:e314a34c617e419c091b2830dda3ea1f257134ff593061a8f5b9717ab8dddb3a", size = 201709, upload-time = "2026-06-03T07:18:06.843Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/24/25/2201973529af2c954de0bb725323c3aaed6d7f0ceee8f550dec9185df013/typer-0.26.7-py3-none-any.whl", hash = "sha256:5c87cfbc5d34491c5346ebf49c23e18d56ccb863268d3a8d592b26087c2f5e58", size = 122456, upload-time = "2026-06-03T07:18:05.732Z" }, -] - [[package]] name = "types-pyyaml" version = "6.0.12.20260518" From 21eba99e08c6b7a5c1113ea0d4e00cb4b3791f87 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 00:19:45 -0500 Subject: [PATCH 276/287] fix: report connector cli entrypoint errors --- src/extended_data/connectors/cli.py | 3 ++- tests/connectors/test_cli.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/extended_data/connectors/cli.py b/src/extended_data/connectors/cli.py index f087f1c..162df52 100644 --- a/src/extended_data/connectors/cli.py +++ b/src/extended_data/connectors/cli.py @@ -339,7 +339,8 @@ def main(argv: Sequence[str] | None = None) -> int: return args.func(args) except KeyboardInterrupt: return 130 - except Exception: + except Exception as e: + _write_stderr(str(e)) return 1 parser.print_help() diff --git a/tests/connectors/test_cli.py b/tests/connectors/test_cli.py index 483fe03..929016f 100644 --- a/tests/connectors/test_cli.py +++ b/tests/connectors/test_cli.py @@ -357,3 +357,19 @@ def test_cli_main_help() -> None: with pytest.raises(SystemExit) as exc: main() assert exc.value.code == 0 + + +def test_cli_main_reports_unexpected_command_errors() -> None: + """Connector CLI entrypoint should not collapse unexpected failures silently.""" + with ( + patch("sys.argv", ["extended-data", "list"]), + patch("extended_data.connectors.cli.cmd_list", side_effect=RuntimeError("failed password=hunter2")), + patch("sys.stderr.write") as mock_write, + ): + exit_code = main() + + assert exit_code == 1 + output = mock_write.call_args.args[0] + assert "failed" in output + assert "hunter2" not in output + assert "password=[REDACTED]" in output From 
fed17a9eaf0e93037f76d902420db01f5db9f9f5 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 00:27:26 -0500 Subject: [PATCH 277/287] feat: align meshy logging with package logger --- docs/package-surface.md | 3 + src/extended_data/connectors/meshy/README.md | 6 ++ .../connectors/meshy/__init__.py | 8 +-- src/extended_data/connectors/meshy/logging.py | 58 ++++++++++++------- tests/connectors/meshy/test_meshy_logging.py | 45 ++++++++++++++ 5 files changed, 94 insertions(+), 26 deletions(-) create mode 100644 tests/connectors/meshy/test_meshy_logging.py diff --git a/docs/package-surface.md b/docs/package-surface.md index 6be69ea..97cd5a2 100644 --- a/docs/package-surface.md +++ b/docs/package-surface.md @@ -377,6 +377,9 @@ same boundary, so connector-local reads do not grow private JSON parsers. AWS S3 JSON object writes and Meshy manifest writes go through the shared export boundary, so connector persistence uses the same Tier 3 data-file encoding path; Meshy vector-store metadata follows the same path. +Meshy logging helpers return `extended_data.logging.Logging` instances with a +Meshy storage marker instead of configuring global Python logging or importing a +connector-local logging stack at module import time. CLI JSON output, MCP tool results, and SecretSync `results_json` are exported through the same path after redaction. GitHub workflow YAML generation and `Logging.exit_run()` stdout serialization diff --git a/src/extended_data/connectors/meshy/README.md b/src/extended_data/connectors/meshy/README.md index 99f3b57..cf8fb3b 100644 --- a/src/extended_data/connectors/meshy/README.md +++ b/src/extended_data/connectors/meshy/README.md @@ -50,11 +50,17 @@ modules from `extended_data.connectors.meshy`. 
```python from extended_data import ConnectorFabric +from extended_data.connectors.meshy import create_meshy_logger fabric = ConnectorFabric(inputs={"MESHY_API_KEY": "..."}, from_environment=False) meshy = fabric.get_connector("meshy") +logger = create_meshy_logger(default_storage_marker="asset-generation") ``` +Meshy logging helpers return the same `extended_data.logging.Logging` type as +the rest of the package; they do not configure global Python logging at import +time. + ## Job Orchestration ```python diff --git a/src/extended_data/connectors/meshy/__init__.py b/src/extended_data/connectors/meshy/__init__.py index 988830d..30512f7 100644 --- a/src/extended_data/connectors/meshy/__init__.py +++ b/src/extended_data/connectors/meshy/__init__.py @@ -40,6 +40,7 @@ from extended_data.connectors.meshy import animate, base, image3d, retexture, rigging, text3d from extended_data.connectors.meshy.base import MeshyAPIError, RateLimitError from extended_data.connectors.meshy.connector import MeshyConnector +from extended_data.connectors.meshy.logging import MESHY_LOGGER_NAME, MESHY_STORAGE_MARKER, create_meshy_logger from extended_data.connectors.meshy.tools import ( get_crewai_tools, get_langchain_tools, @@ -49,18 +50,17 @@ __all__ = [ - # Errors + "MESHY_LOGGER_NAME", + "MESHY_STORAGE_MARKER", "MeshyAPIError", - # Connector "MeshyConnector", "RateLimitError", - # API modules (functional interface) "animate", "base", + "create_meshy_logger", "get_crewai_tools", "get_langchain_tools", "get_strands_tools", - # Tools "get_tools", "image3d", "retexture", diff --git a/src/extended_data/connectors/meshy/logging.py b/src/extended_data/connectors/meshy/logging.py index a603d4d..fc6b434 100644 --- a/src/extended_data/connectors/meshy/logging.py +++ b/src/extended_data/connectors/meshy/logging.py @@ -1,33 +1,47 @@ -"""Rich logging configuration for Meshy SDK.""" +"""Meshy logging helpers backed by Extended Data lifecycle logging.""" from __future__ import annotations -import 
logging +from collections.abc import Sequence -from rich.logging import RichHandler +from extended_data.logging import Logging +from extended_data.logging.const import VERBOSITY +from extended_data.logging.utils import get_log_level -def setup_logging(level: str = "INFO") -> logging.Logger: - """Configure Rich logging with proper exception handling. +MESHY_LOGGER_NAME = "extended_data.connectors.meshy" +MESHY_STORAGE_MARKER = "meshy" - Args: - level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) - Returns: - Configured logger instance +def create_meshy_logger( + *, + level: int | str = "INFO", + logger_name: str = MESHY_LOGGER_NAME, + enable_console: bool = False, + enable_file: bool = False, + log_file_name: str | None = None, + default_storage_marker: str | None = MESHY_STORAGE_MARKER, + allowed_levels: Sequence[str] | None = None, + denied_levels: Sequence[str] | None = None, + enable_verbose_output: bool = False, + verbosity_threshold: int = VERBOSITY, +) -> Logging: + """Create an Extended Data logger configured for Meshy workflows. + + The helper intentionally avoids import-time side effects and global + ``logging.basicConfig`` changes. Callers opt into console or file output + the same way they do with the package-level ``Logging`` surface. 
""" - logging.basicConfig( - level=level, - format="%(message)s", - datefmt="[%X]", - handlers=[RichHandler(rich_tracebacks=True, markup=False, show_path=True)], + logger = Logging( + enable_console=enable_console, + enable_file=enable_file, + logger_name=logger_name, + log_file_name=log_file_name, + default_storage_marker=default_storage_marker, + allowed_levels=allowed_levels, + denied_levels=denied_levels, + enable_verbose_output=enable_verbose_output, + verbosity_threshold=verbosity_threshold, ) - - logger = logging.getLogger("meshy") - logger.setLevel(level) - + logger.logger.setLevel(get_log_level(level)) return logger - - -# Global logger instance -logger = setup_logging() diff --git a/tests/connectors/meshy/test_meshy_logging.py b/tests/connectors/meshy/test_meshy_logging.py new file mode 100644 index 0000000..133d233 --- /dev/null +++ b/tests/connectors/meshy/test_meshy_logging.py @@ -0,0 +1,45 @@ +"""Tests for Meshy logging helpers.""" + +from __future__ import annotations + +import logging + +from extended_data.connectors.meshy import MESHY_LOGGER_NAME, MESHY_STORAGE_MARKER, create_meshy_logger +from extended_data.logging import Logging + + +def test_create_meshy_logger_returns_extended_data_logger() -> None: + """Meshy logging should use the package lifecycle logging surface.""" + logger = create_meshy_logger(level="WARNING") + + assert isinstance(logger, Logging) + assert logger.logger.name == MESHY_LOGGER_NAME + assert logger.logger.level == logging.WARNING + assert logger.enable_console is False + assert logger.enable_file is False + assert logger.default_storage_marker == MESHY_STORAGE_MARKER + + +def test_create_meshy_logger_uses_tier2_storage_and_redaction() -> None: + """Meshy logging should keep stored connector messages promoted and redacted.""" + logger = create_meshy_logger( + level="INFO", + default_storage_marker="asset-generation", + allowed_levels=["info"], + ) + + result = logger.logged_statement( + "Meshy request failed with 
Authorization: Bearer raw_token", + json_data={"api_key": "key_123", "task_id": "task_456"}, + log_level="info", + ) + + assert result is not None + assert "raw_token" not in result + assert "key_123" not in result + stored = logger.get_stored_messages("asset-generation") + assert len(stored) == 1 + stored_message = next(iter(stored)) + assert "raw_token" not in stored_message + assert "key_123" not in stored_message + assert "task_456" in stored_message From 5300ba264744f968f5203d5c0c5014088067cce9 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 00:35:03 -0500 Subject: [PATCH 278/287] fix: resolve MCP schemas from type hints --- src/extended_data/connectors/mcp.py | 14 ++-- tests/connectors/test_mcp.py | 99 +++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 5 deletions(-) diff --git a/src/extended_data/connectors/mcp.py b/src/extended_data/connectors/mcp.py index a10e739..f706adc 100644 --- a/src/extended_data/connectors/mcp.py +++ b/src/extended_data/connectors/mcp.py @@ -25,7 +25,7 @@ import sys from collections.abc import Callable, Iterable, Mapping -from typing import Any, cast +from typing import Any, cast, get_origin, get_type_hints from extended_data.connectors.registry import ( _list_connector_classes, @@ -58,6 +58,10 @@ def _check_mcp_installed() -> bool: def _get_method_schema(method: Callable[..., Any]) -> dict[str, Any]: """Generate JSON schema from method signature.""" sig = inspect.signature(method) + try: + type_hints = get_type_hints(method) + except Exception: + type_hints = {} properties = {} required = [] @@ -68,17 +72,17 @@ def _get_method_schema(method: Callable[..., Any]) -> dict[str, Any]: prop: dict[str, Any] = {"type": "string"} # Default # Try to get type from annotations - if param.annotation != inspect.Parameter.empty: - ann = param.annotation + ann = type_hints.get(name, param.annotation) + if ann != inspect.Parameter.empty: if ann is int: prop = {"type": "integer"} elif ann is float: prop = {"type": 
"number"} elif ann is bool: prop = {"type": "boolean"} - elif ann is list or (hasattr(ann, "__origin__") and ann.__origin__ is list): + elif ann is list or get_origin(ann) is list: prop = {"type": "array"} - elif ann is dict or (hasattr(ann, "__origin__") and ann.__origin__ is dict): + elif ann is dict or get_origin(ann) is dict: prop = {"type": "object"} # Get description from docstring if available diff --git a/tests/connectors/test_mcp.py b/tests/connectors/test_mcp.py index ba26b87..119d619 100644 --- a/tests/connectors/test_mcp.py +++ b/tests/connectors/test_mcp.py @@ -2,6 +2,8 @@ from __future__ import annotations +import json + from unittest.mock import patch import pytest @@ -20,6 +22,14 @@ from extended_data.containers import ExtendedDict, ExtendedList, ExtendedSet +class ExampleMCPConnector: + """Tiny connector shell for MCP handler tests.""" + + def fetch(self, enabled: bool = False, count: int = 0) -> ExtendedDict: + """Fetch example MCP data.""" + return ExtendedDict({"enabled": enabled, "count": count, "password": "hunter2"}) + + def test_create_server() -> None: """Test that the MCP server can be created and has tools.""" pytest.importorskip("mcp") @@ -195,3 +205,92 @@ def test_unknown_tool_text_redacts_sensitive_tool_names() -> None: assert "hunter2" not in text assert "raw_token" not in text assert "[REDACTED]" in text + + +@pytest.mark.asyncio +async def test_create_server_registered_list_tools_handler_exposes_catalog_and_methods() -> None: + """The registered MCP list-tools handler should expose catalog and connector tools.""" + mcp_types = pytest.importorskip("mcp.types") + + with patch("extended_data.connectors.mcp._list_connector_classes", return_value={"example": ExampleMCPConnector}): + server = create_server() + + result = await server.request_handlers[mcp_types.ListToolsRequest](mcp_types.ListToolsRequest()) + tools = {tool.name: tool for tool in result.root.tools} + + assert "extended_data_get_connector_info" in tools + assert 
tools["extended_data_get_connector_info"].inputSchema["required"] == ["name"] + assert "example_fetch" in tools + assert tools["example_fetch"].description == "Fetch example MCP data." + assert tools["example_fetch"].inputSchema["properties"]["enabled"]["type"] == "boolean" + assert tools["example_fetch"].inputSchema["properties"]["count"]["type"] == "integer" + + +@pytest.mark.asyncio +async def test_create_server_registered_catalog_call_handler_uses_shared_result_boundary() -> None: + """The registered MCP call handler should serialize catalog tool results.""" + mcp_types = pytest.importorskip("mcp.types") + server = create_server() + await server.request_handlers[mcp_types.ListToolsRequest](mcp_types.ListToolsRequest()) + + result = await server.request_handlers[mcp_types.CallToolRequest]( + mcp_types.CallToolRequest( + params=mcp_types.CallToolRequestParams( + name="extended_data_get_connector_info", + arguments={"name": "github"}, + ) + ) + ) + + payload = json.loads(result.root.content[0].text) + assert payload["name"] == "github" + assert payload["category"] == "development" + assert "repositories" in payload["capabilities"] + + +@pytest.mark.asyncio +async def test_create_server_registered_connector_call_handler_redacts_payloads() -> None: + """The registered MCP call handler should dispatch connector methods and redact results.""" + mcp_types = pytest.importorskip("mcp.types") + connector = ExampleMCPConnector() + + with ( + patch("extended_data.connectors.mcp._list_connector_classes", return_value={"example": ExampleMCPConnector}), + patch("extended_data.connectors.mcp.get_connector", return_value=connector) as mock_get_connector, + ): + server = create_server() + await server.request_handlers[mcp_types.ListToolsRequest](mcp_types.ListToolsRequest()) + result = await server.request_handlers[mcp_types.CallToolRequest]( + mcp_types.CallToolRequest( + params=mcp_types.CallToolRequestParams( + name="example_fetch", + arguments={"enabled": True, "count": 3}, + 
) + ) + ) + + mock_get_connector.assert_called_once_with("example") + payload = json.loads(result.root.content[0].text) + assert payload == {"enabled": True, "count": 3, "password": "[REDACTED]"} + + +@pytest.mark.asyncio +async def test_create_server_registered_call_handler_redacts_unknown_tools() -> None: + """The registered MCP call handler should sanitize unknown tool diagnostics.""" + mcp_types = pytest.importorskip("mcp.types") + server = create_server() + await server.request_handlers[mcp_types.ListToolsRequest](mcp_types.ListToolsRequest()) + + result = await server.request_handlers[mcp_types.CallToolRequest]( + mcp_types.CallToolRequest( + params=mcp_types.CallToolRequestParams( + name="password=hunter2 Authorization: Bearer raw_token", + arguments={}, + ) + ) + ) + + text = result.root.content[0].text + assert "hunter2" not in text + assert "raw_token" not in text + assert "Unknown tool: password=[REDACTED]" in text From 6d5af83330e313166520a9646513e4c9ecdbc6b4 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 00:41:15 -0500 Subject: [PATCH 279/287] fix: harden meshy MCP error redaction --- src/extended_data/connectors/meshy/README.md | 7 + src/extended_data/connectors/meshy/mcp.py | 11 +- tests/connectors/meshy/test_meshy_mcp.py | 135 ++++++++++++++++++- 3 files changed, 147 insertions(+), 6 deletions(-) diff --git a/src/extended_data/connectors/meshy/README.md b/src/extended_data/connectors/meshy/README.md index cf8fb3b..dc7c5c6 100644 --- a/src/extended_data/connectors/meshy/README.md +++ b/src/extended_data/connectors/meshy/README.md @@ -11,6 +11,13 @@ adapters, and an MCP server. 
pip install "extended-data[meshy]" ``` +Install the MCP extra too when running `meshy-mcp` or wiring Meshy tools into an +MCP client: + +```bash +pip install "extended-data[meshy,mcp]" +``` + Use the `vector` extra only when you need local vector search over generated asset metadata: diff --git a/src/extended_data/connectors/meshy/mcp.py b/src/extended_data/connectors/meshy/mcp.py index ab479fc..6129547 100644 --- a/src/extended_data/connectors/meshy/mcp.py +++ b/src/extended_data/connectors/meshy/mcp.py @@ -268,9 +268,9 @@ def _jsonable_tool_result(result: Any) -> Any: return redact_sensitive_data(result) -def _tool_error_payload(error: object) -> dict[str, str]: +def _tool_error_payload(error: object, *, values: Iterable[Any] | None = None) -> dict[str, str]: """Return an MCP-safe error payload without raw secret values.""" - return {"error": redact_sensitive_text(error)} + return {"error": redact_sensitive_text(error, values=values)} def _tool_payload_text(payload: Any) -> str: @@ -314,9 +314,10 @@ async def list_tools() -> list[Any]: # Handle tool calls @call_decorator() - async def call_tool(name: str, arguments: dict[str, Any]) -> list[Any]: + async def call_tool(name: str, arguments: dict[str, Any] | None) -> list[Any]: from mcp.types import TextContent + tool_arguments = arguments or {} handler = tool_handlers.get(name) if not handler: return [ @@ -327,13 +328,13 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[Any]: ] try: - result = handler(**arguments) + result = handler(**tool_arguments) return [TextContent(type="text", text=_tool_result_text(result))] except Exception as e: return [ TextContent( type="text", - text=_tool_payload_text(_tool_error_payload(e)), + text=_tool_payload_text(_tool_error_payload(e, values=tool_arguments.values())), ) ] diff --git a/tests/connectors/meshy/test_meshy_mcp.py b/tests/connectors/meshy/test_meshy_mcp.py index 0941f32..6b93a25 100644 --- a/tests/connectors/meshy/test_meshy_mcp.py +++ 
b/tests/connectors/meshy/test_meshy_mcp.py @@ -2,10 +2,19 @@ from __future__ import annotations +import json + from unittest.mock import patch +import pytest + from extended_data.connectors.meshy import mcp as meshy_mcp_module -from extended_data.connectors.meshy.mcp import _jsonable_tool_result, _tool_error_payload, _tool_result_text +from extended_data.connectors.meshy.mcp import ( + _jsonable_tool_result, + _tool_error_payload, + _tool_result_text, + create_server, +) from extended_data.containers import ExtendedDict, ExtendedSet @@ -60,3 +69,127 @@ def test_meshy_mcp_error_payload_redacts_unknown_tool_names() -> None: assert "hunter2" not in payload["error"] assert "raw_token" not in payload["error"] assert "[REDACTED]" in payload["error"] + + +def test_meshy_mcp_error_payload_redacts_argument_values() -> None: + """Meshy MCP errors should redact operation-specific argument values.""" + payload = _tool_error_payload( + RuntimeError("failed for user@example.com"), + values=["user@example.com"], + ) + + assert "user@example.com" not in payload["error"] + assert "[REDACTED]" in payload["error"] + + +@pytest.mark.asyncio +async def test_create_server_registered_list_tools_handler_exposes_meshy_tools() -> None: + """The registered Meshy MCP list-tools handler should expose expected schemas.""" + mcp_types = pytest.importorskip("mcp.types") + + server = create_server() + result = await server.request_handlers[mcp_types.ListToolsRequest](mcp_types.ListToolsRequest()) + tools = {tool.name: tool for tool in result.root.tools} + + assert "text3d_generate" in tools + assert tools["text3d_generate"].inputSchema["required"] == ["prompt"] + assert tools["text3d_generate"].inputSchema["properties"]["enable_pbr"]["type"] == "boolean" + assert "check_task_status" in tools + assert tools["check_task_status"].inputSchema["properties"]["task_type"]["default"] == "text-to-3d" + + +@pytest.mark.asyncio +async def test_create_server_registered_call_handler_redacts_payloads() -> None: 
+ """The registered Meshy MCP call handler should serialize and redact tool results.""" + mcp_types = pytest.importorskip("mcp.types") + + def fake_tool(enabled: bool = False) -> ExtendedDict: + return ExtendedDict({"enabled": enabled, "password": "hunter2"}) + + tool = mcp_types.Tool( + name="fake_meshy_tool", + description="Fake Meshy tool.", + inputSchema={ + "type": "object", + "properties": {"enabled": {"type": "boolean", "default": False}}, + "required": [], + }, + ) + + with patch("extended_data.connectors.meshy.mcp._create_mcp_tools", return_value=[(tool, fake_tool)]): + server = create_server() + await server.request_handlers[mcp_types.ListToolsRequest](mcp_types.ListToolsRequest()) + result = await server.request_handlers[mcp_types.CallToolRequest]( + mcp_types.CallToolRequest( + params=mcp_types.CallToolRequestParams( + name="fake_meshy_tool", + arguments={"enabled": True}, + ) + ) + ) + + assert json.loads(result.root.content[0].text) == {"enabled": True, "password": "[REDACTED]"} + + +@pytest.mark.asyncio +async def test_create_server_registered_call_handler_redacts_error_argument_values() -> None: + """The registered Meshy MCP call handler should redact operation-specific error values.""" + mcp_types = pytest.importorskip("mcp.types") + + def fake_tool(email: str) -> None: + raise RuntimeError(f"failed for {email} with api_key=key_123") + + tool = mcp_types.Tool( + name="fake_meshy_tool", + description="Fake Meshy tool.", + inputSchema={ + "type": "object", + "properties": {"email": {"type": "string"}}, + "required": ["email"], + }, + ) + + with patch("extended_data.connectors.meshy.mcp._create_mcp_tools", return_value=[(tool, fake_tool)]): + server = create_server() + await server.request_handlers[mcp_types.ListToolsRequest](mcp_types.ListToolsRequest()) + result = await server.request_handlers[mcp_types.CallToolRequest]( + mcp_types.CallToolRequest( + params=mcp_types.CallToolRequestParams( + name="fake_meshy_tool", + arguments={"email": 
"user@example.com"}, + ) + ) + ) + + payload = json.loads(result.root.content[0].text) + assert "user@example.com" not in payload["error"] + assert "key_123" not in payload["error"] + assert "[REDACTED]" in payload["error"] + + +@pytest.mark.asyncio +async def test_create_server_registered_call_handler_accepts_missing_arguments() -> None: + """The registered Meshy MCP call handler should treat omitted arguments as empty.""" + mcp_types = pytest.importorskip("mcp.types") + + def fake_tool() -> ExtendedDict: + return ExtendedDict({"status": "ok"}) + + tool = mcp_types.Tool( + name="fake_meshy_tool", + description="Fake Meshy tool.", + inputSchema={"type": "object", "properties": {}, "required": []}, + ) + + with patch("extended_data.connectors.meshy.mcp._create_mcp_tools", return_value=[(tool, fake_tool)]): + server = create_server() + await server.request_handlers[mcp_types.ListToolsRequest](mcp_types.ListToolsRequest()) + result = await server.request_handlers[mcp_types.CallToolRequest]( + mcp_types.CallToolRequest( + params=mcp_types.CallToolRequestParams( + name="fake_meshy_tool", + ) + ) + ) + + assert json.loads(result.root.content[0].text) == {"status": "ok"} From e6a414232c21104ec55d71d670a2c81d5feef1b0 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 00:47:23 -0500 Subject: [PATCH 280/287] test: expand google workspace coverage --- tests/connectors/test_google_workspace.py | 328 ++++++++++++++++++++++ 1 file changed, 328 insertions(+) diff --git a/tests/connectors/test_google_workspace.py b/tests/connectors/test_google_workspace.py index 21d2587..adfd08b 100644 --- a/tests/connectors/test_google_workspace.py +++ b/tests/connectors/test_google_workspace.py @@ -19,6 +19,15 @@ def _logged_text(logger: MagicMock) -> str: return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) +def _http_error(status: int): + """Return a Google API HttpError with the requested status.""" + from googleapiclient.errors import HttpError + + 
response = MagicMock() + response.status = status + return HttpError(response, b"Google API error") + + @pytest.fixture def google_connector(): """Create Google connector with mocked services.""" @@ -95,6 +104,34 @@ def test_list_users_pagination(self, google_connector): assert len(result) == 2 assert mock_users.list.return_value.execute.call_count == 2 + def test_list_workspace_users_unhumps_and_uses_subject(self, google_connector): + """Legacy Workspace user listing should still promote and unhump payloads.""" + mock_service = MagicMock() + mock_users = mock_service.users.return_value + mock_users.list.return_value.execute.side_effect = [ + { + "users": [{"primaryEmail": "user1@example.com", "orgUnitPath": "/Engineering"}], + "nextPageToken": "next", + }, + {"users": [{"primaryEmail": "user2@example.com", "orgUnitPath": "/Sales"}]}, + ] + google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + result = google_connector.list_workspace_users( + domain="example.com", + max_results=100, + unhump_users=True, + subject="admin@example.com", + ) + + assert isinstance(result, ExtendedList) + assert result[0]["primary_email"] == "user1@example.com" + assert result[0]["org_unit_path"] == "/Engineering" + google_connector.get_admin_directory_service.assert_called_once_with(subject="admin@example.com") + first_call, second_call = mock_users.list.call_args_list + assert first_call.kwargs == {"customer": "my_customer", "maxResults": 100, "domain": "example.com"} + assert second_call.kwargs["pageToken"] == "next" + def test_get_user(self, google_connector): """Test getting a specific user.""" mock_service = MagicMock() @@ -205,6 +242,87 @@ def test_delete_user(self, google_connector): mock_users.delete.assert_called_once_with(userKey="user1@example.com") + def test_create_or_update_user_returns_existing_when_updates_disabled(self, google_connector): + """Idempotent user creation should return existing users without mutation by default.""" + 
mock_service = MagicMock() + mock_users = mock_service.users.return_value + mock_users.get.return_value.execute.return_value = {"primaryEmail": "existing@example.com"} + google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + result = google_connector.create_or_update_user( + primary_email="existing@example.com", + given_name="Existing", + family_name="User", + password="SecurePass123!", + ) + + assert isinstance(result, ExtendedDict) + assert result["primaryEmail"] == "existing@example.com" + mock_users.update.assert_not_called() + mock_users.insert.assert_not_called() + logs = _logged_text(google_connector.logger) + assert "[REDACTED]" in logs + assert "existing@example.com" not in logs + + def test_create_or_update_user_updates_existing_with_builtin_body(self, google_connector): + """Idempotent user creation should lower extended update payloads before SDK calls.""" + mock_service = MagicMock() + mock_users = mock_service.users.return_value + mock_users.get.return_value.execute.return_value = {"primaryEmail": "existing@example.com"} + mock_users.update.return_value.execute.return_value = {"primaryEmail": "existing@example.com", "updated": True} + google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + result = google_connector.create_or_update_user( + primary_email="existing@example.com", + given_name="Existing", + family_name="User", + password="SecurePass123!", + update_if_exists=True, + customSchemas=extend_data({"HR": {"level": "5"}}), + ) + + assert isinstance(result, ExtendedDict) + assert result["updated"] is True + mock_users.insert.assert_not_called() + body = mock_users.update.call_args.kwargs["body"] + assert isinstance(body, dict) + assert isinstance(body["customSchemas"], dict) + assert body["customSchemas"] == {"HR": {"level": "5"}} + + def test_create_or_update_user_creates_when_missing(self, google_connector): + """Idempotent user creation should insert when the user is not found.""" 
+ mock_service = MagicMock() + mock_users = mock_service.users.return_value + mock_users.get.return_value.execute.side_effect = _http_error(404) + mock_users.insert.return_value.execute.return_value = {"primaryEmail": "newuser@example.com"} + google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + result = google_connector.create_or_update_user( + primary_email="newuser@example.com", + given_name="New", + family_name="User", + password="SecurePass123!", + ) + + assert isinstance(result, ExtendedDict) + assert result["primaryEmail"] == "newuser@example.com" + mock_users.insert.assert_called_once() + + def test_create_or_update_user_reraises_non_not_found_errors(self, google_connector): + """Idempotent user creation should not mask unexpected Google API errors.""" + mock_service = MagicMock() + mock_users = mock_service.users.return_value + mock_users.get.return_value.execute.side_effect = _http_error(403) + google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + with pytest.raises(Exception, match="Google API error"): + google_connector.create_or_update_user( + primary_email="blocked@example.com", + given_name="Blocked", + family_name="User", + password="SecurePass123!", + ) + class TestWorkspaceGroups: """Tests for Workspace group operations.""" @@ -244,6 +362,33 @@ def test_list_groups_with_domain(self, google_connector): call_args = mock_groups.list.call_args[1] assert call_args["domain"] == "example.com" + def test_list_workspace_groups_unhumps_and_uses_subject(self, google_connector): + """Legacy Workspace group listing should still promote and unhump payloads.""" + mock_service = MagicMock() + mock_groups = mock_service.groups.return_value + mock_groups.list.return_value.execute.side_effect = [ + { + "groups": [{"email": "group1@example.com", "directMembersCount": "5"}], + "nextPageToken": "next", + }, + {"groups": [{"email": "group2@example.com", "directMembersCount": "2"}]}, + ] + 
google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + result = google_connector.list_workspace_groups( + domain="example.com", + max_results=50, + unhump_groups=True, + subject="admin@example.com", + ) + + assert isinstance(result, ExtendedList) + assert result[0]["direct_members_count"] == "5" + google_connector.get_admin_directory_service.assert_called_once_with(subject="admin@example.com") + first_call, second_call = mock_groups.list.call_args_list + assert first_call.kwargs == {"customer": "my_customer", "maxResults": 50, "domain": "example.com"} + assert second_call.kwargs["pageToken"] == "next" + def test_get_group(self, google_connector): """Test getting a specific group.""" mock_service = MagicMock() @@ -363,6 +508,75 @@ def test_remove_group_member(self, google_connector): mock_members.delete.assert_called_once() + def test_create_or_update_group_returns_existing_when_updates_disabled(self, google_connector): + """Idempotent group creation should return existing groups without mutation by default.""" + mock_service = MagicMock() + mock_groups = mock_service.groups.return_value + mock_groups.get.return_value.execute.return_value = {"email": "existing-group@example.com"} + google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + result = google_connector.create_or_update_group( + email="existing-group@example.com", + name="Existing Group", + ) + + assert isinstance(result, ExtendedDict) + assert result["email"] == "existing-group@example.com" + mock_groups.update.assert_not_called() + mock_groups.insert.assert_not_called() + + def test_create_or_update_group_updates_existing_with_additional_fields(self, google_connector): + """Idempotent group creation should lower extended group payloads before SDK calls.""" + mock_service = MagicMock() + mock_groups = mock_service.groups.return_value + mock_groups.get.return_value.execute.return_value = {"email": "existing-group@example.com"} + 
mock_groups.update.return_value.execute.return_value = {"email": "existing-group@example.com", "updated": True} + google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + result = google_connector.create_or_update_group( + email="existing-group@example.com", + name="Existing Group", + update_if_exists=True, + settings=extend_data({"whoCanPostMessage": "ALL_MEMBERS_CAN_POST"}), + ) + + assert isinstance(result, ExtendedDict) + assert result["updated"] is True + mock_groups.insert.assert_not_called() + body = mock_groups.update.call_args.kwargs["body"] + assert isinstance(body, dict) + assert body["settings"] == {"whoCanPostMessage": "ALL_MEMBERS_CAN_POST"} + + def test_create_or_update_group_creates_when_missing(self, google_connector): + """Idempotent group creation should insert when the group is not found.""" + mock_service = MagicMock() + mock_groups = mock_service.groups.return_value + mock_groups.get.return_value.execute.side_effect = _http_error(404) + mock_groups.insert.return_value.execute.return_value = {"email": "newgroup@example.com"} + google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + result = google_connector.create_or_update_group( + email="newgroup@example.com", + name="New Group", + ) + + assert isinstance(result, ExtendedDict) + assert result["email"] == "newgroup@example.com" + mock_groups.insert.assert_called_once() + + def test_create_or_update_group_reraises_non_not_found_errors(self, google_connector): + """Idempotent group creation should not mask unexpected Google API errors.""" + mock_service = MagicMock() + mock_groups = mock_service.groups.return_value + mock_groups.get.return_value.execute.side_effect = _http_error(403) + google_connector.get_admin_directory_service = MagicMock(return_value=mock_service) + + with pytest.raises(Exception, match="Google API error"): + google_connector.create_or_update_group( + email="blocked-group@example.com", + name="Blocked Group", + ) 
+ class TestWorkspaceOrgUnits: """Tests for Workspace organizational unit operations.""" @@ -386,3 +600,117 @@ def test_list_org_units(self, google_connector): assert isinstance(result[0]["name"], ExtendedString) assert len(result) == 2 assert result[0]["name"] == "Engineering" + + +class TestWorkspaceLicenses: + """Tests for Workspace license operations.""" + + def test_list_available_licenses_uses_delegated_credentials_and_paginates(self, google_connector): + """License listing should use the licensing scope, subject delegation, and pagination.""" + credentials = MagicMock(name="credentials") + delegated_credentials = MagicMock(name="delegated_credentials") + credentials.with_subject.return_value = delegated_credentials + mock_service = MagicMock() + mock_assignments = mock_service.licenseAssignments.return_value + mock_assignments.listForProduct.return_value.execute.side_effect = [ + { + "items": [{"skuId": "sku-1", "userId": "user1@example.com"}], + "nextPageToken": "next", + }, + {"items": [{"skuId": "sku-2", "userId": "user2@example.com"}]}, + ] + + with ( + patch("google.oauth2.service_account.Credentials.from_service_account_info", return_value=credentials) as from_info, + patch("googleapiclient.discovery.build", return_value=mock_service) as build, + ): + result = google_connector.list_available_licenses( + customer_id="customer-1", + product_id="Google-Apps", + subject="admin@example.com", + ) + + assert isinstance(result, ExtendedList) + assert isinstance(result[0], ExtendedDict) + assert result[0]["productId"] == "Google-Apps" + assert result[1]["skuId"] == "sku-2" + from_info.assert_called_once_with( + google_connector.service_account_info, + scopes=["https://www.googleapis.com/auth/apps.licensing"], + ) + credentials.with_subject.assert_called_once_with("admin@example.com") + build.assert_called_once_with( + "licensing", + "v1", + credentials=delegated_credentials, + cache_discovery=False, + ) + first_call, second_call = 
mock_assignments.listForProduct.call_args_list + assert first_call.kwargs == {"productId": "Google-Apps", "customerId": "customer-1"} + assert second_call.kwargs["pageToken"] == "next" + + def test_list_available_licenses_ignores_unavailable_products(self, google_connector): + """Unavailable or forbidden products should not fail broad license discovery.""" + credentials = MagicMock(name="credentials") + mock_service = MagicMock() + mock_assignments = mock_service.licenseAssignments.return_value + mock_assignments.listForProduct.return_value.execute.side_effect = [ + _http_error(404), + _http_error(403), + {"items": [{"skuId": "sku-1"}]}, + {"items": []}, + {"items": []}, + {"items": []}, + ] + + with ( + patch("google.oauth2.service_account.Credentials.from_service_account_info", return_value=credentials), + patch("googleapiclient.discovery.build", return_value=mock_service), + ): + result = google_connector.list_available_licenses() + + assert isinstance(result, ExtendedList) + assert result[0]["productId"] == "101034" + assert mock_assignments.listForProduct.call_count == 6 + + def test_list_available_licenses_logs_unexpected_product_errors(self, google_connector): + """Unexpected product errors should be logged and redacted without aborting discovery.""" + credentials = MagicMock(name="credentials") + mock_service = MagicMock() + mock_assignments = mock_service.licenseAssignments.return_value + mock_assignments.listForProduct.return_value.execute.side_effect = [_http_error(500)] + + with ( + patch("google.oauth2.service_account.Credentials.from_service_account_info", return_value=credentials), + patch("googleapiclient.discovery.build", return_value=mock_service), + ): + result = google_connector.list_available_licenses(product_id="private-product@example.com") + + assert result == [] + logs = _logged_text(google_connector.logger) + assert "[REDACTED]" in logs + assert "private-product@example.com" not in logs + + def 
test_get_license_summary_counts_assigned_skus(self, google_connector): + """License summaries should aggregate promoted license payloads by product and SKU.""" + google_connector.list_available_licenses = MagicMock( + return_value=extend_data( + [ + {"productId": "Google-Apps", "skuId": "sku-1"}, + {"productId": "Google-Apps", "skuId": "sku-1"}, + {"productId": "Google-Vault", "skuId": "sku-2"}, + {"skuId": "sku-unknown"}, + ] + ) + ) + + result = google_connector.get_license_summary(customer_id="customer-1", subject="admin@example.com") + + assert isinstance(result, ExtendedDict) + assert result["Google-Apps/sku-1"]["assigned"] == 2 + assert result["Google-Vault/sku-2"]["assigned"] == 1 + assert result["unknown/sku-unknown"]["assigned"] == 1 + google_connector.list_available_licenses.assert_called_once_with( + customer_id="customer-1", + subject="admin@example.com", + ) From 6720d19ea68415b6e6dd34f9ff7f3bfa2145dc4c Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 00:52:35 -0500 Subject: [PATCH 281/287] test: cover meshy task helper flows --- tests/connectors/meshy/test_task_ids.py | 230 +++++++++++++++++++++++- 1 file changed, 229 insertions(+), 1 deletion(-) diff --git a/tests/connectors/meshy/test_task_ids.py b/tests/connectors/meshy/test_task_ids.py index 0572a44..9c39b2d 100644 --- a/tests/connectors/meshy/test_task_ids.py +++ b/tests/connectors/meshy/test_task_ids.py @@ -4,7 +4,7 @@ import json -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, call, patch import httpx import pytest @@ -12,9 +12,11 @@ from extended_data.connectors.meshy import animate, image3d, retexture, rigging, text3d from extended_data.connectors.meshy.models import ( AnimationRequest, + ArtStyle, Image3DRequest, RetextureRequest, RiggingRequest, + TaskStatus, Text3DRequest, ) from extended_data.containers import ExtendedDict, ExtendedString @@ -83,6 +85,200 @@ def test_retexture_task_id_is_extended_string() -> None: assert created == 
"retexture-task" +def test_text3d_generate_builds_request_and_waits() -> None: + """Text generation should build the preview request and poll the created task.""" + completed = ExtendedDict({"id": "text-task", "status": TaskStatus.SUCCEEDED}) + + with ( + patch("extended_data.connectors.meshy.text3d.create", return_value=ExtendedString("text-task")) as create, + patch("extended_data.connectors.meshy.text3d.poll", return_value=completed) as poll, + ): + result = text3d.generate( + "a low-poly castle", + art_style="sculpture", + negative_prompt="blurry", + target_polycount=1234, + enable_pbr=False, + ) + + assert result is completed + create.assert_called_once() + request = create.call_args.args[0] + assert isinstance(request, Text3DRequest) + assert request.mode == "preview" + assert request.prompt == "a low-poly castle" + assert request.art_style == ArtStyle.SCULPTURE + assert request.negative_prompt == "blurry" + assert request.target_polycount == 1234 + assert request.enable_pbr is False + poll.assert_called_once_with("text-task") + + +def test_text3d_generate_without_wait_returns_task_id() -> None: + """Text generation should expose submitted task IDs without polling when wait is disabled.""" + with ( + patch("extended_data.connectors.meshy.text3d.create", return_value=ExtendedString("text-task")) as create, + patch("extended_data.connectors.meshy.text3d.poll") as poll, + ): + result = text3d.generate("a low-poly crate", wait=False) + + assert isinstance(result, ExtendedString) + assert result == "text-task" + create.assert_called_once() + poll.assert_not_called() + + +def test_image3d_generate_builds_request_and_waits() -> None: + """Image generation should build the preview request and poll the created task.""" + completed = ExtendedDict({"id": "image-task", "status": TaskStatus.SUCCEEDED}) + + with ( + patch("extended_data.connectors.meshy.image3d.create", return_value=ExtendedString("image-task")) as create, + 
patch("extended_data.connectors.meshy.image3d.poll", return_value=completed) as poll, + ): + result = image3d.generate( + "https://example.com/source.png", + topology="quad", + target_polycount=4321, + enable_pbr=False, + ) + + assert result is completed + create.assert_called_once() + request = create.call_args.args[0] + assert isinstance(request, Image3DRequest) + assert request.mode == "preview" + assert request.image_url == "https://example.com/source.png" + assert request.topology == "quad" + assert request.target_polycount == 4321 + assert request.enable_pbr is False + poll.assert_called_once_with("image-task") + + +def test_animation_apply_builds_request_and_waits() -> None: + """Animation application should build the animation request and poll the created task.""" + completed = ExtendedDict({"id": "animation-task", "status": TaskStatus.SUCCEEDED}) + + with ( + patch("extended_data.connectors.meshy.animate.create", return_value=ExtendedString("animation-task")) as create, + patch("extended_data.connectors.meshy.animate.poll", return_value=completed) as poll, + ): + result = animate.apply("rig-task", 42, loop=False, frame_rate=24) + + assert result is completed + create.assert_called_once() + request = create.call_args.args[0] + assert isinstance(request, AnimationRequest) + assert request.rig_task_id == "rig-task" + assert request.action_id == 42 + assert request.loop is False + assert request.frame_rate == 24 + poll.assert_called_once_with("animation-task") + + +def test_rigging_helpers_build_task_and_url_requests() -> None: + """Rigging helpers should build task-id and model-url request payloads.""" + with ( + patch("extended_data.connectors.meshy.rigging.create", return_value=ExtendedString("rig-task")) as create, + patch("extended_data.connectors.meshy.rigging.poll") as poll, + ): + task_result = rigging.rig("model-task", height_meters=1.9, wait=False) + url_result = rigging.rig_from_url( + "https://example.com/model.glb", + height_meters=1.8, + 
texture_url="https://example.com/texture.png", + wait=False, + ) + + assert task_result == "rig-task" + assert url_result == "rig-task" + task_request, url_request = [call.args[0] for call in create.call_args_list] + assert isinstance(task_request, RiggingRequest) + assert task_request.input_task_id == "model-task" + assert task_request.height_meters == 1.9 + assert isinstance(url_request, RiggingRequest) + assert url_request.model_url == "https://example.com/model.glb" + assert url_request.texture_image_url == "https://example.com/texture.png" + assert url_request.height_meters == 1.8 + poll.assert_not_called() + + +def test_rigging_helpers_poll_when_wait_enabled() -> None: + """Rigging helpers should poll created task IDs when wait is enabled.""" + task_completed = ExtendedDict({"id": "rig-task", "status": TaskStatus.SUCCEEDED}) + url_completed = ExtendedDict({"id": "url-rig-task", "status": TaskStatus.SUCCEEDED}) + + with ( + patch( + "extended_data.connectors.meshy.rigging.create", + side_effect=[ExtendedString("rig-task"), ExtendedString("url-rig-task")], + ), + patch("extended_data.connectors.meshy.rigging.poll", side_effect=[task_completed, url_completed]) as poll, + ): + task_result = rigging.rig("model-task") + url_result = rigging.rig_from_url("https://example.com/model.glb") + + assert task_result is task_completed + assert url_result is url_completed + assert poll.call_args_list == [call("rig-task"), call("url-rig-task")] + + +def test_retexture_helpers_build_text_and_image_style_requests() -> None: + """Retexture helpers should build text-prompt and image-reference request payloads.""" + with ( + patch("extended_data.connectors.meshy.retexture.create", return_value=ExtendedString("retexture-task")) as create, + patch("extended_data.connectors.meshy.retexture.poll") as poll, + ): + text_result = retexture.apply( + "model-task", + "gold leaf", + enable_original_uv=False, + enable_pbr=False, + wait=False, + ) + image_result = retexture.apply_from_image( + 
"model-task", + "https://example.com/style.png", + enable_original_uv=True, + enable_pbr=True, + wait=False, + ) + + assert text_result == "retexture-task" + assert image_result == "retexture-task" + text_request, image_request = [call.args[0] for call in create.call_args_list] + assert isinstance(text_request, RetextureRequest) + assert text_request.input_task_id == "model-task" + assert text_request.text_style_prompt == "gold leaf" + assert text_request.enable_original_uv is False + assert text_request.enable_pbr is False + assert isinstance(image_request, RetextureRequest) + assert image_request.image_style_url == "https://example.com/style.png" + assert image_request.enable_original_uv is True + assert image_request.enable_pbr is True + poll.assert_not_called() + + +def test_retexture_helpers_poll_when_wait_enabled() -> None: + """Retexture helpers should poll created task IDs when wait is enabled.""" + text_completed = ExtendedDict({"id": "retexture-task", "status": TaskStatus.SUCCEEDED}) + image_completed = ExtendedDict({"id": "image-retexture-task", "status": TaskStatus.SUCCEEDED}) + + with ( + patch( + "extended_data.connectors.meshy.retexture.create", + side_effect=[ExtendedString("retexture-task"), ExtendedString("image-retexture-task")], + ), + patch("extended_data.connectors.meshy.retexture.poll", side_effect=[text_completed, image_completed]) as poll, + ): + text_result = retexture.apply("model-task", "gold leaf") + image_result = retexture.apply_from_image("model-task", "https://example.com/style.png") + + assert text_result is text_completed + assert image_result is image_completed + assert poll.call_args_list == [call("retexture-task"), call("image-retexture-task")] + + @pytest.mark.parametrize( ("request_path", "call"), [ @@ -241,6 +437,17 @@ def test_meshy_get_responses_redact_validation_failures(request_path: str, call) assert "[REDACTED]" in message +@pytest.mark.parametrize("module", [text3d, image3d, retexture, rigging, animate]) +def 
test_meshy_poll_returns_succeeded_tasks(monkeypatch: pytest.MonkeyPatch, module: object) -> None: + """All Meshy polling helpers should return succeeded task payloads directly.""" + completed = ExtendedDict({"id": "task-123", "status": TaskStatus.SUCCEEDED}) + monkeypatch.setattr(module, "get", lambda task_id: completed) + + result = module.poll("task-123", interval=0, timeout=1) + + assert result is completed + + @pytest.mark.parametrize("module", [text3d, image3d, retexture, rigging, animate]) def test_meshy_poll_redacts_failed_task_errors(monkeypatch: pytest.MonkeyPatch, module: object) -> None: """All Meshy polling helpers should redact vendor task failure messages.""" @@ -265,6 +472,27 @@ def test_meshy_poll_redacts_failed_task_errors(monkeypatch: pytest.MonkeyPatch, assert "[REDACTED]" in message +@pytest.mark.parametrize("module", [text3d, image3d, retexture, rigging, animate]) +def test_meshy_poll_raises_for_expired_tasks(monkeypatch: pytest.MonkeyPatch, module: object) -> None: + """All Meshy polling helpers should fail loudly when tasks expire.""" + monkeypatch.setattr(module, "get", lambda task_id: {"id": task_id, "status": TaskStatus.EXPIRED}) + + with pytest.raises(RuntimeError, match="Task expired"): + module.poll("task-expired", interval=0, timeout=1) + + +@pytest.mark.parametrize("module", [text3d, image3d, retexture, rigging, animate]) +def test_meshy_poll_times_out_pending_tasks(monkeypatch: pytest.MonkeyPatch, module: object) -> None: + """All Meshy polling helpers should time out pending tasks.""" + times = iter([0.0, 2.0]) + monkeypatch.setattr(module, "get", lambda task_id: {"id": task_id, "status": TaskStatus.PENDING}) + monkeypatch.setattr(module.time, "time", lambda: next(times)) + monkeypatch.setattr(module.time, "sleep", MagicMock()) + + with pytest.raises(TimeoutError, match="Task timed out after 1s"): + module.poll("task-pending", interval=0, timeout=1) + + @pytest.mark.parametrize("payload", [{"result": ""}, {"result": 123}, ["not", 
"a", "mapping"]]) def test_meshy_task_id_response_requires_non_empty_string_result(payload: object) -> None: """Task ids are string API handles, not arbitrary JSON payload values.""" From 747f73ed166bb350e8ee5330e72313d73fadf488 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 00:57:47 -0500 Subject: [PATCH 282/287] test: expand google billing coverage --- tests/connectors/test_google_billing.py | 255 +++++++++++++++++++++++- 1 file changed, 253 insertions(+), 2 deletions(-) diff --git a/tests/connectors/test_google_billing.py b/tests/connectors/test_google_billing.py index 117f20b..2c1dfab 100644 --- a/tests/connectors/test_google_billing.py +++ b/tests/connectors/test_google_billing.py @@ -6,7 +6,7 @@ from collections import deque from collections.abc import Iterable from typing import Any -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import pytest @@ -22,6 +22,15 @@ def _logged_text(logger: MagicMock) -> str: return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) +def _http_error(status: int): + """Return a Google API HttpError with the requested status.""" + from googleapiclient.errors import HttpError + + response = MagicMock() + response.status = status + return HttpError(response, b"Google API error") + + class _ImmediateResponse: def __init__(self, response: dict[str, Any]): self._response = response @@ -84,9 +93,16 @@ def projects(self): class _TestGoogleBilling(GoogleBillingMixin): - def __init__(self, service: _StubBillingService): + def __init__(self, service: Any): self.logger = MagicMock() self._service = service + self.service_account_info = { + "type": "service_account", + "client_email": "test@example.iam.gserviceaccount.com", + "private_key": "-----BEGIN RSA PRIVATE KEY-----\nMIIE...test\n-----END RSA PRIVATE KEY-----\n", + "private_key_id": "key123", + "project_id": "test-project", + } def get_billing_service(self): return self._service @@ -128,6 +144,84 @@ def 
test_list_billing_accounts_paginates_and_unhumps(): ] +def test_get_billing_account_prefixes_name_and_promotes_result(): + service = MagicMock() + service.billingAccounts.return_value.get.return_value.execute.return_value = { + "name": "billingAccounts/1234-ABCD", + "displayName": "Primary", + } + connector = _TestGoogleBilling(service) + + account = connector.get_billing_account("1234-ABCD") + + assert isinstance(account, ExtendedDict) + assert account["name"] == "billingAccounts/1234-ABCD" + assert isinstance(account["displayName"], ExtendedString) + service.billingAccounts.return_value.get.assert_called_once_with(name="billingAccounts/1234-ABCD") + + +def test_get_billing_account_accepts_prefixed_name(): + service = MagicMock() + service.billingAccounts.return_value.get.return_value.execute.return_value = { + "name": "billingAccounts/1234-ABCD", + } + connector = _TestGoogleBilling(service) + + connector.get_billing_account("billingAccounts/1234-ABCD") + + service.billingAccounts.return_value.get.assert_called_once_with(name="billingAccounts/1234-ABCD") + + +def test_get_billing_account_returns_none_for_not_found(): + service = MagicMock() + service.billingAccounts.return_value.get.return_value.execute.side_effect = _http_error(404) + connector = _TestGoogleBilling(service) + + account = connector.get_billing_account("private-account@example.com") + + assert account is None + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private-account@example.com" not in logs + + +def test_get_billing_account_reraises_unexpected_errors(): + service = MagicMock() + service.billingAccounts.return_value.get.return_value.execute.side_effect = _http_error(403) + connector = _TestGoogleBilling(service) + + with pytest.raises(Exception, match="Google API error"): + connector.get_billing_account("1234-ABCD") + + +def test_get_project_billing_info_promotes_result(): + service = MagicMock() + 
service.projects.return_value.getBillingInfo.return_value.execute.return_value = { + "name": "projects/demo-project/billingInfo", + "billingEnabled": True, + } + connector = _TestGoogleBilling(service) + + info = connector.get_project_billing_info("demo-project") + + assert isinstance(info, ExtendedDict) + assert info["billingEnabled"] is True + service.projects.return_value.getBillingInfo.assert_called_once_with(name="projects/demo-project") + + +def test_get_project_billing_info_returns_none_for_not_found(): + service = MagicMock() + service.projects.return_value.getBillingInfo.return_value.execute.side_effect = _http_error(404) + connector = _TestGoogleBilling(service) + + info = connector.get_project_billing_info("secret-project@example.com") + + assert info is None + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "secret-project@example.com" not in logs + + def test_update_project_billing_info_prefixes_account_name(): service = _StubBillingService(account_responses=[], project_responses=[]) connector = _TestGoogleBilling(service) @@ -203,3 +297,160 @@ def test_list_billing_account_projects_handles_prefixing(): {"name": "billingAccounts/123456-AAAA"}, {"name": "billingAccounts/123456-AAAA", "pageToken": "p1"}, ] + + +def test_get_billing_account_iam_policy_prefixes_resource(): + service = MagicMock() + service.billingAccounts.return_value.getIamPolicy.return_value.execute.return_value = { + "bindings": [{"role": "roles/billing.viewer"}], + } + connector = _TestGoogleBilling(service) + + policy = connector.get_billing_account_iam_policy("123456-AAAA") + + assert isinstance(policy, ExtendedDict) + assert policy["bindings"][0]["role"] == "roles/billing.viewer" + service.billingAccounts.return_value.getIamPolicy.assert_called_once_with(resource="billingAccounts/123456-AAAA") + + +def test_set_billing_account_iam_policy_lowers_extended_policy(): + service = MagicMock() + 
service.billingAccounts.return_value.setIamPolicy.return_value.execute.return_value = { + "bindings": [{"role": "roles/billing.admin"}], + } + connector = _TestGoogleBilling(service) + + policy = connector.set_billing_account_iam_policy( + "123456-AAAA", + extend_data({"bindings": [{"role": "roles/billing.admin"}]}), + ) + + assert isinstance(policy, ExtendedDict) + service.billingAccounts.return_value.setIamPolicy.assert_called_once_with( + resource="billingAccounts/123456-AAAA", + body={"policy": {"bindings": [{"role": "roles/billing.admin"}]}}, + ) + + +def test_get_bigquery_billing_dataset_filters_billing_tables(): + service = _StubBillingService(account_responses=[], project_responses=[]) + connector = _TestGoogleBilling(service) + credentials = MagicMock(name="credentials") + bigquery = MagicMock() + bigquery.datasets.return_value.get.return_value.execute.return_value = { + "datasetReference": {"datasetId": "billing_export"}, + "location": "US", + "description": "Billing export", + } + bigquery.tables.return_value.list.return_value.execute.return_value = { + "tables": [ + {"tableReference": {"tableId": "gcp_billing_export_v1_123"}}, + {"tableReference": {"tableId": "not_billing"}}, + ] + } + + with ( + patch("google.oauth2.service_account.Credentials.from_service_account_info", return_value=credentials) as from_info, + patch("googleapiclient.discovery.build", return_value=bigquery) as build, + ): + result = connector.get_bigquery_billing_dataset("billing-project", "billing_export") + + assert isinstance(result, ExtendedDict) + assert result["location"] == "US" + assert len(result["billing_tables"]) == 1 + assert result["billing_tables"][0]["tableReference"]["tableId"] == "gcp_billing_export_v1_123" + from_info.assert_called_once_with( + connector.service_account_info, + scopes=["https://www.googleapis.com/auth/bigquery.readonly"], + ) + build.assert_called_once_with("bigquery", "v2", credentials=credentials, cache_discovery=False) + + +def 
test_get_bigquery_billing_dataset_returns_none_for_missing_dataset(): + service = _StubBillingService(account_responses=[], project_responses=[]) + connector = _TestGoogleBilling(service) + credentials = MagicMock(name="credentials") + bigquery = MagicMock() + bigquery.datasets.return_value.get.return_value.execute.side_effect = _http_error(404) + + with ( + patch("google.oauth2.service_account.Credentials.from_service_account_info", return_value=credentials), + patch("googleapiclient.discovery.build", return_value=bigquery), + ): + result = connector.get_bigquery_billing_dataset("secret-project@example.com", "billing@example.com") + + assert result is None + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "secret-project@example.com" not in logs + assert "billing@example.com" not in logs + + +def test_setup_billing_export_returns_existing_dataset_config(): + service = _StubBillingService(account_responses=[], project_responses=[]) + connector = _TestGoogleBilling(service) + credentials = MagicMock(name="credentials") + bigquery = MagicMock() + bigquery.datasets.return_value.get.return_value.execute.return_value = {"location": "EU"} + + with ( + patch("google.oauth2.service_account.Credentials.from_service_account_info", return_value=credentials) as from_info, + patch("googleapiclient.discovery.build", return_value=bigquery) as build, + ): + result = connector.setup_billing_export( + "123456-AAAA", + "billing-project", + dataset_id="billing_export", + location="EU", + ) + + assert isinstance(result, ExtendedDict) + assert result["billing_account_id"] == "123456-AAAA" + assert result["project_id"] == "billing-project" + assert result["dataset_id"] == "billing_export" + assert result["location"] == "EU" + assert result["full_dataset_id"] == "billing-project.billing_export" + bigquery.datasets.return_value.insert.assert_not_called() + from_info.assert_called_once_with( + connector.service_account_info, + 
scopes=["https://www.googleapis.com/auth/bigquery"], + ) + build.assert_called_once_with("bigquery", "v2", credentials=credentials, cache_discovery=False) + + +def test_setup_billing_export_creates_missing_dataset(): + service = _StubBillingService(account_responses=[], project_responses=[]) + connector = _TestGoogleBilling(service) + credentials = MagicMock(name="credentials") + bigquery = MagicMock() + bigquery.datasets.return_value.get.return_value.execute.side_effect = _http_error(404) + bigquery.datasets.return_value.insert.return_value.execute.return_value = {"location": "US"} + + with ( + patch("google.oauth2.service_account.Credentials.from_service_account_info", return_value=credentials), + patch("googleapiclient.discovery.build", return_value=bigquery), + ): + result = connector.setup_billing_export("123456-AAAA", "billing-project") + + assert isinstance(result, ExtendedDict) + assert result["location"] == "US" + insert_call = bigquery.datasets.return_value.insert.call_args + assert insert_call.kwargs["projectId"] == "billing-project" + body = insert_call.kwargs["body"] + assert body["datasetReference"] == {"projectId": "billing-project", "datasetId": "billing_export"} + assert body["labels"]["billing_account"] == "123456_AAAA" + + +def test_setup_billing_export_reraises_unexpected_dataset_errors(): + service = _StubBillingService(account_responses=[], project_responses=[]) + connector = _TestGoogleBilling(service) + credentials = MagicMock(name="credentials") + bigquery = MagicMock() + bigquery.datasets.return_value.get.return_value.execute.side_effect = _http_error(403) + + with ( + patch("google.oauth2.service_account.Credentials.from_service_account_info", return_value=credentials), + patch("googleapiclient.discovery.build", return_value=bigquery), + ): + with pytest.raises(Exception, match="Google API error"): + connector.setup_billing_export("123456-AAAA", "billing-project") From 74eb508dd08ecc82db870752b9bc7018db004f5d Mon Sep 17 00:00:00 2001 
From: Jon Bogaty Date: Thu, 11 Jun 2026 01:08:31 -0500 Subject: [PATCH 283/287] test: expand github connector coverage --- .../test_github_payload_contract.py | 613 +++++++++++++++++- tests/connectors/test_github_tools.py | 127 ++++ 2 files changed, 734 insertions(+), 6 deletions(-) diff --git a/tests/connectors/test_github_payload_contract.py b/tests/connectors/test_github_payload_contract.py index 28dcce7..ab1c5cb 100644 --- a/tests/connectors/test_github_payload_contract.py +++ b/tests/connectors/test_github_payload_contract.py @@ -3,12 +3,13 @@ from __future__ import annotations from unittest.mock import MagicMock +from unittest.mock import call as mock_call import pytest import extended_data.connectors.github as github_module -from extended_data.connectors.github import GitHubConnector, GitHubFallbackError +from extended_data.connectors.github import GitHubConnector, GitHubFallbackError, build_github_actions_workflow from extended_data.containers import ExtendedDict, ExtendedList, ExtendedString, ExtendedTuple @@ -31,12 +32,57 @@ def _logged_text(logger: MagicMock) -> str: messages: list[str] = [] for method_name in ("debug", "info", "warning", "error", "exception"): method = getattr(logger, method_name) - for call in method.call_args_list: - messages.extend(str(arg) for arg in call.args) - messages.extend(str(value) for value in call.kwargs.values()) + for log_call in method.call_args_list: + messages.extend(str(arg) for arg in log_call.args) + messages.extend(str(value) for value in log_call.kwargs.values()) return "\n".join(messages) +def _member(login: str, *, member_id: int = 1) -> MagicMock: + member = MagicMock() + member.id = member_id + member.login = login + member.name = login.title() + member.email = f"{login}@example.com" + member.avatar_url = f"https://github.com/{login}.png" + member.html_url = f"https://github.com/{login}" + return member + + +def _repo(name: str) -> MagicMock: + repo = MagicMock() + repo.id = 1 + repo.name = name + 
repo.full_name = f"test-org/{name}" + repo.description = f"{name} repository" + repo.private = False + repo.archived = False + repo.default_branch = "main" + repo.html_url = f"https://github.com/test-org/{name}" + repo.clone_url = f"https://github.com/test-org/{name}.git" + repo.ssh_url = f"git@github.com:test-org/{name}.git" + repo.language = "Python" + repo.topics = ["data", "connector"] + repo.created_at = None + repo.updated_at = None + repo.pushed_at = None + return repo + + +def _team(slug: str) -> MagicMock: + team = MagicMock() + team.id = 1 + team.name = slug.replace("-", " ").title() + team.slug = slug + team.description = f"{slug} team" + team.privacy = "closed" + team.permission = "push" + team.html_url = f"https://github.com/orgs/test-org/teams/{slug}" + team.members_count = 1 + team.repos_count = 1 + return team + + def test_repository_file_decodes_into_extended_payload_with_metadata() -> None: """Decoded repository files should enter the Tier 2 fabric immediately.""" connector = _connector() @@ -55,6 +101,204 @@ def test_repository_file_decodes_into_extended_payload_with_metadata() -> None: assert result[1:] == ("abc123", "service.json") +def test_get_repository_file_returns_raw_text_when_decode_disabled() -> None: + """Raw repository reads should preserve text content and optional metadata.""" + connector = _connector() + mock_file = MagicMock() + mock_file.decoded_content = b"raw text" + mock_file.sha = "abc123" + mock_file.content = "raw text" + connector.repo.get_contents.return_value = mock_file + + result = connector.get_repository_file("README.md", decode=False, return_sha=True) + + assert isinstance(result, ExtendedTuple) + assert result == ("raw text", "abc123") + connector.repo.get_contents.assert_called_once_with("README.md", ref="main") + + +def test_get_repository_file_missing_can_raise_redacted_not_found(monkeypatch: pytest.MonkeyPatch) -> None: + """Missing repository files should raise on demand without leaking caller paths.""" + 
monkeypatch.setattr(github_module, "UnknownObjectException", GitHubFallbackError) + connector = _connector() + connector.repo.get_contents.side_effect = GitHubFallbackError("missing private/path.json") + + with pytest.raises(FileNotFoundError) as exc_info: + connector.get_repository_file("private/path.json", raise_on_not_found=True) + + message = str(exc_info.value) + logs = _logged_text(connector.logger) + assert "[REDACTED]" in message + assert "[REDACTED]" in logs + assert "private/path.json" not in message + assert "private/path.json" not in logs + + +def test_get_repository_file_without_repo_returns_none_and_redacts_log() -> None: + """Repository file reads should no-op when no repository is configured.""" + connector = _connector() + connector.repo = None + + assert connector.get_repository_file("private/path.json") is None + + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private/path.json" not in logs + + +def test_get_repository_file_empty_content_returns_default_payload() -> None: + """Empty repository files should return the default decoded payload shape.""" + connector = _connector() + mock_file = MagicMock() + mock_file.content = "" + mock_file.decoded_content = b"" + mock_file.sha = "empty-sha" + connector.repo.get_contents.return_value = mock_file + + result = connector.get_repository_file("empty.json", return_sha=True) + + assert isinstance(result, ExtendedTuple) + assert result == ({}, "empty-sha") + + +def test_get_repository_file_decode_failure_returns_raw_text_with_redacted_log() -> None: + """Decode failures should return raw text and avoid leaking repository paths.""" + connector = _connector() + mock_file = MagicMock() + mock_file.content = "not-json" + mock_file.decoded_content = b"{not-json" + mock_file.sha = "bad-json-sha" + connector.repo.get_contents.return_value = mock_file + + result = connector.get_repository_file("private/config.json") + + assert isinstance(result, ExtendedString) + assert result == 
"{not-json" + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private/config.json" not in logs + + +def test_get_repository_file_unsupported_read_returns_raw_default() -> None: + """Unsupported SDK content reads should return decoded defaults instead of crashing.""" + connector = _connector() + mock_file = MagicMock() + mock_file.content = "content" + mock_file.decoded_content.decode.side_effect = ValueError("unsupported private/path.bin") + mock_file.sha = "binary-sha" + connector.repo.get_contents.return_value = mock_file + + result = connector.get_repository_file("private/path.bin", return_sha=True) + + assert isinstance(result, ExtendedTuple) + assert result == ({}, "binary-sha") + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private/path.bin" not in logs + + +def test_update_repository_file_creates_missing_file_with_encoded_payload() -> None: + """Repository updates should create files when no current SHA can be found.""" + connector = _connector() + connector.get_repository_file = MagicMock(return_value=ExtendedString("")) + + result = connector.update_repository_file( + "config/service.json", + {"service": {"name": "api"}}, + allow_encoding="json", + ) + + assert result is connector.repo.create_file.return_value + connector.repo.create_file.assert_called_once() + kwargs = connector.repo.create_file.call_args.kwargs + assert kwargs["path"] == "config/service.json" + assert kwargs["message"] == "Creating config/service.json" + assert kwargs["branch"] == "main" + assert '"service"' in kwargs["content"] + connector.repo.update_file.assert_not_called() + + +def test_update_repository_file_rejects_empty_payloads_unless_allowed() -> None: + """Repository updates should not silently write empty payloads by default.""" + connector = _connector() + + result = connector.update_repository_file("empty.txt", "") + + assert result is None + connector.repo.create_file.assert_not_called() + 
connector.repo.update_file.assert_not_called() + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "empty.txt" not in logs + + +def test_update_repository_file_allows_empty_payloads_when_requested() -> None: + """Explicit empty writes should be allowed when allow_empty is true.""" + connector = _connector() + + result = connector.update_repository_file("empty.txt", "", file_sha="abc123", allow_empty=True, allow_encoding=False) + + assert result is connector.repo.update_file.return_value + connector.repo.update_file.assert_called_once_with( + path="empty.txt", + message="Updating empty.txt", + content="", + sha="abc123", + branch="main", + ) + + +def test_update_repository_file_without_repo_returns_none_and_redacts_log() -> None: + """Repository file updates should no-op when no repository is configured.""" + connector = _connector() + connector.repo = None + + assert connector.update_repository_file("private/path.json", {"x": 1}) is None + + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private/path.json" not in logs + + +def test_delete_repository_file_deletes_when_sha_exists() -> None: + """Repository deletes should fetch the current SHA and call delete_file.""" + connector = _connector() + connector.get_repository_file = MagicMock(return_value=ExtendedTuple(("", "abc123"))) + + result = connector.delete_repository_file("config/service.json") + + assert result is connector.repo.delete_file.return_value + connector.get_repository_file.assert_called_once_with(file_path="config/service.json", return_sha=True) + connector.repo.delete_file.assert_called_once_with( + path="config/service.json", + message="Deleting config/service.json", + branch="main", + sha="abc123", + ) + + +def test_delete_repository_file_skips_when_sha_missing() -> None: + """Repository deletes should be no-ops when the current file cannot be resolved.""" + connector = _connector() + connector.get_repository_file = 
MagicMock(return_value=None) + + assert connector.delete_repository_file("missing.txt") is None + + connector.repo.delete_file.assert_not_called() + + +def test_delete_repository_file_without_repo_returns_none_and_redacts_log() -> None: + """Repository file deletes should no-op when no repository is configured.""" + connector = _connector() + connector.repo = None + + assert connector.delete_repository_file("private/path.json") is None + + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private/path.json" not in logs + + def test_list_repositories_promotes_sdk_payloads() -> None: """Repository listing payloads should be extended containers, not raw dicts.""" connector = _connector() @@ -84,6 +328,251 @@ def test_list_repositories_promotes_sdk_payloads() -> None: assert result["api-service"]["name"].to_snake_case() == "api_service" +def test_create_repository_branch_uses_parent_sha() -> None: + """Branch creation should create Git refs from the selected parent branch SHA.""" + connector = _connector() + connector.repo.default_branch = "main" + parent = MagicMock() + parent.commit.sha = "parent-sha" + connector.get_repository_branch = MagicMock(return_value=parent) + + result = connector.create_repository_branch("feature/data") + + assert result is connector.repo.create_git_ref.return_value + connector.get_repository_branch.assert_called_once_with("main") + connector.repo.create_git_ref.assert_called_once_with(ref="refs/heads/feature/data", sha="parent-sha") + + +def test_get_repository_branch_without_repo_returns_none_and_redacts_log() -> None: + """Branch lookup should no-op when no repository is configured.""" + connector = _connector() + connector.repo = None + + assert connector.get_repository_branch("private-branch") is None + + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private-branch" not in logs + + +def test_get_repository_branch_missing_returns_none(monkeypatch: pytest.MonkeyPatch) -> None: + 
"""Missing branch lookup should return None and redact branch names.""" + monkeypatch.setattr(github_module, "UnknownObjectException", GitHubFallbackError) + connector = _connector() + connector.repo.get_branch.side_effect = GitHubFallbackError("missing private-branch") + + assert connector.get_repository_branch("private-branch") is None + + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private-branch" not in logs + + +def test_create_repository_branch_without_repo_returns_none_and_redacts_log() -> None: + """Branch creation should no-op when no repository is configured.""" + connector = _connector() + connector.repo = None + + assert connector.create_repository_branch("private-branch") is None + + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private-branch" not in logs + + +def test_create_repository_branch_returns_existing_branch_on_reference_exists(monkeypatch: pytest.MonkeyPatch) -> None: + """Existing branch creation should return the current branch instead of failing.""" + + class ReferenceExistsError(GitHubFallbackError): + data = {"message": "Reference already exists"} + + monkeypatch.setattr(github_module, "GithubException", GitHubFallbackError) + connector = _connector() + parent = MagicMock() + parent.commit.sha = "parent-sha" + existing = MagicMock() + connector.repo.default_branch = "main" + connector.repo.create_git_ref.side_effect = ReferenceExistsError("Reference already exists") + connector.get_repository_branch = MagicMock(side_effect=[parent, existing]) + + assert connector.create_repository_branch("feature/data") is existing + + assert connector.get_repository_branch.call_args_list == [mock_call("main"), mock_call("feature/data")] + + +def test_create_repository_branch_redacts_unexpected_errors(monkeypatch: pytest.MonkeyPatch) -> None: + """Unexpected branch creation errors should redact branch identifiers.""" + monkeypatch.setattr(github_module, "GithubException", 
GitHubFallbackError) + connector = _connector() + parent = MagicMock() + parent.commit.sha = "parent-sha" + connector.get_repository_branch = MagicMock(return_value=parent) + connector.repo.create_git_ref.side_effect = GitHubFallbackError("branch private-branch token=raw-token") + + with pytest.raises(RuntimeError) as exc_info: + connector.create_repository_branch("private-branch") + + message = str(exc_info.value) + assert "[REDACTED]" in message + assert "private-branch" not in message + assert "raw-token" not in message + + +def test_create_repository_branch_raises_when_parent_missing() -> None: + """Branch creation should fail loudly when the parent branch is missing.""" + connector = _connector() + connector.get_repository_branch = MagicMock(return_value=None) + + with pytest.raises(RuntimeError, match="parent branch"): + connector.create_repository_branch("feature/data", parent_branch="missing") + + +def test_list_org_members_includes_pending_invitations() -> None: + """Organization member lists should include active and pending members when requested.""" + connector = _connector() + active = _member("octocat", member_id=1) + membership = MagicMock() + membership.role = "admin" + membership.state = "active" + invite = MagicMock() + invite.id = 2 + invite.login = None + invite.email = "pending@example.com" + invite.role = "direct_member" + invite.created_at = None + connector.org.get_members.return_value = [active] + connector.org.get_user_membership.return_value = membership + connector.org.invitations.return_value = [invite] + + result = connector.list_org_members(role="admin", include_pending=True) + + assert isinstance(result, ExtendedDict) + assert result["octocat"]["role"] == "admin" + assert result["pending@example.com"]["state"] == "pending" + connector.org.get_members.assert_called_once_with(role="admin") + + +def test_get_org_member_returns_none_for_missing_user(monkeypatch: pytest.MonkeyPatch) -> None: + """Missing organization members should return 
None and redact diagnostics.""" + monkeypatch.setattr(github_module, "UnknownObjectException", GitHubFallbackError) + connector = _connector() + connector.git.get_user.side_effect = GitHubFallbackError("missing secret-user") + + assert connector.get_org_member("secret-user") is None + + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "secret-user" not in logs + + +def test_list_repositories_includes_branch_payloads() -> None: + """Repository listings should optionally include promoted branch metadata.""" + connector = _connector() + repo = _repo("api-service") + branch = MagicMock() + branch.name = "main" + branch.protected = True + branch.commit.sha = "branch-sha" + repo.get_branches.return_value = [branch] + connector.org.get_repos.return_value = [repo] + + result = connector.list_repositories(type_filter="private", include_branches=True) + + assert isinstance(result["api-service"]["branches"], ExtendedList) + assert result["api-service"]["branches"][0]["name"] == "main" + assert result["api-service"]["branches"][0]["protected"] is True + assert result["api-service"]["branches"][0]["sha"] == "branch-sha" + connector.org.get_repos.assert_called_once_with(type="private") + + +def test_get_repository_returns_none_for_missing_repo(monkeypatch: pytest.MonkeyPatch) -> None: + """Missing repositories should return None and redact repo names.""" + monkeypatch.setattr(github_module, "UnknownObjectException", GitHubFallbackError) + connector = _connector() + connector.git.get_repo.side_effect = GitHubFallbackError("missing private-repo") + + assert connector.get_repository("private-repo") is None + + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private-repo" not in logs + + +def test_list_teams_includes_members_and_repositories() -> None: + """Team lists should optionally include promoted member and repository details.""" + connector = _connector() + team = _team("data-team") + member = _member("octocat") + repo 
= _repo("api-service") + team.get_members.return_value = [member] + team.get_repos.return_value = [repo] + team.get_repo_permission.return_value = "admin" + connector.org.get_teams.return_value = [team] + + result = connector.list_teams(include_members=True, include_repos=True) + + assert isinstance(result, ExtendedDict) + assert result["data-team"]["members"][0]["login"] == "octocat" + assert result["data-team"]["repositories"][0]["permission"] == "admin" + assert isinstance(result["data-team"]["repositories"], ExtendedList) + + +def test_get_team_returns_promoted_payload() -> None: + """Team lookup should promote SDK payloads into Tier 2 containers.""" + connector = _connector() + connector.org.get_team_by_slug.return_value = _team("data-team") + + result = connector.get_team("data-team") + + assert isinstance(result, ExtendedDict) + assert result["slug"] == "data-team" + assert isinstance(result["name"], ExtendedString) + + +def test_get_team_returns_none_for_missing_team(monkeypatch: pytest.MonkeyPatch) -> None: + """Missing team lookups should return None and redact team slugs.""" + monkeypatch.setattr(github_module, "UnknownObjectException", GitHubFallbackError) + connector = _connector() + connector.org.get_team_by_slug.side_effect = GitHubFallbackError("missing private-team") + + assert connector.get_team("private-team") is None + + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private-team" not in logs + + +def test_add_and_remove_team_member_success_paths() -> None: + """Team membership helpers should call the SDK and return true on success.""" + connector = _connector() + team = _team("data-team") + user = _member("octocat") + connector.org.get_team_by_slug.return_value = team + connector.git.get_user.return_value = user + + assert connector.add_team_member("data-team", "octocat", role="maintainer") is True + assert connector.remove_team_member("data-team", "octocat") is True + + 
team.add_membership.assert_called_once_with(user, role="maintainer") + team.remove_membership.assert_called_once_with(user) + + +def test_remove_team_member_failure_redacts_diagnostics(monkeypatch: pytest.MonkeyPatch) -> None: + """Team member removal failures should redact user/team identifiers.""" + monkeypatch.setattr(github_module, "GithubException", GitHubFallbackError) + monkeypatch.setattr(github_module, "UnknownObjectException", GitHubFallbackError) + connector = _connector() + connector.git.get_user.side_effect = GitHubFallbackError("team private-team user secret-user token=raw-token") + + assert connector.remove_team_member("private-team", "secret-user") is False + + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "private-team" not in logs + assert "secret-user" not in logs + assert "raw-token" not in logs + + def test_execute_graphql_promotes_response_payload() -> None: """GraphQL response dictionaries should expose nested extended containers.""" connector = _connector() @@ -123,6 +612,56 @@ def test_verified_email_enrichment_returns_extended_payload() -> None: assert result["octocat"]["primary_email"].upper_first() == "Octocat@example.com" +def test_verified_email_enrichment_filters_domain_matches() -> None: + """Verified email enrichment should keep only members with matching domain emails.""" + connector = _connector() + connector.execute_graphql = MagicMock( + side_effect=[ + { + "data": { + "user": { + "email": "octocat@example.com", + "organizationVerifiedDomainEmails": ["octocat@example.com", "octocat@other.test"], + } + } + }, + { + "data": { + "user": { + "email": "nomatch@other.test", + "organizationVerifiedDomainEmails": ["nomatch@other.test"], + } + } + }, + ] + ) + + result = connector.get_users_with_verified_emails( + members={ + "octocat": {"login": "octocat"}, + "nomatch": {"login": "nomatch"}, + }, + domain_filter="example.com", + ) + + assert set(result) == {"octocat"} + assert 
result["octocat"]["domain_emails"] == ["octocat@example.com"] + + +def test_verified_email_enrichment_preserves_member_on_graphql_failure() -> None: + """GraphQL failures should preserve existing member payloads and redact diagnostics.""" + connector = _connector() + connector.execute_graphql = MagicMock(side_effect=RuntimeError("failed for secret-user token=raw-token")) + + result = connector.get_users_with_verified_emails(members={"secret-user": {"login": "secret-user"}}) + + assert result["secret-user"]["login"] == "secret-user" + logs = _logged_text(connector.logger) + assert "[REDACTED]" in logs + assert "secret-user" not in logs + assert "raw-token" not in logs + + def test_workflow_builders_return_extended_data() -> None: """Local GitHub workflow builders should produce first-class extended data.""" connector = _connector() @@ -138,6 +677,68 @@ def test_workflow_builders_return_extended_data() -> None: assert workflow["jobs"]["test"]["steps"][0]["run"].upper_first() == "Pytest" +def test_create_python_ci_workflow_builds_integrated_default_pipeline() -> None: + """Python CI workflow helper should compose checkout, setup, lint, format, and test steps.""" + connector = _connector() + + workflow = connector.create_python_ci_workflow(python_versions=["3.12", "3.13"], working_directory="packages/api") + + assert isinstance(workflow, ExtendedDict) + assert workflow["name"] == "CI" + steps = workflow["jobs"]["test"]["steps"] + assert [step["name"] for step in steps] == [ + "Checkout code", + "Set up Python", + "Install uv", + "Install dependencies", + "Lint", + "Format check", + "Run tests", + ] + assert workflow["jobs"]["test"]["strategy"]["matrix"]["python-version"] == ["3.12", "3.13"] + assert steps[-1]["working-directory"] == "packages/api" + + +def test_create_python_ci_workflow_can_skip_optional_checks() -> None: + """Python CI workflow helper should omit lint/format steps when callers disable them.""" + connector = _connector() + + workflow = 
connector.create_python_ci_workflow(lint_command="", format_command=None) + + assert [step["name"] for step in workflow["jobs"]["test"]["steps"]] == [ + "Checkout code", + "Set up Python", + "Install uv", + "Install dependencies", + "Run tests", + ] + + +def test_build_github_actions_workflow_rejects_missing_required_fields() -> None: + """Standalone workflow YAML builder should fail loudly for unusable inputs.""" + with pytest.raises(ValueError, match="workflow_name is required"): + build_github_actions_workflow("", {"test": {"runs-on": "ubuntu-latest", "steps": []}}) + + with pytest.raises(ValueError, match="jobs definition is required"): + build_github_actions_workflow("CI", {}) + + +def test_build_github_actions_workflow_can_disable_oidc_and_events() -> None: + """Standalone workflow builder should honor event and permission options.""" + workflow_yaml = build_github_actions_workflow( + "Release", + {"release": {"runs-on": "ubuntu-latest", "steps": [{"run": "echo release"}]}}, + use_oidc_auth=False, + events={"push": False, "pull_request": False, "workflow_dispatch": True}, + triggers={"branches": ["main"]}, + pull_requests={"branches": ["main"]}, + ) + + assert "id-token" not in workflow_yaml + assert "workflow_dispatch:" in workflow_yaml + assert "pull_request:" not in workflow_yaml + + def test_update_repository_file_redacts_diagnostics_but_preserves_payload() -> None: """GitHub file updates should not leak caller paths or messages in logs.""" connector = _connector() @@ -184,5 +785,5 @@ def test_add_team_member_failure_redacts_diagnostics_without_traceback(monkeypat assert "secret-user" not in logs assert "raw-token" not in logs connector.logger.exception.assert_not_called() - for call in connector.logger.error.call_args_list: - assert call.kwargs.get("exc_info") is not True + for log_call in connector.logger.error.call_args_list: + assert log_call.kwargs.get("exc_info") is not True diff --git a/tests/connectors/test_github_tools.py 
b/tests/connectors/test_github_tools.py index 90c0c5b..e7b4cfc 100644 --- a/tests/connectors/test_github_tools.py +++ b/tests/connectors/test_github_tools.py @@ -369,6 +369,27 @@ def test_get_repository_file_empty(self, mock_connector_class): assert isinstance(result, ExtendedDict) assert result["status"] == "empty" + @patch(GITHUB_CONNECTOR_PATCH) + def test_get_repository_file_single_payload(self, mock_connector_class): + """Test get_repository_file when the connector returns content without SHA metadata.""" + from extended_data.connectors.github.tools import get_repository_file + + mock_connector = MagicMock() + mock_connector.get_repository_file.return_value = "plain content" + mock_connector_class.return_value = mock_connector + + result = get_repository_file( + github_owner="test-org", + github_token="test-token", + github_repo="test-repo", + file_path="README.md", + ) + + assert isinstance(result, ExtendedDict) + assert result["status"] == "retrieved" + assert result["content"] == "plain content" + assert result["sha"] is None + class TestGetTools: """Test framework getters.""" @@ -396,6 +417,112 @@ def test_get_tools_rejects_functions_alias(self): with pytest.raises(ValueError, match="Unknown framework"): get_tools(framework="functions") + def test_get_langchain_tools_delegates_shared_builder(self, monkeypatch: pytest.MonkeyPatch): + """LangChain tool factory should pass the GitHub definitions to the shared builder.""" + from extended_data.connectors import ai_tools + from extended_data.connectors.github import tools as github_tools + + expected = [object()] + build_langchain_tools = MagicMock(return_value=expected) + monkeypatch.setattr(ai_tools, "build_langchain_tools", build_langchain_tools) + + assert github_tools.get_langchain_tools() is expected + build_langchain_tools.assert_called_once_with(github_tools.TOOL_DEFINITIONS) + + def test_get_crewai_tools_wraps_definitions(self, monkeypatch: pytest.MonkeyPatch): + """CrewAI tool factory should attach 
descriptions and schemas to wrapped functions.""" + from extended_data.connectors import _optional + from extended_data.connectors.github import tools as github_tools + + def fake_tool(name): + def decorate(func): + wrapped = MagicMock(wrapped_name=name) + wrapped.__name__ = func.__name__ + return wrapped + + return decorate + + monkeypatch.setattr(_optional, "get_crewai_tool_decorator", lambda: fake_tool) + + tools = github_tools.get_crewai_tools() + + assert len(tools) == len(github_tools.TOOL_DEFINITIONS) + assert tools[0].description == github_tools.TOOL_DEFINITIONS[0]["description"] + assert tools[0].args_schema is github_tools.TOOL_DEFINITIONS[0]["schema"] + + def test_get_crewai_tools_allows_schema_less_definitions(self, monkeypatch: pytest.MonkeyPatch): + """CrewAI tool factory should tolerate definitions without schema metadata.""" + from extended_data.connectors import _optional + from extended_data.connectors.github import tools as github_tools + + class WrappedTool: + pass + + def fake_tool(name): + def decorate(func): + wrapped = WrappedTool() + wrapped.name = name + wrapped.func = func + return wrapped + + return decorate + + monkeypatch.setattr(_optional, "get_crewai_tool_decorator", lambda: fake_tool) + monkeypatch.setattr( + github_tools, + "TOOL_DEFINITIONS", + [{"name": "github_ping", "description": "Ping GitHub", "func": lambda: "pong"}], + ) + + tools = github_tools.get_crewai_tools() + + assert len(tools) == 1 + assert tools[0].description == "Ping GitHub" + assert not hasattr(tools[0], "args_schema") + + def test_get_tools_auto_prefers_crewai_when_available(self, monkeypatch: pytest.MonkeyPatch): + """Auto-detection should prefer CrewAI tools when CrewAI is importable.""" + from extended_data.connectors import _optional + from extended_data.connectors.github import tools as github_tools + + expected = [object()] + monkeypatch.setattr(_optional, "is_available", lambda package: package == "crewai") + monkeypatch.setattr(github_tools, 
"get_crewai_tools", lambda: expected) + + assert github_tools.get_tools("auto") is expected + + def test_get_tools_auto_falls_back_to_langchain_then_strands(self, monkeypatch: pytest.MonkeyPatch): + """Auto-detection should use LangChain before plain Strands functions.""" + from extended_data.connectors import _optional + from extended_data.connectors.github import tools as github_tools + + langchain_tools = [object()] + strands_tools = [object()] + availability = {"langchain_core": True} + monkeypatch.setattr(_optional, "is_available", lambda package: availability.get(package, False)) + monkeypatch.setattr(github_tools, "get_langchain_tools", lambda: langchain_tools) + monkeypatch.setattr(github_tools, "get_strands_tools", lambda: strands_tools) + + assert github_tools.get_tools("auto") is langchain_tools + + availability["langchain_core"] = False + assert github_tools.get_tools("auto") is strands_tools + + def test_get_tools_explicit_frameworks(self, monkeypatch: pytest.MonkeyPatch): + """Explicit framework names should dispatch to their matching factories.""" + from extended_data.connectors.github import tools as github_tools + + langchain_tools = [object()] + crewai_tools = [object()] + strands_tools = [object()] + monkeypatch.setattr(github_tools, "get_langchain_tools", lambda: langchain_tools) + monkeypatch.setattr(github_tools, "get_crewai_tools", lambda: crewai_tools) + monkeypatch.setattr(github_tools, "get_strands_tools", lambda: strands_tools) + + assert github_tools.get_tools("langchain") is langchain_tools + assert github_tools.get_tools("crewai") is crewai_tools + assert github_tools.get_tools("strands") is strands_tools + class TestExports: """Test that all expected exports are available.""" From d57c3c9a86296c1b0498e293e77dc3e7a22f3fce Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 01:14:00 -0500 Subject: [PATCH 284/287] test: cover connector tool adapters --- .../test_tool_framework_adapters.py | 154 ++++++++++++++++++ 1 file 
changed, 154 insertions(+) create mode 100644 tests/connectors/test_tool_framework_adapters.py diff --git a/tests/connectors/test_tool_framework_adapters.py b/tests/connectors/test_tool_framework_adapters.py new file mode 100644 index 0000000..a8b1a39 --- /dev/null +++ b/tests/connectors/test_tool_framework_adapters.py @@ -0,0 +1,154 @@ +"""Shared framework adapter contracts for connector tool modules.""" + +from __future__ import annotations + +from importlib import import_module +from typing import Any +from unittest.mock import MagicMock + +import pytest + + +TOOL_MODULES = ( + "extended_data.connectors.anthropic.tools", + "extended_data.connectors.aws.tools", + "extended_data.connectors.cursor.tools", + "extended_data.connectors.github.tools", + "extended_data.connectors.google.tools", + "extended_data.connectors.meshy.tools", + "extended_data.connectors.secrets.tools", + "extended_data.connectors.slack.tools", + "extended_data.connectors.vault.tools", + "extended_data.connectors.zoom.tools", +) + + +def _fake_crewai_tool(name: str): + def decorate(func: Any) -> MagicMock: + wrapped = MagicMock(wrapped_name=name) + wrapped.__name__ = func.__name__ + return wrapped + + return decorate + + +@pytest.mark.parametrize("module_name", TOOL_MODULES) +def test_langchain_tools_delegate_to_shared_builder(module_name: str, monkeypatch: pytest.MonkeyPatch) -> None: + """LangChain factories should pass connector definitions through the shared builder.""" + from extended_data.connectors import ai_tools + + module = import_module(module_name) + expected = [object()] + build_langchain_tools = MagicMock(return_value=expected) + monkeypatch.setattr(ai_tools, "build_langchain_tools", build_langchain_tools) + + assert module.get_langchain_tools() is expected + build_langchain_tools.assert_called_once_with(module.TOOL_DEFINITIONS) + + +@pytest.mark.parametrize("module_name", TOOL_MODULES) +def test_crewai_tools_attach_description_and_schema(module_name: str, monkeypatch: 
pytest.MonkeyPatch) -> None: + """CrewAI factories should attach connector metadata to wrapped functions.""" + from extended_data.connectors import _optional + + module = import_module(module_name) + monkeypatch.setattr(_optional, "get_crewai_tool_decorator", lambda: _fake_crewai_tool) + + tools = module.get_crewai_tools() + first_definition = module.TOOL_DEFINITIONS[0] + expected_schema = first_definition.get("schema") or first_definition.get("args_schema") + + assert len(tools) == len(module.TOOL_DEFINITIONS) + assert tools[0].description == first_definition["description"] + assert tools[0].args_schema is expected_schema + + +@pytest.mark.parametrize("module_name", TOOL_MODULES) +def test_crewai_tools_allow_schema_less_definitions(module_name: str, monkeypatch: pytest.MonkeyPatch) -> None: + """CrewAI factories should tolerate simple function definitions without schemas.""" + from extended_data.connectors import _optional + + class WrappedTool: + pass + + def fake_tool(name: str): + def decorate(func: Any) -> WrappedTool: + wrapped = WrappedTool() + wrapped.name = name + wrapped.func = func + return wrapped + + return decorate + + module = import_module(module_name) + monkeypatch.setattr(_optional, "get_crewai_tool_decorator", lambda: fake_tool) + monkeypatch.setattr( + module, + "TOOL_DEFINITIONS", + [{"name": "connector_ping", "description": "Ping connector", "func": lambda: "pong"}], + ) + + tools = module.get_crewai_tools() + + assert len(tools) == 1 + assert tools[0].description == "Ping connector" + assert not hasattr(tools[0], "args_schema") + + +@pytest.mark.parametrize("module_name", TOOL_MODULES) +def test_strands_tools_return_plain_definition_functions(module_name: str) -> None: + """Strands factories should expose the raw Python functions in definition order.""" + module = import_module(module_name) + + assert module.get_strands_tools() == [definition["func"] for definition in module.TOOL_DEFINITIONS] + + +@pytest.mark.parametrize("module_name", 
TOOL_MODULES) +def test_get_tools_auto_prefers_crewai(module_name: str, monkeypatch: pytest.MonkeyPatch) -> None: + """Auto-detection should prefer CrewAI when it is importable.""" + from extended_data.connectors import _optional + + module = import_module(module_name) + expected = [object()] + monkeypatch.setattr(_optional, "is_available", lambda package: package == "crewai") + monkeypatch.setattr(module, "get_crewai_tools", lambda: expected) + + assert module.get_tools("auto") is expected + + +@pytest.mark.parametrize("module_name", TOOL_MODULES) +def test_get_tools_auto_falls_back_to_langchain_then_strands( + module_name: str, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Auto-detection should use LangChain before plain Strands functions.""" + from extended_data.connectors import _optional + + module = import_module(module_name) + langchain_tools = [object()] + strands_tools = [object()] + availability = {"langchain_core": True} + monkeypatch.setattr(_optional, "is_available", lambda package: availability.get(package, False)) + monkeypatch.setattr(module, "get_langchain_tools", lambda: langchain_tools) + monkeypatch.setattr(module, "get_strands_tools", lambda: strands_tools) + + assert module.get_tools("auto") is langchain_tools + + availability["langchain_core"] = False + assert module.get_tools("auto") is strands_tools + + +@pytest.mark.parametrize("module_name", TOOL_MODULES) +def test_get_tools_explicit_framework_dispatch(module_name: str, monkeypatch: pytest.MonkeyPatch) -> None: + """Explicit framework names should dispatch to their matching factories.""" + module = import_module(module_name) + langchain_tools = [object()] + crewai_tools = [object()] + strands_tools = [object()] + monkeypatch.setattr(module, "get_langchain_tools", lambda: langchain_tools) + monkeypatch.setattr(module, "get_crewai_tools", lambda: crewai_tools) + monkeypatch.setattr(module, "get_strands_tools", lambda: strands_tools) + + assert module.get_tools("langchain") is 
langchain_tools + assert module.get_tools("crewai") is crewai_tools + assert module.get_tools("strands") is strands_tools From 5e6cf05bf87f867ca82264f9508ee21122ebeb19 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 01:18:19 -0500 Subject: [PATCH 285/287] test: cover meshy base client behavior --- tests/connectors/meshy/test_meshy_base.py | 198 ++++++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/tests/connectors/meshy/test_meshy_base.py b/tests/connectors/meshy/test_meshy_base.py index 4604cf7..4ee2a5a 100644 --- a/tests/connectors/meshy/test_meshy_base.py +++ b/tests/connectors/meshy/test_meshy_base.py @@ -8,6 +8,75 @@ import pytest from extended_data.connectors.meshy import base +from extended_data.connectors.meshy.models import Text3DResult +from extended_data.containers import ExtendedDict, ExtendedString + + +@pytest.fixture(autouse=True) +def reset_meshy_base(monkeypatch: pytest.MonkeyPatch) -> None: + """Reset Meshy base globals so helper tests stay isolated.""" + monkeypatch.setattr(base, "_client", None) + monkeypatch.setattr(base, "_inputs", None) + monkeypatch.setattr(base, "_last_request_time", 0) + monkeypatch.setattr(base, "_min_request_interval", 0.5) + + +def _raw_request(*args, **kwargs): + return base.request.__wrapped__(*args, **kwargs) + + +def test_configure_sets_and_merges_api_inputs() -> None: + """Meshy configuration should feed the shared InputProvider boundary.""" + base.configure(api_key="first-key", EXTRA_INPUT="value") + + assert base.get_api_key() == "first-key" + + base.configure(api_key="second-key") + + assert base.get_api_key() == "second-key" + + +def test_get_client_reuses_client_and_close_resets(monkeypatch: pytest.MonkeyPatch) -> None: + """Meshy HTTP clients should be lazy, reused, and closed explicitly.""" + client = MagicMock(spec=httpx.Client) + client_factory = MagicMock(return_value=client) + monkeypatch.setattr(base.httpx, "Client", client_factory) + + assert base.get_client() is client 
+ assert base.get_client() is client + client_factory.assert_called_once_with(timeout=300.0) + + base.close() + + client.close.assert_called_once_with() + assert base._client is None + + +def test_rate_limit_sleeps_only_when_interval_has_not_elapsed(monkeypatch: pytest.MonkeyPatch) -> None: + """Rate limiting should sleep only for the remaining interval.""" + sleep = MagicMock() + monkeypatch.setattr(base.time, "sleep", sleep) + monkeypatch.setattr(base.time, "time", MagicMock(side_effect=[100.2, 100.7, 102.0, 102.0])) + monkeypatch.setattr(base, "_last_request_time", 100.0) + + base._rate_limit() + assert pytest.approx(sleep.call_args.args[0]) == 0.3 + assert base._last_request_time == 100.7 + + sleep.reset_mock() + base._rate_limit() + sleep.assert_not_called() + assert base._last_request_time == 102.0 + + +def test_headers_uses_bearer_api_key(monkeypatch: pytest.MonkeyPatch) -> None: + """Meshy request headers should be built from the configured API key.""" + monkeypatch.setattr(base, "get_api_key", lambda: "test-key") + + assert base._headers() == { + "Authorization": "Bearer test-key", + "Content-Type": "application/json", + } def test_meshy_request_redacts_sensitive_error_body(monkeypatch: pytest.MonkeyPatch) -> None: @@ -32,6 +101,53 @@ def test_meshy_request_redacts_sensitive_error_body(monkeypatch: pytest.MonkeyPa assert "[REDACTED]" in message +def test_meshy_request_builds_url_and_returns_success(monkeypatch: pytest.MonkeyPatch) -> None: + """Meshy requests should build versioned OpenAPI URLs with shared headers.""" + response = httpx.Response(200, content=b'{"result":"task-123"}') + mock_client = MagicMock() + mock_client.request.return_value = response + monkeypatch.setattr(base, "_rate_limit", lambda: None) + monkeypatch.setattr(base, "_headers", lambda: {"Authorization": "Bearer test"}) + monkeypatch.setattr(base, "get_client", lambda: mock_client) + + result = _raw_request("POST", "text-to-3d", version="v2", json={"prompt": "ship"}) + + assert 
result is response + mock_client.request.assert_called_once_with( + "POST", + "https://api.meshy.ai/openapi/v2/text-to-3d", + headers={"Authorization": "Bearer test"}, + json={"prompt": "ship"}, + ) + + +def test_meshy_request_raises_rate_limit_for_429_and_5xx(monkeypatch: pytest.MonkeyPatch) -> None: + """Meshy retryable responses should raise RateLimitError with bounded sleeps.""" + mock_client = MagicMock() + mock_client.request.side_effect = [ + httpx.Response(429, headers={"retry-after": "0.25"}), + httpx.Response(429, headers={"retry-after": "not-a-number"}), + httpx.Response(503, content=b"unavailable"), + ] + sleep = MagicMock() + monkeypatch.setattr(base, "_rate_limit", lambda: None) + monkeypatch.setattr(base, "_headers", lambda: {"Authorization": "Bearer test"}) + monkeypatch.setattr(base, "get_client", lambda: mock_client) + monkeypatch.setattr(base.time, "sleep", sleep) + + with pytest.raises(base.RateLimitError, match=r"0\.25s"): + _raw_request("GET", "text-to-3d") + sleep.assert_called_once_with(0.25) + + sleep.reset_mock() + with pytest.raises(base.RateLimitError, match="not-a-number"): + _raw_request("GET", "text-to-3d") + sleep.assert_called_once_with(5) + + with pytest.raises(base.RateLimitError, match="Server error 503"): + _raw_request("GET", "text-to-3d") + + def test_task_failure_message_redacts_sensitive_values() -> None: """Meshy task failure messages should share the connector redaction boundary.""" message = base.task_failure_message({"message": "failed password=hunter2 Authorization: Bearer raw_token"}) @@ -40,3 +156,85 @@ def test_task_failure_message_redacts_sensitive_values() -> None: assert "hunter2" not in message assert "raw_token" not in message assert "[REDACTED]" in message + + +def test_task_failure_message_falls_back_to_error_and_unknown() -> None: + """Task failure messages should preserve useful public errors.""" + assert base.task_failure_message({"error": "bad mesh"}) == "Task failed: bad mesh" + assert 
base.task_failure_message(None) == "Task failed: Unknown error" + + +def test_unexpected_response_message_redacts_sensitive_payloads() -> None: + """Unexpected response diagnostics should not echo secret-bearing payloads.""" + message = base.unexpected_response_message({"api_key": "key_123", "message": "Authorization: Bearer raw_token"}) + + assert "key_123" not in message + assert "raw_token" not in message + assert "[REDACTED]" in message + + +def test_decode_response_json_handles_empty_and_extended_payloads() -> None: + """Response JSON decoding should promote payloads across the data boundary.""" + assert base._decode_response_json(httpx.Response(204, content=b"")) is None + + result = base._decode_response_json(httpx.Response(200, content=b'{"result":"task-123"}')) + + assert isinstance(result, ExtendedDict) + assert result["result"] == "task-123" + + +def test_task_id_from_response_extracts_non_empty_result() -> None: + """Task creation responses should expose non-empty task IDs as ExtendedString.""" + task_id = base.task_id_from_response(httpx.Response(200, content=b'{"result":"task-123"}')) + + assert isinstance(task_id, ExtendedString) + assert task_id == "task-123" + + +def test_task_id_from_response_rejects_missing_or_blank_results() -> None: + """Task creation responses should fail loudly for unusable response bodies.""" + with pytest.raises(RuntimeError, match="missing 'result' key"): + base.task_id_from_response(httpx.Response(200, content=b'{"api_key":"key_123","result":" "}')) + + +def test_task_payload_from_response_validates_and_promotes_model_payload() -> None: + """Task status responses should validate through Pydantic and return extended data.""" + response = httpx.Response( + 200, + content=b'{"id":"task-123","status":"SUCCEEDED","progress":100,"created_at":1700000000}', + ) + + result = base.task_payload_from_response(response, Text3DResult, "text-to-3d") + + assert isinstance(result, ExtendedDict) + assert result["id"] == "task-123" + assert 
result["status"] == "SUCCEEDED" + + +def test_task_payload_from_response_redacts_invalid_payloads() -> None: + """Task status validation errors should redact unexpected vendor payloads.""" + response = httpx.Response(200, content=b'{"api_key":"key_123","status":"FAILED"}') + + with pytest.raises(RuntimeError) as exc_info: + base.task_payload_from_response(response, Text3DResult, "text-to-3d") + + message = str(exc_info.value) + assert "key_123" not in message + assert "[REDACTED]" in message + + +def test_download_creates_parent_directories_and_returns_size(tmp_path, monkeypatch: pytest.MonkeyPatch) -> None: + """Meshy downloads should write bytes and return the downloaded size.""" + response = MagicMock() + response.content = b"glb-bytes" + response.raise_for_status = MagicMock() + get = MagicMock(return_value=response) + monkeypatch.setattr(base.httpx, "get", get) + output_path = tmp_path / "nested" / "model.glb" + + size = base.download("https://assets.meshy.ai/model.glb", str(output_path)) + + assert size == len(b"glb-bytes") + assert output_path.read_bytes() == b"glb-bytes" + response.raise_for_status.assert_called_once_with() + get.assert_called_once_with("https://assets.meshy.ai/model.glb") From 7e07d8cd54a91e8538442e20b9840528340d774e Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 01:23:13 -0500 Subject: [PATCH 286/287] test: expand aws organizations coverage --- tests/connectors/test_aws_organizations.py | 350 +++++++++++++++++++++ 1 file changed, 350 insertions(+) diff --git a/tests/connectors/test_aws_organizations.py b/tests/connectors/test_aws_organizations.py index 48c2585..7167038 100644 --- a/tests/connectors/test_aws_organizations.py +++ b/tests/connectors/test_aws_organizations.py @@ -3,6 +3,7 @@ from __future__ import annotations +from collections.abc import Mapping from typing import Any from unittest.mock import MagicMock @@ -39,6 +40,47 @@ def list_roots(self): return {"Roots": [{"Id": "r-root"}]} +class _ParentPaginator: + 
def __init__(self, pages_by_parent: Mapping[str, list[dict[str, Any]]]) -> None: + self.pages_by_parent = pages_by_parent + + def paginate(self, ParentId: str) -> list[dict[str, Any]]: + return self.pages_by_parent.get(ParentId, []) + + +class _ResourceTagPaginator: + def __init__(self, tags_by_resource: Mapping[str, list[dict[str, str]]]) -> None: + self.tags_by_resource = tags_by_resource + + def paginate(self, ResourceId: str) -> list[dict[str, Any]]: + return [{"Tags": self.tags_by_resource.get(ResourceId, [])}] + + +class _OrganizationTreeClient: + def __init__( + self, + *, + account_pages: Mapping[str, list[dict[str, Any]]] | None = None, + ou_pages: Mapping[str, list[dict[str, Any]]] | None = None, + tags_by_resource: Mapping[str, list[dict[str, str]]] | None = None, + ) -> None: + self.account_pages = account_pages or {} + self.ou_pages = ou_pages or {} + self.tags_by_resource = tags_by_resource or {} + + def list_roots(self): + return {"Roots": [{"Id": "r-root"}]} + + def get_paginator(self, name: str): + if name == "list_accounts_for_parent": + return _ParentPaginator(self.account_pages) + if name == "list_organizational_units_for_parent": + return _ParentPaginator(self.ou_pages) + if name == "list_tags_for_resource": + return _ResourceTagPaginator(self.tags_by_resource) + raise AssertionError(f"unexpected paginator {name}") + + def _logged_text(logger: MagicMock) -> str: """Return concatenated mock logger messages.""" return "\n".join(str(arg) for call in logger.method_calls for arg in call.args) @@ -169,6 +211,63 @@ def list_roots(self): assert exc_info.value.__cause__ is None +def test_get_organization_accounts_recurses_units_tags_and_sorts() -> None: + """Organization account discovery should merge OU metadata, tags, and sort promoted payloads.""" + client = _OrganizationTreeClient( + account_pages={ + "r-root": [ + { + "Accounts": [ + { + "Id": "222222222222", + "Name": "Beta", + "Email": "beta@example.com", + "Status": "ACTIVE", + } + ] + } + ], + 
"ou-prod": [ + { + "Accounts": [ + { + "Id": "111111111111", + "Name": "Alpha", + "Email": "alpha@example.com", + "Status": "ACTIVE", + } + ] + } + ], + }, + ou_pages={ + "r-root": [ + { + "OrganizationalUnits": [ + {"Id": "ou-prod", "Arn": "arn:aws:organizations::123:ou/o-root/ou-prod", "Name": "Prod"} + ] + } + ], + "ou-prod": [{"OrganizationalUnits": []}], + }, + tags_by_resource={ + "111111111111": [{"Key": "Environment", "Value": "prod"}], + "222222222222": [{"Key": "Owner", "Value": "platform"}], + }, + ) + connector = _TestAWSOrganizations() + connector.register_client("organizations", client) + + result = connector.get_organization_accounts(unhump_accounts=True, sort_by_name=True) + + assert isinstance(result, ExtendedDict) + assert list(result.keys()) == ["111111111111", "222222222222"] + assert result["111111111111"]["ou_name"] == "Prod" + assert result["111111111111"]["tags"]["environment"] == "prod" + assert result["111111111111"]["managed"] is False + assert result["222222222222"]["tags"]["owner"] == "platform" + + def test_get_controltower_accounts_redacts_provider_warning() -> None: class _ControlTowerClient: def get_paginator(self, _: str): @@ -189,6 +288,43 @@ def get_paginator(self, _: str): assert "[REDACTED]" in logs +def test_get_controltower_accounts_extracts_outputs_and_skips_failed_products() -> None: + """Control Tower discovery should map AccountId outputs and skip unreadable products.""" + + class _ProvisionedProductsPaginator: + def paginate(self, **_: Any) -> list[dict[str, Any]]: + return [ + { + "ProvisionedProducts": [ + {"Id": "pp-good", "Name": "Managed Alpha", "Status": "AVAILABLE"}, + {"Id": "pp-denied", "Name": "Denied", "Status": "TAINTED"}, + {"Name": "No Id", "Status": "AVAILABLE"}, + ] + } + ] + + class _ControlTowerClient: + def get_paginator(self, name: str): + assert name == "search_provisioned_products" + return _ProvisionedProductsPaginator() + + def get_provisioned_product_outputs(self, ProvisionedProductId: str): + if 
ProvisionedProductId == "pp-denied": + raise ClientError({"Error": {"Code": "Denied", "Message": "private account 123456789012"}}, "Outputs") + return {"Outputs": [{"OutputKey": "AccountId", "OutputValue": "111111111111"}]} + + connector = _TestAWSOrganizations() + connector.register_client("servicecatalog", _ControlTowerClient()) + + result = connector.get_controltower_accounts(unhump_accounts=True, sort_by_name=True) + + assert isinstance(result, ExtendedDict) + assert list(result.keys()) == ["111111111111"] + assert result["111111111111"]["name"] == "Managed Alpha" + assert result["111111111111"]["managed"] is True + assert result["111111111111"]["provisioned_product_id"] == "pp-good" + + def test_preprocess_organization_compiles_sections(mocker, organizations_connector: _TestAWSOrganizations): mock_get_accounts = mocker.patch.object( organizations_connector, @@ -221,6 +357,24 @@ def test_preprocess_organization_compiles_sections(mocker, organizations_connect assert result["organizational_units"] == {"ou-1": {"name": "Shared"}} +def test_preprocess_organization_can_skip_classification(mocker, organizations_connector: _TestAWSOrganizations): + """Legacy preprocess helper should be able to emit raw account metadata.""" + mock_get_accounts = mocker.patch.object( + organizations_connector, + "get_accounts", + return_value={"123": {"name": "core"}}, + ) + mock_classify = mocker.patch.object(organizations_connector, "classify_accounts") + mocker.patch.object(organizations_connector, "get_organization_units", return_value={}) + + result = organizations_connector.preprocess_organization(include_classification=False) + + mock_get_accounts.assert_called_once() + mock_classify.assert_not_called() + assert result["accounts"] == {"123": {"name": "core"}} + assert result["account_count"] == 1 + + def test_get_accounts_merges_controltower_data(mocker, organizations_connector: _TestAWSOrganizations): mock_org = mocker.patch.object( organizations_connector, @@ -252,6 +406,84 @@ 
def test_get_accounts_merges_controltower_data(mocker, organizations_connector: assert result["100"]["name"] == "Alpha" +def test_get_accounts_can_skip_controltower_merge(mocker, organizations_connector: _TestAWSOrganizations): + """Combined account discovery should support Organizations-only callers.""" + mocker.patch.object( + organizations_connector, + "get_organization_accounts", + return_value={"200": {"Name": "Beta", "managed": False}}, + ) + controltower = mocker.patch.object(organizations_connector, "get_controltower_accounts", return_value={}) + + result = organizations_connector.get_accounts(include_controltower=False, unhump_accounts=False) + + controltower.assert_not_called() + assert result["200"]["Name"] == "Beta" + assert result["200"]["managed"] is False + + +def test_get_organization_units_builds_recursive_paths() -> None: + """Organizational unit discovery should preserve recursive OU paths.""" + client = _OrganizationTreeClient( + ou_pages={ + "r-root": [ + { + "OrganizationalUnits": [ + {"Id": "ou-prod", "Arn": "arn:aws:organizations::123:ou/o-root/ou-prod", "Name": "Prod"} + ] + } + ], + "ou-prod": [ + { + "OrganizationalUnits": [ + {"Id": "ou-apps", "Arn": "arn:aws:organizations::123:ou/o-root/ou-apps", "Name": "Apps"} + ] + } + ], + "ou-apps": [{"OrganizationalUnits": []}], + } + ) + connector = _TestAWSOrganizations() + connector.register_client("organizations", client) + + result = connector.get_organization_units(unhump_units=True) + + assert isinstance(result, ExtendedDict) + assert result["ou-prod"]["path"] == "Prod" + assert result["ou-apps"]["path"] == "Prod/Apps" + + +def test_build_org_units_with_tags_collects_control_tower_labels() -> None: + """Tagged OU helper should return normalized metadata used by account labeling.""" + client = _OrganizationTreeClient( + ou_pages={ + "r-root": [ + { + "OrganizationalUnits": [ + {"Id": "ou-prod", "Arn": "arn:aws:organizations::123:ou/o-root/ou-prod", "Name": "Prod"} + ] + } + ], + "ou-prod": 
[{"OrganizationalUnits": []}], + }, + tags_by_resource={"ou-prod": [{"Key": "Environment", "Value": "prod"}]}, + ) + connector = _TestAWSOrganizations() + connector.register_client("organizations", client) + + result = connector._build_org_units_with_tags(role_arn=None) + + assert result == { + "ou-prod": { + "id": "ou-prod", + "name": "Prod", + "arn": "arn:aws:organizations::123:ou/o-root/ou-prod", + "tags": {"Environment": "prod"}, + "control_tower_organizational_unit": "Prod (ou-prod)", + } + } + + def test_label_aws_accounts_builds_metadata(mocker, organizations_connector: _TestAWSOrganizations): mocker.patch.object( organizations_connector, @@ -293,6 +525,69 @@ def test_label_aws_accounts_builds_metadata(mocker, organizations_connector: _Te assert ".example.com" in account["subdomain"] +def test_build_labeled_account_handles_root_user_defaults_and_unit_name_lookup( + organizations_connector: _TestAWSOrganizations, +): + """Account labeling should cover root account and OU-name lookup defaults.""" + labeled = organizations_connector._build_labeled_account( + account_id="123456789012", + account_data={ + "Name": "User-Sandbox", + "Email": "user@example.com", + "OuName": "Sandbox", + "tags": {"Classifications": "Sandbox Accounts"}, + }, + controltower_data=None, + units_lookup={ + "ou-sandbox": { + "id": "ou-sandbox", + "name": "Sandbox", + "tags": {"Spoke": "true", "Classifications": "Development Accounts"}, + } + }, + domains={"dev": "dev.example.com", "default": "example.com"}, + caller_account_id="123456789012", + ) + + assert labeled["execution_role_arn"] == "" + assert labeled["environment"] == "dev" + assert labeled["domain"] == "dev.example.com" + assert labeled["subdomain"] == "dev.example.com" + assert labeled["spoke"] is True + assert set(labeled["classifications"]) == {"accounts", "sandbox", "development"} + + +def test_label_aws_accounts_includes_controltower_only_accounts(mocker, organizations_connector: _TestAWSOrganizations): + """Control 
Tower-only accounts should still receive normalized account labels.""" + mocker.patch.object(organizations_connector, "get_organization_accounts", return_value={}) + mocker.patch.object( + organizations_connector, + "get_controltower_accounts", + return_value={ + "999999999999": { + "Name": "Managed Shared", + "Email": "shared@example.com", + "managed": True, + "OrganizationalUnit": "Shared", + "ProvisionedProductId": "pp-999", + "tags": {"Environment": "stg"}, + } + }, + ) + organizations_connector.get_caller_account_id = lambda: "000000000000" # type: ignore[assignment] + + result = organizations_connector.label_aws_accounts( + domains={"stg": "example.com"}, + aws_organization_units={"ou-shared": {"id": "ou-shared", "name": "Shared", "tags": {}}}, + ) + + account = result["999999999999"] + assert account["managed"] is True + assert account["provisioned_product_id"] == "pp-999" + assert account["organizational_unit"] == "Shared" + assert account["subdomain"] == "managedshared.example.com" + + def test_classify_aws_accounts_generates_suffix(organizations_connector: _TestAWSOrganizations): labeled = { "123": {"classifications": ["production", "shared"]}, @@ -308,6 +603,34 @@ def test_classify_aws_accounts_generates_suffix(organizations_connector: _TestAW assert result["development_accounts_east"] == ["456"] +def test_classify_aws_accounts_fetches_labels_when_domains_are_provided( + mocker, + organizations_connector: _TestAWSOrganizations, +): + """Classification grouping should build labels when callers provide source domains.""" + label = mocker.patch.object( + organizations_connector, + "label_aws_accounts", + return_value={ + "123": {"classifications": ["production", "accounts"]}, + "456": {"classifications": ["shared"]}, + }, + ) + + result = organizations_connector.classify_aws_accounts(domains={"prod": "example.com"}) + + label.assert_called_once() + assert result == {"production_accounts": ["123"], "shared_accounts": ["456"]} + + +def 
test_classify_aws_accounts_requires_domains_when_labels_are_missing( + organizations_connector: _TestAWSOrganizations, +): + """Classification grouping should fail loudly without enough source data.""" + with pytest.raises(ValueError, match="domains mapping required"): + organizations_connector.classify_aws_accounts() + + def test_preprocess_aws_organization_uses_helpers(mocker, organizations_connector: _TestAWSOrganizations): labeled_accounts = { "123": { @@ -351,3 +674,30 @@ def list_roots(self): assert context["organization"]["root_id"] == "r-root" assert context["accounts_by_name"]["Prod Account"]["email"] == "prod@example.com" assert context["accounts_by_classification"]["production_accounts"] == ["123"] + + +def test_preprocess_aws_organization_accepts_precomputed_units(mocker, organizations_connector: _TestAWSOrganizations): + """Full organization preprocessing should reuse caller-provided OU metadata.""" + build_units = mocker.patch.object(organizations_connector, "_build_org_units_with_tags") + mocker.patch.object( + organizations_connector, + "label_aws_accounts", + return_value={ + "123": { + "account_name": "Shared", + "email": "shared@example.com", + "json_key": "Shared", + "classifications": ["shared"], + } + }, + ) + mocker.patch.object(organizations_connector, "classify_aws_accounts", return_value={"shared_accounts": ["123"]}) + + context = organizations_connector.preprocess_aws_organization( + domains={"default": "example.com"}, + aws_organization_units={"ou-shared": {"id": "ou-shared", "name": "Shared", "classifications": ["shared"]}}, + ) + + build_units.assert_not_called() + assert context["organization"]["ou_count"] == 1 + assert context["unit_classifications_by_name"]["Shared"] == ["shared"] From 9aaadce816c9ba7ee50275206b57488088251520 Mon Sep 17 00:00:00 2001 From: Jon Bogaty Date: Thu, 11 Jun 2026 01:28:54 -0500 Subject: [PATCH 287/287] test: expand slack connector coverage --- tests/connectors/test_slack_connector.py | 231 
+++++++++++++++++++++++ 1 file changed, 231 insertions(+) diff --git a/tests/connectors/test_slack_connector.py b/tests/connectors/test_slack_connector.py index 927fe4f..5b38501 100644 --- a/tests/connectors/test_slack_connector.py +++ b/tests/connectors/test_slack_connector.py @@ -69,6 +69,58 @@ def test_slack_api_error_redacts_sensitive_response_text() -> None: assert error.response["authorization"] == "[REDACTED]" +def test_slack_response_payload_normalizes_sdk_shapes() -> None: + """Slack response normalization should redact mapping, SDK-data, get/status, and fallback shapes.""" + + class DataResponse: + data = {"ok": False, "token": "raw-token"} + status_code = 403 + + class GetterResponse: + status_code = 429 + + def get(self, key): + return {"ok": False, "error": "ratelimited", "warning": None}.get(key) + + mapping = slack_module._slack_response_payload({"ok": False, "password": "hunter2"}) + data = slack_module._slack_response_payload(DataResponse()) + getter = slack_module._slack_response_payload(GetterResponse()) + fallback = slack_module._slack_response_payload("authorization: Bearer raw-token") + + assert mapping["password"] == "[REDACTED]" + assert data["token"] == "[REDACTED]" + assert getter == {"ok": False, "error": "ratelimited", "status_code": 429} + assert "raw-token" not in fallback["response"] + assert "[REDACTED]" in fallback["response"] + + +def test_slack_block_helpers_skip_empty_values_and_apply_styles() -> None: + """Slack block helpers should encode mappings, skip empty context values, and apply styles.""" + context = get_field_context_message_blocks( + "deploy", + { + "service": "api", + "empty": "", + "details": {"region": "us-east-1"}, + **{f"k{i}": i for i in range(11)}, + }, + ) + key_value = get_key_value_blocks("count", 3) + rich = get_rich_text_blocks(["hello"], italic=True, strike=True) + + context_text = "\n".join( + str(element["text"]) + for block in context + if block["type"] == "context" + for element in block["elements"] + 
) + assert "empty:" not in context_text + assert "details:" in context_text + assert len([block for block in context if block["type"] == "context"]) == 2 + assert key_value[0]["text"]["text"] == "*Count*: 3" + assert rich[0]["elements"][0]["style"] == {"italic": True, "strike": True} + + class TestSlackConnector: """Test suite for SlackConnector.""" @@ -121,6 +173,20 @@ def test_send_message(self, mock_webclient_class, base_connector_kwargs): assert ts == "1234567890.123456" mock_bot_client.chat_postMessage.assert_called_once() + @patch("extended_data.connectors.slack.WebClient") + def test_send_message_includes_thread_id(self, mock_webclient_class, base_connector_kwargs): + """Thread replies should pass Slack's thread_ts option through to the SDK.""" + mock_bot_client = MagicMock() + mock_bot_client.users_conversations.return_value = {"channels": [{"name": "general", "id": "C12345"}]} + mock_bot_client.chat_postMessage.return_value = {"ts": "1234567890.123456"} + mock_user_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + connector.send_message(channel_name="general", text="Reply", blocks=[], thread_id="1234567890.000001") + + assert mock_bot_client.chat_postMessage.call_args.kwargs["thread_ts"] == "1234567890.000001" + @patch("extended_data.connectors.slack.WebClient") def test_send_message_converts_extended_blocks_for_sdk(self, mock_webclient_class, base_connector_kwargs): """Slack SDK calls should receive builtin payloads even when helpers are extended.""" @@ -230,6 +296,23 @@ def test_send_message_redacts_missing_channel_name(self, mock_webclient_class, b assert "private-channel" not in str(exc_info.value) assert "[REDACTED]" in str(exc_info.value) + @patch("extended_data.connectors.slack.WebClient") + def test_send_message_redacts_missing_channel_id(self, mock_webclient_class, base_connector_kwargs): + """Channels 
without IDs should fail without echoing caller-provided channel names.""" + mock_bot_client = MagicMock() + mock_bot_client.users_conversations.return_value = {"channels": [{"name": "private-channel", "id": ""}]} + + mock_user_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + with pytest.raises(RuntimeError) as exc_info: + connector.send_message(channel_name="private-channel", text="Test message", blocks=[]) + + assert "private-channel" not in str(exc_info.value) + assert "[REDACTED]" in str(exc_info.value) + @patch("extended_data.connectors.slack.WebClient") def test_get_bot_channels_api_error_redacts_response_without_raw_cause( self, @@ -314,6 +397,70 @@ def __init__(self, response): assert "[REDACTED]" in diagnostics assert exc_info.value.__cause__ is None + @patch("extended_data.connectors.slack.WebClient") + def test_call_api_retries_rate_limits_and_groups_success(self, mock_webclient_class, base_connector_kwargs): + """Rate-limited Slack calls should sleep, retry, and group the successful response.""" + + class FakeSlackApiError(Exception): + def __init__(self, response): + self.response = response + + class FakeSlackResponse(dict): + headers = {"Retry-After": "2"} + + mock_user_client = MagicMock() + mock_user_client.users_list.side_effect = [ + FakeSlackApiError(FakeSlackResponse(error="ratelimited")), + {"members": [{"id": "U1", "name": "alice"}]}, + ] + mock_bot_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + with ( + patch("extended_data.connectors.slack.SlackApiError", FakeSlackApiError), + patch("extended_data.connectors.slack.sleep") as sleep, + ): + result = connector._call_api("users_list", group_by="members") + + assert result == {"U1": {"id": "U1", "name": 
"alice"}} + sleep.assert_called_once_with(2) + assert mock_user_client.users_list.call_count == 2 + + @patch("extended_data.connectors.slack.WebClient") + def test_call_api_rate_limit_timeout(self, mock_webclient_class, base_connector_kwargs): + """Repeated rate limits should raise TimeoutError once the retry budget is exceeded.""" + + class FakeSlackApiError(Exception): + def __init__(self, response): + self.response = response + + class FakeSlackResponse(dict): + headers = {"Retry-After": "31"} + + mock_user_client = MagicMock() + mock_user_client.users_list.side_effect = FakeSlackApiError(FakeSlackResponse(error="ratelimited")) + mock_bot_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + with ( + patch("extended_data.connectors.slack.SlackApiError", FakeSlackApiError), + pytest.raises(TimeoutError, match="timed out after 31 seconds"), + ): + connector._call_api("users_list") + + @patch("extended_data.connectors.slack.WebClient") + def test_call_api_rejects_unsupported_methods(self, mock_webclient_class, base_connector_kwargs): + """Unsupported WebClient methods should fail explicitly.""" + mock_user_client = MagicMock(spec=[]) + mock_bot_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + with pytest.raises(AttributeError, match="not supported"): + connector._call_api("users_list") + @patch("extended_data.connectors.slack.SlackConnector._call_api") @patch("extended_data.connectors.slack.WebClient") def test_list_users_filters_deleted( @@ -355,6 +502,36 @@ def test_list_users_filters_deleted( team_id="T123", ) + @patch("extended_data.connectors.slack.SlackConnector._call_api") + @patch("extended_data.connectors.slack.WebClient") + def 
test_list_users_can_include_all_special_accounts( + self, + mock_webclient_class, + mock_call_api, + base_connector_kwargs, + ): + """Explicit inclusion flags should return deleted, bot, and app users unchanged.""" + mock_call_api.return_value = { + "U1": {"id": "U1", "deleted": True}, + "U2": {"id": "U2", "is_workflow_bot": True}, + "U3": {"id": "U3", "is_app_user": True}, + } + mock_user_client = MagicMock() + mock_bot_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + users = connector.list_users( + include_locale=False, + limit=100, + team_id="T123", + include_deleted=True, + include_bots=True, + include_app_users=True, + ) + + assert users == mock_call_api.return_value + @patch("extended_data.connectors.slack.SlackConnector._call_api") @patch("extended_data.connectors.slack.WebClient") def test_list_usergroups_filters_ids( @@ -395,6 +572,30 @@ def test_list_usergroups_filters_ids( team_id="T123", ) + @patch("extended_data.connectors.slack.SlackConnector._call_api") + @patch("extended_data.connectors.slack.WebClient") + def test_list_usergroups_returns_all_without_identifier_filter( + self, + mock_webclient_class, + mock_call_api, + base_connector_kwargs, + ): + """Usergroup listing should return all groups when no ID filter is supplied.""" + mock_call_api.return_value = { + "S1": {"id": "S1", "name": "Ops"}, + "S2": {"id": "S2", "name": "Eng"}, + } + mock_user_client = MagicMock() + mock_bot_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + groups = connector.list_usergroups(usergroup_ids=[" ", ""]) + + assert groups == mock_call_api.return_value + assert SlackConnector._normalize_identifier_filter(["S1", " S2 ", "", "S1"]) == {"S1", "S2"} + assert 
SlackConnector._normalize_identifier_filter("") is None + @patch("extended_data.connectors.slack.SlackConnector._call_api") @patch("extended_data.connectors.slack.WebClient") def test_list_conversations_channels_only( @@ -436,3 +637,33 @@ def test_list_conversations_channels_only( types="private_channel,public_channel", cursor="cursor123", ) + + @patch("extended_data.connectors.slack.SlackConnector._call_api") + @patch("extended_data.connectors.slack.WebClient") + def test_list_conversations_returns_all_when_not_channels_only( + self, + mock_webclient_class, + mock_call_api, + base_connector_kwargs, + ): + """Conversation listing should preserve non-channel conversations unless filtered.""" + mock_call_api.return_value = { + "C1": {"id": "C1", "is_channel": True}, + "D1": {"id": "D1", "is_channel": False}, + } + mock_user_client = MagicMock() + mock_bot_client = MagicMock() + mock_webclient_class.side_effect = [mock_user_client, mock_bot_client] + connector = SlackConnector(token="test-token", bot_token="bot-token", **base_connector_kwargs) + + conversations = connector.list_conversations( + exclude_archived=False, + limit=100, + team_id="T123", + types="im", + get_members=False, + channels_only=False, + ) + + assert conversations == mock_call_api.return_value + assert mock_call_api.call_args.kwargs["types"] == "im"