diff --git a/py/src/braintrust/integrations/anthropic/_utils.py b/py/src/braintrust/integrations/anthropic/_utils.py index 6e6af181..29a289b8 100644 --- a/py/src/braintrust/integrations/anthropic/_utils.py +++ b/py/src/braintrust/integrations/anthropic/_utils.py @@ -23,9 +23,9 @@ def __getattr__(self, name: str) -> Any: ("cache_creation_input_tokens", "prompt_cache_creation_tokens"), ) -_ANTHROPIC_CACHE_CREATION_METADATA_FIELDS = ( - ("ephemeral_5m_input_tokens", "cache_creation_ephemeral_5m_input_tokens"), - ("ephemeral_1h_input_tokens", "cache_creation_ephemeral_1h_input_tokens"), +_ANTHROPIC_CACHE_CREATION_METRIC_FIELDS = ( + ("ephemeral_5m_input_tokens", "prompt_cache_creation_5m_tokens"), + ("ephemeral_1h_input_tokens", "prompt_cache_creation_1h_tokens"), ) _ANTHROPIC_USAGE_METADATA_FIELDS = frozenset( @@ -71,10 +71,10 @@ def extract_anthropic_usage(usage: Any) -> tuple[dict[str, float], dict[str, Any cache_creation = _try_to_dict(usage.get("cache_creation")) cache_creation_breakdown: list[float] = [] if cache_creation is not None: - for source_name, metadata_key in _ANTHROPIC_CACHE_CREATION_METADATA_FIELDS: + for source_name, metric_name in _ANTHROPIC_CACHE_CREATION_METRIC_FIELDS: value = cache_creation.get(source_name) if is_numeric(value): - metadata[metadata_key] = int(value) + metrics[metric_name] = float(value) cache_creation_breakdown.append(float(value)) server_tool_use = _try_to_dict(usage.get("server_tool_use")) diff --git a/py/src/braintrust/integrations/anthropic/cassettes/latest/test_anthropic_messages_create_prompt_cache_1h_metrics.yaml b/py/src/braintrust/integrations/anthropic/cassettes/latest/test_anthropic_messages_create_prompt_cache_1h_metrics.yaml new file mode 100644 index 00000000..954ac150 --- /dev/null +++ b/py/src/braintrust/integrations/anthropic/cassettes/latest/test_anthropic_messages_create_prompt_cache_1h_metrics.yaml @@ -0,0 +1,34 @@ +interactions: +- request: + body: '{"max_tokens":16,"messages":[{"role":"user","content":"What is the capital of France?"}],"model":"claude-haiku-4-5-20251001","system":[{"type":"text","text":"cached geography facts","cache_control":{"type":"ephemeral","ttl":"1h"}}],"temperature":0}' + headers: + Accept: + - application/json + Content-Type: + - application/json + Host: + - api.anthropic.com + User-Agent: + - Anthropic/Python 0.97.0 + anthropic-beta: + - extended-cache-ttl-2025-04-11 + anthropic-version: + - '2023-06-01' + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: '{"model":"claude-haiku-4-5-20251001","id":"msg_prompt_cache_1h","type":"message","role":"assistant","content":[{"type":"text","text":"Paris is the capital of France."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":12,"cache_creation_input_tokens":1301,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":1301},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}}' + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 24 Apr 2026 16:19:43 GMT + Server: + - cloudflare + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/anthropic/cassettes/latest/test_anthropic_messages_create_prompt_cache_5m_metrics.yaml b/py/src/braintrust/integrations/anthropic/cassettes/latest/test_anthropic_messages_create_prompt_cache_5m_metrics.yaml new file mode 100644 index 00000000..56a9ec76 --- /dev/null +++ b/py/src/braintrust/integrations/anthropic/cassettes/latest/test_anthropic_messages_create_prompt_cache_5m_metrics.yaml @@ -0,0 +1,32 @@ +interactions: +- request: + body: '{"max_tokens":16,"messages":[{"role":"user","content":"What is the capital of France?"}],"model":"claude-haiku-4-5-20251001","system":[{"type":"text","text":"cached geography facts","cache_control":{"type":"ephemeral","ttl":"5m"}}],"temperature":0}' + headers: + Accept: + - application/json + Content-Type: + - application/json + Host: + - api.anthropic.com + User-Agent: + - Anthropic/Python 0.97.0 + anthropic-version: + - '2023-06-01' + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: '{"model":"claude-haiku-4-5-20251001","id":"msg_prompt_cache_5m","type":"message","role":"assistant","content":[{"type":"text","text":"Paris is the capital of France."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":12,"cache_creation_input_tokens":1248,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":1248,"ephemeral_1h_input_tokens":0},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}}' + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 24 Apr 2026 16:19:42 GMT + Server: + - cloudflare + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/anthropic/test_anthropic.py b/py/src/braintrust/integrations/anthropic/test_anthropic.py index ecaf3780..7865e85e 100644 --- a/py/src/braintrust/integrations/anthropic/test_anthropic.py +++ b/py/src/braintrust/integrations/anthropic/test_anthropic.py @@ -41,6 +41,7 @@ } PNG_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==" PDF_BASE64 = "JVBERi0xLjAKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PmVuZG9iagoyIDAgb2JqCjw8L1R5cGUvUGFnZXMvS2lkc1szIDAgUl0vQ291bnQgMT4+ZW5kb2JqCjMgMCBvYmoKPDwvVHlwZS9QYWdlL01lZGlhQm94WzAgMCA2MTIgNzkyXT4+ZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZg0KMDAwMDAwMDAxMCAwMDAwMCBuDQowMDAwMDAwMDUzIDAwMDAwIG4NCjAwMDAwMDAxMDIgMDAwMDAgbg0KdHJhaWxlcgo8PC9TaXplIDQvUm9vdCAxIDAgUj4+CnN0YXJ0eHJlZgoxNDkKJUVPRg==" +PROMPT_CACHE_TEST_TEXT = "\n".join(f"Cached geography fact {i}: Paris is the capital of France." for i in range(300)) def _get_client(): @@ -337,17 +338,80 @@ def to_dict(self): "completion_tokens": 7.0, "prompt_cached_tokens": 3.0, "prompt_cache_creation_tokens": 7.0, + "prompt_cache_creation_5m_tokens": 2.0, + "prompt_cache_creation_1h_tokens": 5.0, "server_tool_use_web_search_requests": 2.0, "server_tool_use_web_fetch_requests": 1.0, "tokens": 28.0, } assert metadata == { - "cache_creation_ephemeral_5m_input_tokens": 2, - "cache_creation_ephemeral_1h_input_tokens": 5, "usage_service_tier": "standard", } +@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path"]) +def test_anthropic_messages_create_prompt_cache_5m_metrics(memory_logger): + if os.environ.get("BRAINTRUST_TEST_PACKAGE_VERSION") != "latest": + pytest.skip("Prompt cache TTL breakdown requires the latest Anthropic SDK cassette") + + client = wrap_anthropic(_get_client()) + response = client.messages.create( + model=LATEST_MODEL, + max_tokens=16, + temperature=0, + system=[ + { + "type": "text", + "text": PROMPT_CACHE_TEST_TEXT, + "cache_control": {"type": "ephemeral", "ttl": "5m"}, + } + ], + messages=[{"role": "user", "content": "What is the capital of France?"}], + ) + + span = find_span_by_name(memory_logger.pop(), "anthropic.messages.create") + assert span["output"]["role"] == response.role + assert span["metrics"]["prompt_cache_creation_tokens"] == response.usage.cache_creation_input_tokens + assert ( + span["metrics"]["prompt_cache_creation_5m_tokens"] == response.usage.cache_creation.ephemeral_5m_input_tokens + ) + assert ( + span["metrics"]["prompt_cache_creation_1h_tokens"] == response.usage.cache_creation.ephemeral_1h_input_tokens + ) + + +@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path"]) +def test_anthropic_messages_create_prompt_cache_1h_metrics(memory_logger): + if os.environ.get("BRAINTRUST_TEST_PACKAGE_VERSION") != "latest": + pytest.skip("Prompt cache TTL breakdown requires the latest Anthropic SDK cassette") + + client = wrap_anthropic(_get_client()) + response = client.messages.create( + model=LATEST_MODEL, + max_tokens=16, + temperature=0, + extra_headers={"anthropic-beta": "extended-cache-ttl-2025-04-11"}, + system=[ + { + "type": "text", + "text": PROMPT_CACHE_TEST_TEXT, + "cache_control": {"type": "ephemeral", "ttl": "1h"}, + } + ], + messages=[{"role": "user", "content": "What is the capital of France?"}], + ) + + span = find_span_by_name(memory_logger.pop(), "anthropic.messages.create") + assert span["output"]["role"] == response.role + assert span["metrics"]["prompt_cache_creation_tokens"] == response.usage.cache_creation_input_tokens + assert ( + span["metrics"]["prompt_cache_creation_5m_tokens"] == response.usage.cache_creation.ephemeral_5m_input_tokens + ) + assert ( + span["metrics"]["prompt_cache_creation_1h_tokens"] == response.usage.cache_creation.ephemeral_1h_input_tokens + ) + + @pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path"]) def test_anthropic_messages_create_with_image_attachment_input(memory_logger): assert not memory_logger.pop() @@ -1279,8 +1343,8 @@ def test_setup_creates_spans(memory_logger): ) assert metrics["completion_tokens"] == usage.output_tokens assert metrics["prompt_cache_creation_tokens"] == usage.cache_creation_input_tokens - assert span["metadata"]["cache_creation_ephemeral_5m_input_tokens"] == ephemeral_5m - assert span["metadata"]["cache_creation_ephemeral_1h_input_tokens"] == ephemeral_1h + assert metrics["prompt_cache_creation_5m_tokens"] == ephemeral_5m + assert metrics["prompt_cache_creation_1h_tokens"] == ephemeral_1h assert "service_tier" not in metrics @@ -1310,14 +1374,12 @@ def test_extract_anthropic_usage_preserves_nested_numeric_fields(): assert metrics["completion_tokens"] == 12 assert metrics["tokens"] == 27 assert metrics["prompt_cache_creation_tokens"] == 7 - assert metadata["cache_creation_ephemeral_5m_input_tokens"] == 3 - assert metadata["cache_creation_ephemeral_1h_input_tokens"] == 4 + assert metrics["prompt_cache_creation_5m_tokens"] == 3 + assert metrics["prompt_cache_creation_1h_tokens"] == 4 assert metrics["server_tool_use_web_search_requests"] == 2 assert metrics["server_tool_use_web_fetch_requests"] == 1 assert "service_tier" not in metrics assert metadata == { - "cache_creation_ephemeral_5m_input_tokens": 3, - "cache_creation_ephemeral_1h_input_tokens": 4, "usage_service_tier": "standard", "usage_inference_geo": "not_available", }