Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions py/src/braintrust/integrations/anthropic/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ def __getattr__(self, name: str) -> Any:
("cache_creation_input_tokens", "prompt_cache_creation_tokens"),
)

_ANTHROPIC_CACHE_CREATION_METADATA_FIELDS = (
("ephemeral_5m_input_tokens", "cache_creation_ephemeral_5m_input_tokens"),
("ephemeral_1h_input_tokens", "cache_creation_ephemeral_1h_input_tokens"),
_ANTHROPIC_CACHE_CREATION_METRIC_FIELDS = (
("ephemeral_5m_input_tokens", "prompt_cache_creation_5m_tokens"),
("ephemeral_1h_input_tokens", "prompt_cache_creation_1h_tokens"),
)

_ANTHROPIC_USAGE_METADATA_FIELDS = frozenset(
Expand Down Expand Up @@ -71,10 +71,10 @@ def extract_anthropic_usage(usage: Any) -> tuple[dict[str, float], dict[str, Any
cache_creation = _try_to_dict(usage.get("cache_creation"))
cache_creation_breakdown: list[float] = []
if cache_creation is not None:
for source_name, metadata_key in _ANTHROPIC_CACHE_CREATION_METADATA_FIELDS:
for source_name, metric_name in _ANTHROPIC_CACHE_CREATION_METRIC_FIELDS:
value = cache_creation.get(source_name)
if is_numeric(value):
metadata[metadata_key] = int(value)
metrics[metric_name] = float(value)
cache_creation_breakdown.append(float(value))

server_tool_use = _try_to_dict(usage.get("server_tool_use"))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
interactions:
- request:
body: '{"max_tokens":16,"messages":[{"role":"user","content":"What is the capital of France?"}],"model":"claude-haiku-4-5-20251001","system":[{"type":"text","text":"cached geography facts","cache_control":{"type":"ephemeral","ttl":"1h"}}],"temperature":0}'
headers:
Accept:
- application/json
Content-Type:
- application/json
Host:
- api.anthropic.com
User-Agent:
- Anthropic/Python 0.97.0
anthropic-beta:
- extended-cache-ttl-2025-04-11
anthropic-version:
- '2023-06-01'
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: '{"model":"claude-haiku-4-5-20251001","id":"msg_prompt_cache_1h","type":"message","role":"assistant","content":[{"type":"text","text":"Paris is the capital of France."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":12,"cache_creation_input_tokens":1301,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":1301},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}}'
headers:
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 24 Apr 2026 16:19:43 GMT
Server:
- cloudflare
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
interactions:
- request:
body: '{"max_tokens":16,"messages":[{"role":"user","content":"What is the capital of France?"}],"model":"claude-haiku-4-5-20251001","system":[{"type":"text","text":"cached geography facts","cache_control":{"type":"ephemeral","ttl":"5m"}}],"temperature":0}'
headers:
Accept:
- application/json
Content-Type:
- application/json
Host:
- api.anthropic.com
User-Agent:
- Anthropic/Python 0.97.0
anthropic-version:
- '2023-06-01'
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: '{"model":"claude-haiku-4-5-20251001","id":"msg_prompt_cache_5m","type":"message","role":"assistant","content":[{"type":"text","text":"Paris is the capital of France."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":12,"cache_creation_input_tokens":1248,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":1248,"ephemeral_1h_input_tokens":0},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}}'
headers:
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 24 Apr 2026 16:19:42 GMT
Server:
- cloudflare
status:
code: 200
message: OK
version: 1
78 changes: 70 additions & 8 deletions py/src/braintrust/integrations/anthropic/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
}
PNG_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="
PDF_BASE64 = "JVBERi0xLjAKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PmVuZG9iagoyIDAgb2JqCjw8L1R5cGUvUGFnZXMvS2lkc1szIDAgUl0vQ291bnQgMT4+ZW5kb2JqCjMgMCBvYmoKPDwvVHlwZS9QYWdlL01lZGlhQm94WzAgMCA2MTIgNzkyXT4+ZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZg0KMDAwMDAwMDAxMCAwMDAwMCBuDQowMDAwMDAwMDUzIDAwMDAwIG4NCjAwMDAwMDAxMDIgMDAwMDAgbg0KdHJhaWxlcgo8PC9TaXplIDQvUm9vdCAxIDAgUj4+CnN0YXJ0eHJlZgoxNDkKJUVPRg=="
# Large, repetitive system-prompt payload (300 lines) used to push the request
# over Anthropic's minimum cacheable-prompt size in the prompt-cache tests.
PROMPT_CACHE_TEST_TEXT = "\n".join(
    "Cached geography fact {}: Paris is the capital of France.".format(index)
    for index in range(300)
)


def _get_client():
Expand Down Expand Up @@ -337,17 +338,80 @@ def to_dict(self):
"completion_tokens": 7.0,
"prompt_cached_tokens": 3.0,
"prompt_cache_creation_tokens": 7.0,
"prompt_cache_creation_5m_tokens": 2.0,
"prompt_cache_creation_1h_tokens": 5.0,
"server_tool_use_web_search_requests": 2.0,
"server_tool_use_web_fetch_requests": 1.0,
"tokens": 28.0,
}
assert metadata == {
"cache_creation_ephemeral_5m_input_tokens": 2,
"cache_creation_ephemeral_1h_input_tokens": 5,
"usage_service_tier": "standard",
}


@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path"])
def test_anthropic_messages_create_prompt_cache_5m_metrics(memory_logger):
    """Verify that a 5-minute-TTL cached system prompt surfaces the per-TTL
    prompt-cache creation token counts as span metrics."""
    if os.environ.get("BRAINTRUST_TEST_PACKAGE_VERSION") != "latest":
        pytest.skip("Prompt cache TTL breakdown requires the latest Anthropic SDK cassette")

    wrapped = wrap_anthropic(_get_client())
    # System block marked cacheable with the default (5m) ephemeral TTL.
    cached_system_block = {
        "type": "text",
        "text": PROMPT_CACHE_TEST_TEXT,
        "cache_control": {"type": "ephemeral", "ttl": "5m"},
    }
    resp = wrapped.messages.create(
        model=LATEST_MODEL,
        max_tokens=16,
        temperature=0,
        system=[cached_system_block],
        messages=[{"role": "user", "content": "What is the capital of France?"}],
    )

    span = find_span_by_name(memory_logger.pop(), "anthropic.messages.create")
    metrics = span["metrics"]
    usage = resp.usage
    assert span["output"]["role"] == resp.role
    assert metrics["prompt_cache_creation_tokens"] == usage.cache_creation_input_tokens
    assert metrics["prompt_cache_creation_5m_tokens"] == usage.cache_creation.ephemeral_5m_input_tokens
    assert metrics["prompt_cache_creation_1h_tokens"] == usage.cache_creation.ephemeral_1h_input_tokens


@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path"])
def test_anthropic_messages_create_prompt_cache_1h_metrics(memory_logger):
    """Verify that a 1-hour-TTL cached system prompt (which needs the
    extended-cache-ttl beta header) surfaces the per-TTL prompt-cache
    creation token counts as span metrics."""
    if os.environ.get("BRAINTRUST_TEST_PACKAGE_VERSION") != "latest":
        pytest.skip("Prompt cache TTL breakdown requires the latest Anthropic SDK cassette")

    wrapped = wrap_anthropic(_get_client())
    # System block marked cacheable with the opt-in 1h ephemeral TTL.
    cached_system_block = {
        "type": "text",
        "text": PROMPT_CACHE_TEST_TEXT,
        "cache_control": {"type": "ephemeral", "ttl": "1h"},
    }
    resp = wrapped.messages.create(
        model=LATEST_MODEL,
        max_tokens=16,
        temperature=0,
        extra_headers={"anthropic-beta": "extended-cache-ttl-2025-04-11"},
        system=[cached_system_block],
        messages=[{"role": "user", "content": "What is the capital of France?"}],
    )

    span = find_span_by_name(memory_logger.pop(), "anthropic.messages.create")
    metrics = span["metrics"]
    usage = resp.usage
    assert span["output"]["role"] == resp.role
    assert metrics["prompt_cache_creation_tokens"] == usage.cache_creation_input_tokens
    assert metrics["prompt_cache_creation_5m_tokens"] == usage.cache_creation.ephemeral_5m_input_tokens
    assert metrics["prompt_cache_creation_1h_tokens"] == usage.cache_creation.ephemeral_1h_input_tokens


@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path"])
def test_anthropic_messages_create_with_image_attachment_input(memory_logger):
assert not memory_logger.pop()
Expand Down Expand Up @@ -1279,8 +1343,8 @@ def test_setup_creates_spans(memory_logger):
)
assert metrics["completion_tokens"] == usage.output_tokens
assert metrics["prompt_cache_creation_tokens"] == usage.cache_creation_input_tokens
assert span["metadata"]["cache_creation_ephemeral_5m_input_tokens"] == ephemeral_5m
assert span["metadata"]["cache_creation_ephemeral_1h_input_tokens"] == ephemeral_1h
assert metrics["prompt_cache_creation_5m_tokens"] == ephemeral_5m
assert metrics["prompt_cache_creation_1h_tokens"] == ephemeral_1h
assert "service_tier" not in metrics


Expand Down Expand Up @@ -1310,14 +1374,12 @@ def test_extract_anthropic_usage_preserves_nested_numeric_fields():
assert metrics["completion_tokens"] == 12
assert metrics["tokens"] == 27
assert metrics["prompt_cache_creation_tokens"] == 7
assert metadata["cache_creation_ephemeral_5m_input_tokens"] == 3
assert metadata["cache_creation_ephemeral_1h_input_tokens"] == 4
assert metrics["prompt_cache_creation_5m_tokens"] == 3
assert metrics["prompt_cache_creation_1h_tokens"] == 4
assert metrics["server_tool_use_web_search_requests"] == 2
assert metrics["server_tool_use_web_fetch_requests"] == 1
assert "service_tier" not in metrics
assert metadata == {
"cache_creation_ephemeral_5m_input_tokens": 3,
"cache_creation_ephemeral_1h_input_tokens": 4,
"usage_service_tier": "standard",
"usage_inference_geo": "not_available",
}
Expand Down