Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions py/src/braintrust/integrations/anthropic/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ def __getattr__(self, name: str) -> Any:
("cache_creation_input_tokens", "prompt_cache_creation_tokens"),
)

_ANTHROPIC_CACHE_CREATION_METADATA_FIELDS = (
("ephemeral_5m_input_tokens", "cache_creation_ephemeral_5m_input_tokens"),
("ephemeral_1h_input_tokens", "cache_creation_ephemeral_1h_input_tokens"),
_ANTHROPIC_CACHE_CREATION_METRIC_FIELDS = (
("ephemeral_5m_input_tokens", "prompt_cache_creation_5m_tokens"),
("ephemeral_1h_input_tokens", "prompt_cache_creation_1h_tokens"),
)

_ANTHROPIC_USAGE_METADATA_FIELDS = frozenset(
Expand Down Expand Up @@ -71,10 +71,10 @@ def extract_anthropic_usage(usage: Any) -> tuple[dict[str, float], dict[str, Any
cache_creation = _try_to_dict(usage.get("cache_creation"))
cache_creation_breakdown: list[float] = []
if cache_creation is not None:
for source_name, metadata_key in _ANTHROPIC_CACHE_CREATION_METADATA_FIELDS:
for source_name, metric_name in _ANTHROPIC_CACHE_CREATION_METRIC_FIELDS:
value = cache_creation.get(source_name)
if is_numeric(value):
metadata[metadata_key] = int(value)
metrics[metric_name] = float(value)
cache_creation_breakdown.append(float(value))

server_tool_use = _try_to_dict(usage.get("server_tool_use"))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
interactions:
- request:
body: '{"max_tokens":16,"messages":[{"role":"user","content":"What is the capital of France?"}],"model":"claude-haiku-4-5-20251001","system":[{"type":"text","text":"cached geography facts","cache_control":{"type":"ephemeral","ttl":"1h"}}],"temperature":0}'
headers:
Accept:
- application/json
Content-Type:
- application/json
Host:
- api.anthropic.com
User-Agent:
- Anthropic/Python 0.97.0
anthropic-beta:
- extended-cache-ttl-2025-04-11
anthropic-version:
- '2023-06-01'
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: '{"model":"claude-haiku-4-5-20251001","id":"msg_prompt_cache_1h","type":"message","role":"assistant","content":[{"type":"text","text":"Paris is the capital of France."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":12,"cache_creation_input_tokens":1301,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":1301},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}}'
headers:
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 24 Apr 2026 16:19:43 GMT
Server:
- cloudflare
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
interactions:
- request:
body: '{"max_tokens":16,"messages":[{"role":"user","content":"What is the capital of France?"}],"model":"claude-haiku-4-5-20251001","system":[{"type":"text","text":"cached geography facts","cache_control":{"type":"ephemeral","ttl":"5m"}}],"temperature":0}'
headers:
Accept:
- application/json
Content-Type:
- application/json
Host:
- api.anthropic.com
User-Agent:
- Anthropic/Python 0.97.0
anthropic-version:
- '2023-06-01'
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: '{"model":"claude-haiku-4-5-20251001","id":"msg_prompt_cache_5m","type":"message","role":"assistant","content":[{"type":"text","text":"Paris is the capital of France."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":12,"cache_creation_input_tokens":1248,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":1248,"ephemeral_1h_input_tokens":0},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}}'
headers:
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 24 Apr 2026 16:19:42 GMT
Server:
- cloudflare
status:
code: 200
message: OK
version: 1
78 changes: 70 additions & 8 deletions py/src/braintrust/integrations/anthropic/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
}
PNG_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="
PDF_BASE64 = "JVBERi0xLjAKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PmVuZG9iagoyIDAgb2JqCjw8L1R5cGUvUGFnZXMvS2lkc1szIDAgUl0vQ291bnQgMT4+ZW5kb2JqCjMgMCBvYmoKPDwvVHlwZS9QYWdlL01lZGlhQm94WzAgMCA2MTIgNzkyXT4+ZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZg0KMDAwMDAwMDAxMCAwMDAwMCBuDQowMDAwMDAwMDUzIDAwMDAwIG4NCjAwMDAwMDAxMDIgMDAwMDAgbg0KdHJhaWxlcgo8PC9TaXplIDQvUm9vdCAxIDAgUj4+CnN0YXJ0eHJlZgoxNDkKJUVPRg=="
# Large, repetitive system-prompt payload (300 lines) used to push the request
# over Anthropic's minimum cacheable-prompt size in the prompt-cache tests.
PROMPT_CACHE_TEST_TEXT = "\n".join(
    "Cached geography fact {}: Paris is the capital of France.".format(index)
    for index in range(300)
)


def _get_client():
Expand Down Expand Up @@ -337,17 +338,80 @@ def to_dict(self):
"completion_tokens": 7.0,
"prompt_cached_tokens": 3.0,
"prompt_cache_creation_tokens": 7.0,
"prompt_cache_creation_5m_tokens": 2.0,
"prompt_cache_creation_1h_tokens": 5.0,
"server_tool_use_web_search_requests": 2.0,
"server_tool_use_web_fetch_requests": 1.0,
"tokens": 28.0,
}
assert metadata == {
"cache_creation_ephemeral_5m_input_tokens": 2,
"cache_creation_ephemeral_1h_input_tokens": 5,
"usage_service_tier": "standard",
}


@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path"])
def test_anthropic_messages_create_prompt_cache_5m_metrics(memory_logger):
    """Verify that a 5-minute-TTL cached system prompt surfaces the per-TTL
    prompt-cache creation token counts as span metrics."""
    if os.environ.get("BRAINTRUST_TEST_PACKAGE_VERSION") != "latest":
        pytest.skip("Prompt cache TTL breakdown requires the latest Anthropic SDK cassette")

    wrapped = wrap_anthropic(_get_client())
    # System block marked cacheable with the default (5m) ephemeral TTL.
    cached_system_block = {
        "type": "text",
        "text": PROMPT_CACHE_TEST_TEXT,
        "cache_control": {"type": "ephemeral", "ttl": "5m"},
    }
    resp = wrapped.messages.create(
        model=LATEST_MODEL,
        max_tokens=16,
        temperature=0,
        system=[cached_system_block],
        messages=[{"role": "user", "content": "What is the capital of France?"}],
    )

    span = find_span_by_name(memory_logger.pop(), "anthropic.messages.create")
    metrics = span["metrics"]
    usage = resp.usage
    assert span["output"]["role"] == resp.role
    assert metrics["prompt_cache_creation_tokens"] == usage.cache_creation_input_tokens
    assert metrics["prompt_cache_creation_5m_tokens"] == usage.cache_creation.ephemeral_5m_input_tokens
    assert metrics["prompt_cache_creation_1h_tokens"] == usage.cache_creation.ephemeral_1h_input_tokens


@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path"])
def test_anthropic_messages_create_prompt_cache_1h_metrics(memory_logger):
    """Verify that a 1-hour-TTL cached system prompt (which needs the
    extended-cache-ttl beta header) surfaces the per-TTL prompt-cache
    creation token counts as span metrics."""
    if os.environ.get("BRAINTRUST_TEST_PACKAGE_VERSION") != "latest":
        pytest.skip("Prompt cache TTL breakdown requires the latest Anthropic SDK cassette")

    wrapped = wrap_anthropic(_get_client())
    # System block marked cacheable with the opt-in 1h ephemeral TTL.
    cached_system_block = {
        "type": "text",
        "text": PROMPT_CACHE_TEST_TEXT,
        "cache_control": {"type": "ephemeral", "ttl": "1h"},
    }
    resp = wrapped.messages.create(
        model=LATEST_MODEL,
        max_tokens=16,
        temperature=0,
        extra_headers={"anthropic-beta": "extended-cache-ttl-2025-04-11"},
        system=[cached_system_block],
        messages=[{"role": "user", "content": "What is the capital of France?"}],
    )

    span = find_span_by_name(memory_logger.pop(), "anthropic.messages.create")
    metrics = span["metrics"]
    usage = resp.usage
    assert span["output"]["role"] == resp.role
    assert metrics["prompt_cache_creation_tokens"] == usage.cache_creation_input_tokens
    assert metrics["prompt_cache_creation_5m_tokens"] == usage.cache_creation.ephemeral_5m_input_tokens
    assert metrics["prompt_cache_creation_1h_tokens"] == usage.cache_creation.ephemeral_1h_input_tokens


@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path"])
def test_anthropic_messages_create_with_image_attachment_input(memory_logger):
assert not memory_logger.pop()
Expand Down Expand Up @@ -1279,8 +1343,8 @@ def test_setup_creates_spans(memory_logger):
)
assert metrics["completion_tokens"] == usage.output_tokens
assert metrics["prompt_cache_creation_tokens"] == usage.cache_creation_input_tokens
assert span["metadata"]["cache_creation_ephemeral_5m_input_tokens"] == ephemeral_5m
assert span["metadata"]["cache_creation_ephemeral_1h_input_tokens"] == ephemeral_1h
assert metrics["prompt_cache_creation_5m_tokens"] == ephemeral_5m
assert metrics["prompt_cache_creation_1h_tokens"] == ephemeral_1h
assert "service_tier" not in metrics


Expand Down Expand Up @@ -1310,14 +1374,12 @@ def test_extract_anthropic_usage_preserves_nested_numeric_fields():
assert metrics["completion_tokens"] == 12
assert metrics["tokens"] == 27
assert metrics["prompt_cache_creation_tokens"] == 7
assert metadata["cache_creation_ephemeral_5m_input_tokens"] == 3
assert metadata["cache_creation_ephemeral_1h_input_tokens"] == 4
assert metrics["prompt_cache_creation_5m_tokens"] == 3
assert metrics["prompt_cache_creation_1h_tokens"] == 4
assert metrics["server_tool_use_web_search_requests"] == 2
assert metrics["server_tool_use_web_fetch_requests"] == 1
assert "service_tier" not in metrics
assert metadata == {
"cache_creation_ephemeral_5m_input_tokens": 3,
"cache_creation_ephemeral_1h_input_tokens": 4,
"usage_service_tier": "standard",
"usage_inference_geo": "not_available",
}
Expand Down