From 1bfdb56a2c68757d8706e29f25824fb6cd8257ab Mon Sep 17 00:00:00 2001 From: Priyanka-Microsoft Date: Mon, 25 May 2026 14:44:56 +0530 Subject: [PATCH 1/5] token usage --- infra/dashboards/deploy-workbooks.ps1 | 103 ++++ infra/dashboards/token-usage-queries.kql | 113 +++++ infra/dashboards/workbook-eks-content.json | 1 + infra/dashboards/workbook-gke-content.json | 1 + infra/main.bicep | 12 +- infra/main.parameters.json | 23 +- infra/main_custom.bicep | 12 +- infra/modules/tokenUsageWorkbook.bicep | 458 ++++++++++++++++++ src/frontend/Dockerfile | 1 + src/processor/Dockerfile | 3 +- src/processor/pyproject.toml | 2 + .../azure_openai_response_retry.py | 181 ++++++- .../agent_framework/groupchat_orchestrator.py | 164 +++++++ .../src/libs/base/orchestrator_base.py | 23 +- src/processor/src/main.py | 29 ++ src/processor/src/main_service.py | 33 ++ src/processor/src/services/queue_service.py | 1 + .../src/steps/analysis/models/step_param.py | 1 + .../orchestration/analysis_orchestrator.py | 1 + .../yaml_convert_orchestrator.py | 1 + .../orchestration/design_orchestrator.py | 1 + .../documentation_orchestrator.py | 1 + .../src/steps/migration_processor.py | 22 + src/processor/src/utils/agent_telemetry.py | 66 +++ src/processor/src/utils/event_utils.py | 64 +++ .../src/utils/token_usage_tracker.py | 403 +++++++++++++++ 26 files changed, 1711 insertions(+), 9 deletions(-) create mode 100644 infra/dashboards/deploy-workbooks.ps1 create mode 100644 infra/dashboards/token-usage-queries.kql create mode 100644 infra/dashboards/workbook-eks-content.json create mode 100644 infra/dashboards/workbook-gke-content.json create mode 100644 infra/modules/tokenUsageWorkbook.bicep create mode 100644 src/processor/src/utils/event_utils.py create mode 100644 src/processor/src/utils/token_usage_tracker.py
diff --git a/infra/dashboards/deploy-workbooks.ps1 b/infra/dashboards/deploy-workbooks.ps1
new file mode 100644
index 00000000..caa88198
--- /dev/null
+++ b/infra/dashboards/deploy-workbooks.ps1
@@ -0,0 +1,103 @@
+# =============================================================
+# LLM Token Usage Workbook Deployment Script
+# =============================================================
+# Deploys the GKE and EKS "LLM Token Usage Dashboard" workbooks from the
+# JSON templates that live next to this script.
+#
+# Usage:
+#   .\deploy-workbooks.ps1 -ResourceGroup <resource-group> -AppInsightsResourceId <app-insights-resource-id> [-Location <azure-region>]
+#
+# Example:
+#   .\deploy-workbooks.ps1 `
+#       -ResourceGroup "rg-my-permanent-rg" `
+#       -AppInsightsResourceId "/subscriptions/<subscription-id>/resourcegroups/<resource-group>/providers/microsoft.insights/components/<app-insights-name>" `
+#       -Location "australiaeast"
+#
+# NOTE: each run creates the workbooks under freshly generated GUIDs, so
+# re-running the script adds new workbooks rather than updating old ones.
+# =============================================================
+
+param(
+    [Parameter(Mandatory=$true)]
+    [string]$ResourceGroup,
+
+    [Parameter(Mandatory=$true)]
+    [string]$AppInsightsResourceId,
+
+    [string]$Location = "australiaeast"
+)
+
+$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
+
+# Resolve the subscription once (instead of once per workbook) and fail fast
+# when the Azure CLI has no usable login.
+$subscriptionId = az account show --query id -o tsv
+if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($subscriptionId)) {
+    throw "Could not resolve the current Azure subscription. Run 'az login' first."
+}
+
+function Deploy-Workbook {
+    # Creates one shared workbook from a template file via the ARM REST API.
+    #   -DisplayName : title shown in the portal (also written to the hidden-title tag).
+    #   -ContentPath : path to the serialized workbook JSON template.
+    # Returns the GUID used as the workbook resource name; throws if the template
+    # cannot be read or the PUT request fails.
+    param(
+        [Parameter(Mandatory=$true)]
+        [string]$DisplayName,
+
+        [Parameter(Mandatory=$true)]
+        [string]$ContentPath
+    )
+
+    $content = Get-Content $ContentPath -Raw -ErrorAction Stop
+
+    # The checked-in templates still carry the fallbackResourceIds of the environment
+    # they were exported from; retarget them at the requested Application Insights.
+    $fallback = '"fallbackResourceIds":["{0}"]' -f $AppInsightsResourceId
+    $content = $content -replace '"fallbackResourceIds"\s*:\s*\[[^\]]*\]', $fallback.Replace('$', '$$')
+
+    $workbookId = [guid]::NewGuid().ToString()
+
+    $body = @{
+        location = $Location
+        kind = "shared"
+        properties = @{
+            displayName = $DisplayName
+            serializedData = $content
+            version = "Notebook/1.0"
+            sourceId = $AppInsightsResourceId
+            category = "workbook"
+        }
+        tags = @{
+            "hidden-title" = $DisplayName
+        }
+    } | ConvertTo-Json -Depth 5
+
+    $url = "https://management.azure.com/subscriptions/$subscriptionId/resourceGroups/$ResourceGroup/providers/microsoft.insights/workbooks/$($workbookId)?api-version=2022-04-01"
+
+    $bodyFile = [System.IO.Path]::GetTempFileName()
+    try {
+        $body | Set-Content $bodyFile -Encoding UTF8
+
+        # Keep the CLI output so a failure can be reported instead of being discarded.
+        $output = az rest --method PUT --url $url --body "@$bodyFile" --headers "Content-Type=application/json" 2>&1
+        if ($LASTEXITCODE -ne 0) {
+            throw "Failed to deploy workbook '$DisplayName': $($output | Out-String)"
+        }
+    }
+    finally {
+        # Always remove the temporary request body, even when the deployment fails.
+        Remove-Item $bodyFile -ErrorAction SilentlyContinue
+    }
+
+    return $workbookId
+}
+
+$gkeId = Deploy-Workbook -DisplayName "LLM Token Usage Dashboard - GKE" -ContentPath "$scriptDir\workbook-gke-content.json"
+Write-Host "Deployed GKE workbook: $gkeId"
+
+$eksId = Deploy-Workbook -DisplayName "LLM Token Usage Dashboard - EKS" -ContentPath "$scriptDir\workbook-eks-content.json"
+Write-Host "Deployed EKS workbook: $eksId"
+
+Write-Host "`nDone! Both workbooks deployed to $ResourceGroup"
diff --git a/infra/dashboards/token-usage-queries.kql b/infra/dashboards/token-usage-queries.kql new file mode 100644 index 00000000..38a0fc19 --- /dev/null +++ b/infra/dashboards/token-usage-queries.kql @@ -0,0 +1,113 @@ +// ============================================================================= +// LLM Token Usage Dashboard Queries for Application Insights +// ============================================================================= +// These KQL queries can be used in Azure Application Insights / Log Analytics +// to visualize token usage across agents, models, steps, and users. +// ============================================================================= + +// ---- 1. 
Overall Token Usage Summary (last 24h) ---- +customEvents +| where name == "LLM_Token_Usage_Summary" +| where timestamp > ago(24h) +| extend process_id = tostring(customDimensions.process_id), + total_input = toint(customDimensions.total_input_tokens), + total_output = toint(customDimensions.total_output_tokens), + total = toint(customDimensions.total_tokens), + call_count = toint(customDimensions.total_calls) +| project timestamp, process_id, total_input, total_output, total, call_count +| order by timestamp desc + +// ---- 2. Per-Agent Token Usage ---- +customEvents +| where name == "LLM_Agent_Token_Usage" +| where timestamp > ago(24h) +| extend agent_name = tostring(customDimensions.agent_name), + input_tokens = toint(customDimensions.input_tokens), + output_tokens = toint(customDimensions.output_tokens), + total_tokens = toint(customDimensions.total_tokens), + calls = toint(customDimensions.call_count), + process_id = tostring(customDimensions.process_id) +| summarize total_input = sum(input_tokens), + total_output = sum(output_tokens), + total = sum(total_tokens), + total_calls = sum(calls) + by agent_name +| order by total desc + +// ---- 3. Per-Model Token Usage ---- +customEvents +| where name == "LLM_Model_Token_Usage" +| where timestamp > ago(24h) +| extend model_name = tostring(customDimensions.model_deployment_name), + input_tokens = toint(customDimensions.input_tokens), + output_tokens = toint(customDimensions.output_tokens), + total_tokens = toint(customDimensions.total_tokens), + calls = toint(customDimensions.call_count), + process_id = tostring(customDimensions.process_id) +| summarize total_input = sum(input_tokens), + total_output = sum(output_tokens), + total = sum(total_tokens), + total_calls = sum(calls) + by model_name +| order by total desc + +// ---- 4. 
Per-Step (Team) Token Usage ---- +customEvents +| where name == "LLM_Step_Token_Usage" +| where timestamp > ago(24h) +| extend step_name = tostring(customDimensions.step_name), + input_tokens = toint(customDimensions.input_tokens), + output_tokens = toint(customDimensions.output_tokens), + total_tokens = toint(customDimensions.total_tokens), + calls = toint(customDimensions.call_count), + process_id = tostring(customDimensions.process_id) +| summarize total_input = sum(input_tokens), + total_output = sum(output_tokens), + total = sum(total_tokens), + total_calls = sum(calls) + by step_name +| order by total desc + +// ---- 5. Per-User Token Usage (requires user_id in process telemetry) ---- +customEvents +| where name == "LLM_Token_Usage_Summary" +| where timestamp > ago(24h) +| extend process_id = tostring(customDimensions.process_id), + total_tokens = toint(customDimensions.total_tokens), + user_id = tostring(customDimensions.user_id) +| summarize total = sum(total_tokens), runs = count() by user_id +| order by total desc + +// ---- 6. Individual LLM Call Log ---- +customEvents +| where name == "LLM_Token_Usage" +| where timestamp > ago(24h) +| extend agent_name = tostring(customDimensions.agent_name), + step_name = tostring(customDimensions.step_name), + model = tostring(customDimensions.model_deployment_name), + input_tokens = toint(customDimensions.input_tokens), + output_tokens = toint(customDimensions.output_tokens), + total_tokens = toint(customDimensions.total_tokens), + process_id = tostring(customDimensions.process_id) +| project timestamp, process_id, agent_name, step_name, model, input_tokens, output_tokens, total_tokens +| order by timestamp desc + +// ---- 7. 
Hourly Token Usage Trend ---- +customEvents +| where name == "LLM_Token_Usage" +| where timestamp > ago(7d) +| extend total_tokens = toint(customDimensions.total_tokens) +| summarize hourly_tokens = sum(total_tokens), calls = count() by bin(timestamp, 1h) +| order by timestamp asc +| render timechart + +// ---- 8. Estimated Cost (GPT-4o pricing: $2.50/1M input, $10/1M output) ---- +customEvents +| where name == "LLM_Token_Usage_Summary" +| where timestamp > ago(24h) +| extend process_id = tostring(customDimensions.process_id), + input_tokens = toint(customDimensions.total_input_tokens), + output_tokens = toint(customDimensions.total_output_tokens) +| extend estimated_cost_usd = (input_tokens / 1000000.0 * 2.50) + (output_tokens / 1000000.0 * 10.0) +| project timestamp, process_id, input_tokens, output_tokens, estimated_cost_usd +| order by estimated_cost_usd desc diff --git a/infra/dashboards/workbook-eks-content.json b/infra/dashboards/workbook-eks-content.json new file mode 100644 index 00000000..04433e99 --- /dev/null +++ b/infra/dashboards/workbook-eks-content.json @@ -0,0 +1 @@ +{"version":"Notebook/1.0","items":[{"type":1,"content":{"json":"# LLM Token Usage Dashboard\n\nThis workbook provides comprehensive visibility into LLM token consumption across agents, models, workflow steps, and users.\n\n---"},"name":"header"},{"type":9,"content":{"version":"KqlParameterItem/1.0","parameters":[{"id":"time-range-param","version":"KqlParameterItem/1.0","name":"TimeRange","type":4,"isRequired":true,"value":{"durationMs":86400000},"typeSettings":{"selectableValues":[{"durationMs":3600000},{"durationMs":14400000},{"durationMs":86400000},{"durationMs":259200000},{"durationMs":604800000},{"durationMs":2592000000}],"allowCustom":true},"label":"Time Range"}],"style":"pills","queryType":0,"resourceType":"microsoft.insights/components"},"name":"parameters"},{"type":1,"content":{"json":"## Overall Token Usage 
Summary"},"name":"summary-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| summarize \n total_input = sum(toint(customDimensions.total_input_tokens)),\n total_output = sum(toint(customDimensions.total_output_tokens)),\n total = sum(toint(customDimensions.total_tokens)),\n total_calls = sum(toint(customDimensions.total_calls)),\n processes = dcount(tostring(customDimensions.process_id))","size":4,"title":"Token Usage Totals","queryType":0,"resourceType":"microsoft.insights/components","visualization":"tiles","tileSettings":{"titleContent":{"columnMatch":"Column1","formatter":1},"leftContent":{"columnMatch":"total","formatter":12,"formatOptions":{"palette":"auto"},"numberFormat":{"unit":0,"options":{"style":"decimal","maximumFractionDigits":0}}},"showBorder":true}},"name":"summary-tiles"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_input = toint(customDimensions.total_input_tokens),\n total_output = toint(customDimensions.total_output_tokens),\n total = toint(customDimensions.total_tokens),\n call_count = toint(customDimensions.total_calls)\n| project timestamp, process_id, total_input, total_output, total, call_count\n| order by timestamp desc","size":0,"title":"Token Usage by Process","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"blue"}}]}},"name":"summary-table"},{"type":1,"content":{"json":"## Per-Agent Token Usage"},"name":"agent-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Agent_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n input_tokens = 
toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by agent_name\n| order by total desc","size":0,"title":"Token Consumption by Agent","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"blue"}}]}},"customWidth":"50","name":"agent-table"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Agent_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by agent_name\n| order by total desc","size":0,"title":"Token Distribution by Agent","queryType":0,"resourceType":"microsoft.insights/components","visualization":"piechart"},"customWidth":"50","name":"agent-chart"},{"type":1,"content":{"json":"## Per-Model Token Usage"},"name":"model-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Model_Token_Usage\"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by model_name\n| order by total desc","size":0,"title":"Token Consumption by 
Model","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"green"}}]}},"customWidth":"50","name":"model-table"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Model_Token_Usage\"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by model_name\n| order by total desc","size":0,"title":"Token Distribution by Model","queryType":0,"resourceType":"microsoft.insights/components","visualization":"piechart"},"customWidth":"50","name":"model-chart"},{"type":1,"content":{"json":"## Per-Step (Team) Token Usage"},"name":"step-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Step_Token_Usage\"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc","size":0,"title":"Token Consumption by Workflow Step","queryType":0,"resourceType":"microsoft.insights/components","visualization":"barchart","chartSettings":{"xAxis":"step_name","yAxis":"total","group":"step_name"}},"customWidth":"50","name":"step-chart"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Step_Token_Usage\"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = 
toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc","size":0,"title":"Step Usage Details","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"orange"}}]}},"customWidth":"50","name":"step-table"},{"type":1,"content":{"json":"## Per-User Token Usage"},"name":"user-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_tokens = toint(customDimensions.total_tokens),\n user_id = tostring(customDimensions.user_id)\n| summarize total = sum(total_tokens), runs = count() by user_id\n| order by total desc","size":0,"title":"Token Usage by User","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"purple"}}]}},"name":"user-table"},{"type":1,"content":{"json":"## Token Usage Trends"},"name":"trend-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage\"\n| where timestamp {TimeRange}\n| extend total_tokens = toint(customDimensions.total_tokens)\n| summarize hourly_tokens = sum(total_tokens), calls = count() by bin(timestamp, 1h)\n| order by timestamp asc","size":0,"title":"Hourly Token Consumption","queryType":0,"resourceType":"microsoft.insights/components","visualization":"linechart","chartSettings":{"xAxis":"timestamp","yAxis":"hourly_tokens","showLegend":true}},"name":"trend-chart"},{"type":1,"content":{"json":"## Estimated Cost\n\n> Cost 
estimates use GPT-4o pricing: **$2.50 / 1M input tokens**, **$10.00 / 1M output tokens**. Adjust for your actual model pricing."},"name":"cost-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n input_tokens = toint(customDimensions.total_input_tokens),\n output_tokens = toint(customDimensions.total_output_tokens)\n| extend estimated_cost_usd = round((input_tokens / 1000000.0 * 2.50) + (output_tokens / 1000000.0 * 10.0), 4)\n| project timestamp, process_id, input_tokens, output_tokens, estimated_cost_usd\n| order by estimated_cost_usd desc","size":0,"title":"Estimated Cost per Process (USD)","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"estimated_cost_usd","formatter":3,"formatOptions":{"palette":"redBright"}}]}},"name":"cost-table"},{"type":1,"content":{"json":"## Individual LLM Call Log"},"name":"calls-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n step_name = tostring(customDimensions.step_name),\n model = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n process_id = tostring(customDimensions.process_id)\n| project timestamp, process_id, agent_name, step_name, model, input_tokens, output_tokens, total_tokens\n| order by timestamp desc\n| take 200","size":0,"title":"Recent LLM Calls (last 
200)","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total_tokens","formatter":3,"formatOptions":{"palette":"blue"}}]}},"name":"calls-table"}],"isLocked":false,"fallbackResourceIds":["/subscriptions/1d5876cd-7603-407a-96d2-ae5ca9a9c5f3/resourcegroups/rg-pricmglogp33/providers/microsoft.insights/components/appi-pricmglogp33usmqm"]} diff --git a/infra/dashboards/workbook-gke-content.json b/infra/dashboards/workbook-gke-content.json new file mode 100644 index 00000000..ad05834c --- /dev/null +++ b/infra/dashboards/workbook-gke-content.json @@ -0,0 +1 @@ +{"version":"Notebook/1.0","items":[{"type":1,"content":{"json":"# LLM Token Usage Dashboard\n\nThis workbook provides comprehensive visibility into LLM token consumption across agents, models, workflow steps, and users.\n\n---"},"name":"header"},{"type":9,"content":{"version":"KqlParameterItem/1.0","parameters":[{"id":"time-range-param","version":"KqlParameterItem/1.0","name":"TimeRange","type":4,"isRequired":true,"value":{"durationMs":86400000},"typeSettings":{"selectableValues":[{"durationMs":3600000},{"durationMs":14400000},{"durationMs":86400000},{"durationMs":259200000},{"durationMs":604800000},{"durationMs":2592000000}],"allowCustom":true},"label":"Time Range"}],"style":"pills","queryType":0,"resourceType":"microsoft.insights/components"},"name":"parameters"},{"type":1,"content":{"json":"## Overall Token Usage Summary"},"name":"summary-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| summarize \n total_input = sum(toint(customDimensions.total_input_tokens)),\n total_output = sum(toint(customDimensions.total_output_tokens)),\n total = sum(toint(customDimensions.total_tokens)),\n total_calls = sum(toint(customDimensions.total_calls)),\n processes = 
dcount(tostring(customDimensions.process_id))","size":4,"title":"Token Usage Totals","queryType":0,"resourceType":"microsoft.insights/components","visualization":"tiles","tileSettings":{"titleContent":{"columnMatch":"Column1","formatter":1},"leftContent":{"columnMatch":"total","formatter":12,"formatOptions":{"palette":"auto"},"numberFormat":{"unit":0,"options":{"style":"decimal","maximumFractionDigits":0}}},"showBorder":true}},"name":"summary-tiles"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_input = toint(customDimensions.total_input_tokens),\n total_output = toint(customDimensions.total_output_tokens),\n total = toint(customDimensions.total_tokens),\n call_count = toint(customDimensions.total_calls)\n| project timestamp, process_id, total_input, total_output, total, call_count\n| order by timestamp desc","size":0,"title":"Token Usage by Process","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"blue"}}]}},"name":"summary-table"},{"type":1,"content":{"json":"## Per-Agent Token Usage"},"name":"agent-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Agent_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by agent_name\n| order by total desc","size":0,"title":"Token Consumption by 
Agent","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"blue"}}]}},"customWidth":"50","name":"agent-table"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Agent_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by agent_name\n| order by total desc","size":0,"title":"Token Distribution by Agent","queryType":0,"resourceType":"microsoft.insights/components","visualization":"piechart"},"customWidth":"50","name":"agent-chart"},{"type":1,"content":{"json":"## Per-Model Token Usage"},"name":"model-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Model_Token_Usage\"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by model_name\n| order by total desc","size":0,"title":"Token Consumption by Model","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"green"}}]}},"customWidth":"50","name":"model-table"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Model_Token_Usage\"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total 
= sum(total_tokens) by model_name\n| order by total desc","size":0,"title":"Token Distribution by Model","queryType":0,"resourceType":"microsoft.insights/components","visualization":"piechart"},"customWidth":"50","name":"model-chart"},{"type":1,"content":{"json":"## Per-Step (Team) Token Usage"},"name":"step-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Step_Token_Usage\"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc","size":0,"title":"Token Consumption by Workflow Step","queryType":0,"resourceType":"microsoft.insights/components","visualization":"barchart","chartSettings":{"xAxis":"step_name","yAxis":"total","group":"step_name"}},"customWidth":"50","name":"step-chart"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Step_Token_Usage\"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc","size":0,"title":"Step Usage 
Details","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"orange"}}]}},"customWidth":"50","name":"step-table"},{"type":1,"content":{"json":"## Per-User Token Usage"},"name":"user-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_tokens = toint(customDimensions.total_tokens),\n user_id = tostring(customDimensions.user_id)\n| summarize total = sum(total_tokens), runs = count() by user_id\n| order by total desc","size":0,"title":"Token Usage by User","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"purple"}}]}},"name":"user-table"},{"type":1,"content":{"json":"## Token Usage Trends"},"name":"trend-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage\"\n| where timestamp {TimeRange}\n| extend total_tokens = toint(customDimensions.total_tokens)\n| summarize hourly_tokens = sum(total_tokens), calls = count() by bin(timestamp, 1h)\n| order by timestamp asc","size":0,"title":"Hourly Token Consumption","queryType":0,"resourceType":"microsoft.insights/components","visualization":"linechart","chartSettings":{"xAxis":"timestamp","yAxis":"hourly_tokens","showLegend":true}},"name":"trend-chart"},{"type":1,"content":{"json":"## Estimated Cost\n\n> Cost estimates use GPT-4o pricing: **$2.50 / 1M input tokens**, **$10.00 / 1M output tokens**. 
Adjust for your actual model pricing."},"name":"cost-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n input_tokens = toint(customDimensions.total_input_tokens),\n output_tokens = toint(customDimensions.total_output_tokens)\n| extend estimated_cost_usd = round((input_tokens / 1000000.0 * 2.50) + (output_tokens / 1000000.0 * 10.0), 4)\n| project timestamp, process_id, input_tokens, output_tokens, estimated_cost_usd\n| order by estimated_cost_usd desc","size":0,"title":"Estimated Cost per Process (USD)","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"estimated_cost_usd","formatter":3,"formatOptions":{"palette":"redBright"}}]}},"name":"cost-table"},{"type":1,"content":{"json":"## Individual LLM Call Log"},"name":"calls-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n step_name = tostring(customDimensions.step_name),\n model = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n process_id = tostring(customDimensions.process_id)\n| project timestamp, process_id, agent_name, step_name, model, input_tokens, output_tokens, total_tokens\n| order by timestamp desc\n| take 200","size":0,"title":"Recent LLM Calls (last 
200)","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total_tokens","formatter":3,"formatOptions":{"palette":"blue"}}]}},"name":"calls-table"}],"isLocked":false,"fallbackResourceIds":["/subscriptions/1d5876cd-7603-407a-96d2-ae5ca9a9c5f3/resourcegroups/rg-pricmglogp33/providers/microsoft.insights/components/appi-pricmglogp33usmqm"]} diff --git a/infra/main.bicep b/infra/main.bicep index 904fee03..3306c57a 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -91,7 +91,7 @@ param enableTelemetry bool = true param enablePrivateNetworking bool = false -@description('Optional. Enable monitoring applicable resources, aligned with the Well Architected Framework recommendations. This setting enables Application Insights and Log Analytics and configures all the resources applicable resources to send logs. Defaults to false.') -param enableMonitoring bool = false +@description('Optional. Enable monitoring applicable resources, aligned with the Well Architected Framework recommendations. This setting enables Application Insights and Log Analytics and configures all the resources applicable resources to send logs. Defaults to true.') +param enableMonitoring bool = true @description('Optional. Enable scalability for applicable resources, aligned with the Well Architected Framework recommendations. 
Defaults to false.') param enableScalability bool = false @@ -310,6 +310,16 @@ module applicationInsights 'br/public:avm/res/insights/component:0.6.0' = if (en } } +// ========== LLM Token Usage Workbook ========== // +module tokenUsageWorkbook './modules/tokenUsageWorkbook.bicep' = if (enableMonitoring) { + name: take('module.token-usage-workbook.${solutionSuffix}', 64) + params: { + location: solutionLocation + applicationInsightsResourceId: applicationInsights!.outputs.resourceId + tags: allTags + } +} + // ========== Virtual Network ========== // module virtualNetwork './modules/virtualNetwork.bicep' = if (enablePrivateNetworking) { name: take('module.virtual-network.${solutionSuffix}', 64) diff --git a/infra/main.parameters.json b/infra/main.parameters.json index b4a1a7cc..6df293a4 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -26,11 +26,17 @@ "gptDeploymentCapacity": { "value": "${AZURE_ENV_GPT_MODEL_CAPACITY}" }, - "existingLogAnalyticsWorkspaceId": { - "value": "${AZURE_ENV_EXISTING_LOG_ANALYTICS_WORKSPACE_RID}" + "enableTelemetry": { + "value": true }, - "existingFoundryProjectResourceId": { - "value": "${AZURE_EXISTING_AIPROJECT_RESOURCE_ID}" + "enableMonitoring": { + "value": true + }, + "enablePrivateNetworking": { + "value": false + }, + "enableScalability": { + "value": false }, "vmAdminUsername": { "value": "${AZURE_ENV_VM_ADMIN_USERNAME}" @@ -38,8 +44,17 @@ "vmAdminPassword": { "value": "${AZURE_ENV_VM_ADMIN_PASSWORD}" }, + "existingLogAnalyticsWorkspaceId": { + "value": "${AZURE_ENV_EXISTING_LOG_ANALYTICS_WORKSPACE_RID}" + }, + "existingFoundryProjectResourceId": { + "value": "${AZURE_EXISTING_AIPROJECT_RESOURCE_ID}" + }, "imageTag": { "value": "${AZURE_ENV_IMAGE_TAG}" + }, + "vmSize": { + "value": "${AZURE_ENV_VM_SIZE}" } } } diff --git a/infra/main_custom.bicep b/infra/main_custom.bicep index f93b93ab..9e106e59 100644 --- a/infra/main_custom.bicep +++ b/infra/main_custom.bicep @@ -84,7 +84,7 @@ param enableTelemetry 
bool = true param enablePrivateNetworking bool = false @description('Optional. Enable monitoring applicable resources, aligned with the Well Architected Framework recommendations. This setting enables Application Insights and Log Analytics and configures all the resources applicable resources to send logs. Defaults to false.') -param enableMonitoring bool = false +param enableMonitoring bool = true @description('Optional. Enable scalability for applicable resources, aligned with the Well Architected Framework recommendations. Defaults to false.') param enableScalability bool = false @@ -288,6 +288,16 @@ module applicationInsights 'br/public:avm/res/insights/component:0.6.0' = if (en } } +// ========== LLM Token Usage Workbook ========== // +module tokenUsageWorkbook './modules/tokenUsageWorkbook.bicep' = if (enableMonitoring) { + name: take('module.token-usage-workbook.${solutionSuffix}', 64) + params: { + location: solutionLocation + applicationInsightsResourceId: applicationInsights!.outputs.resourceId + tags: allTags + } +} + // ========== Virtual Network ========== // module virtualNetwork './modules/virtualNetwork.bicep' = if (enablePrivateNetworking) { name: take('module.virtual-network.${solutionSuffix}', 64) diff --git a/infra/modules/tokenUsageWorkbook.bicep b/infra/modules/tokenUsageWorkbook.bicep new file mode 100644 index 00000000..6531bdda --- /dev/null +++ b/infra/modules/tokenUsageWorkbook.bicep @@ -0,0 +1,458 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +@description('Required. The location for the workbook resource.') +param location string + +@description('Required. The resource ID of the Application Insights instance to query.') +param applicationInsightsResourceId string + +@description('Optional. Tags to apply to the workbook resource.') +param tags object = {} + +@description('Optional. 
Display name for the workbook.') +param workbookDisplayName string = 'LLM Token Usage Dashboard' + +// Generate a deterministic GUID for the workbook based on resource group and name +var workbookId = guid(resourceGroup().id, 'token-usage-workbook') + +var workbookContent = { + version: 'Notebook/1.0' + items: [ + { + type: 1 + content: { + json: '# LLM Token Usage Dashboard\n\nThis workbook provides comprehensive visibility into LLM token consumption across agents, models, workflow steps, and users.\n\n---' + } + name: 'header' + } + { + type: 9 + content: { + version: 'KqlParameterItem/1.0' + parameters: [ + { + id: 'time-range-param' + version: 'KqlParameterItem/1.0' + name: 'TimeRange' + type: 4 + isRequired: true + value: { + durationMs: 86400000 + } + typeSettings: { + selectableValues: [ + { durationMs: 3600000 } + { durationMs: 14400000 } + { durationMs: 86400000 } + { durationMs: 259200000 } + { durationMs: 604800000 } + { durationMs: 2592000000 } + ] + allowCustom: true + } + label: 'Time Range' + } + ] + style: 'pills' + queryType: 0 + resourceType: 'microsoft.insights/components' + } + name: 'parameters' + } + // ===== Row 1: Summary Tiles ===== + { + type: 1 + content: { + json: '## Overall Token Usage Summary' + } + name: 'summary-header' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Token_Usage_Summary"\n| where timestamp {TimeRange}\n| summarize \n total_input = sum(toint(customDimensions.total_input_tokens)),\n total_output = sum(toint(customDimensions.total_output_tokens)),\n total = sum(toint(customDimensions.total_tokens)),\n total_calls = sum(toint(customDimensions.total_calls)),\n processes = dcount(tostring(customDimensions.process_id))' + size: 4 + title: 'Token Usage Totals' + queryType: 0 + resourceType: 'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'tiles' + tileSettings: { + titleContent: { + columnMatch: 'Column1' + 
formatter: 1 + } + leftContent: { + columnMatch: 'total' + formatter: 12 + formatOptions: { + palette: 'auto' + } + numberFormat: { + unit: 0 + options: { + style: 'decimal' + maximumFractionDigits: 0 + } + } + } + showBorder: true + } + } + name: 'summary-tiles' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Token_Usage_Summary"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_input = toint(customDimensions.total_input_tokens),\n total_output = toint(customDimensions.total_output_tokens),\n total = toint(customDimensions.total_tokens),\n call_count = toint(customDimensions.total_calls)\n| project timestamp, process_id, total_input, total_output, total, call_count\n| order by timestamp desc' + size: 0 + title: 'Token Usage by Process' + queryType: 0 + resourceType: 'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'table' + gridSettings: { + formatters: [ + { + columnMatch: 'total' + formatter: 3 + formatOptions: { + palette: 'blue' + } + } + ] + } + } + name: 'summary-table' + } + // ===== Row 2: Per-Agent Token Usage ===== + { + type: 1 + content: { + json: '## Per-Agent Token Usage' + } + name: 'agent-header' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Agent_Token_Usage"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by agent_name\n| order by total desc' + size: 0 + title: 'Token Consumption by Agent' + queryType: 0 + resourceType: 
'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'table' + gridSettings: { + formatters: [ + { + columnMatch: 'total' + formatter: 3 + formatOptions: { + palette: 'blue' + } + } + ] + } + } + customWidth: '50' + name: 'agent-table' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Agent_Token_Usage"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by agent_name\n| order by total desc' + size: 0 + title: 'Token Distribution by Agent' + queryType: 0 + resourceType: 'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'piechart' + } + customWidth: '50' + name: 'agent-chart' + } + // ===== Row 3: Per-Model Token Usage ===== + { + type: 1 + content: { + json: '## Per-Model Token Usage' + } + name: 'model-header' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Model_Token_Usage"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by model_name\n| order by total desc' + size: 0 + title: 'Token Consumption by Model' + queryType: 0 + resourceType: 'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'table' + gridSettings: { + formatters: [ + { + columnMatch: 'total' + formatter: 3 + formatOptions: { + palette: 'green' + } + } + ] + } + } + customWidth: 
'50' + name: 'model-table' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Model_Token_Usage"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by model_name\n| order by total desc' + size: 0 + title: 'Token Distribution by Model' + queryType: 0 + resourceType: 'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'piechart' + } + customWidth: '50' + name: 'model-chart' + } + // ===== Row 4: Per-Step (Team) Token Usage ===== + { + type: 1 + content: { + json: '## Per-Step (Team) Token Usage' + } + name: 'step-header' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Step_Token_Usage"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc' + size: 0 + title: 'Token Consumption by Workflow Step' + queryType: 0 + resourceType: 'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'barchart' + chartSettings: { + xAxis: 'step_name' + yAxis: 'total' + group: 'step_name' + } + } + customWidth: '50' + name: 'step-chart' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Step_Token_Usage"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n 
output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc' + size: 0 + title: 'Step Usage Details' + queryType: 0 + resourceType: 'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'table' + gridSettings: { + formatters: [ + { + columnMatch: 'total' + formatter: 3 + formatOptions: { + palette: 'orange' + } + } + ] + } + } + customWidth: '50' + name: 'step-table' + } + // ===== Row 5: Per-User Token Usage ===== + { + type: 1 + content: { + json: '## Per-User Token Usage' + } + name: 'user-header' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Token_Usage_Summary"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_tokens = toint(customDimensions.total_tokens),\n user_id = tostring(customDimensions.user_id)\n| summarize total = sum(total_tokens), runs = count() by user_id\n| order by total desc' + size: 0 + title: 'Token Usage by User' + queryType: 0 + resourceType: 'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'table' + gridSettings: { + formatters: [ + { + columnMatch: 'total' + formatter: 3 + formatOptions: { + palette: 'purple' + } + } + ] + } + } + name: 'user-table' + } + // ===== Row 6: Hourly Token Usage Trend ===== + { + type: 1 + content: { + json: '## Token Usage Trends' + } + name: 'trend-header' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Token_Usage"\n| where timestamp {TimeRange}\n| extend total_tokens = toint(customDimensions.total_tokens)\n| summarize hourly_tokens = sum(total_tokens), calls = 
count() by bin(timestamp, 1h)\n| order by timestamp asc' + size: 0 + title: 'Hourly Token Consumption' + queryType: 0 + resourceType: 'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'linechart' + chartSettings: { + xAxis: 'timestamp' + yAxis: 'hourly_tokens' + showLegend: true + } + } + name: 'trend-chart' + } + // ===== Row 7: Estimated Cost ===== + { + type: 1 + content: { + json: '## Estimated Cost\n\n> Cost estimates use GPT-4o pricing: **$2.50 / 1M input tokens**, **$10.00 / 1M output tokens**. Adjust for your actual model pricing.' + } + name: 'cost-header' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Token_Usage_Summary"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n input_tokens = toint(customDimensions.total_input_tokens),\n output_tokens = toint(customDimensions.total_output_tokens)\n| extend estimated_cost_usd = round((input_tokens / 1000000.0 * 2.50) + (output_tokens / 1000000.0 * 10.0), 4)\n| project timestamp, process_id, input_tokens, output_tokens, estimated_cost_usd\n| order by estimated_cost_usd desc' + size: 0 + title: 'Estimated Cost per Process (USD)' + queryType: 0 + resourceType: 'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'table' + gridSettings: { + formatters: [ + { + columnMatch: 'estimated_cost_usd' + formatter: 3 + formatOptions: { + palette: 'redBright' + } + } + ] + } + } + name: 'cost-table' + } + // ===== Row 8: Individual LLM Call Log ===== + { + type: 1 + content: { + json: '## Individual LLM Call Log' + } + name: 'calls-header' + } + { + type: 3 + content: { + version: 'KqlItem/1.0' + query: 'customEvents\n| where name == "LLM_Token_Usage"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n step_name = tostring(customDimensions.step_name),\n model = 
tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n process_id = tostring(customDimensions.process_id)\n| project timestamp, process_id, agent_name, step_name, model, input_tokens, output_tokens, total_tokens\n| order by timestamp desc\n| take 200' + size: 0 + title: 'Recent LLM Calls (last 200)' + queryType: 0 + resourceType: 'microsoft.insights/components' + crossComponentResources: [ + applicationInsightsResourceId + ] + visualization: 'table' + gridSettings: { + formatters: [ + { + columnMatch: 'total_tokens' + formatter: 3 + formatOptions: { + palette: 'blue' + } + } + ] + } + } + name: 'calls-table' + } + ] + isLocked: false + fallbackResourceIds: [ + applicationInsightsResourceId + ] +} + +resource tokenUsageWorkbook 'Microsoft.Insights/workbooks@2023-06-01' = { + name: workbookId + location: location + tags: tags + kind: 'shared' + properties: { + displayName: workbookDisplayName + category: 'workbook' + version: '1.0' + serializedData: string(workbookContent) + sourceId: applicationInsightsResourceId + } +} + +@description('The resource ID of the created workbook.') +output resourceId string = tokenUsageWorkbook.id + +@description('The name of the created workbook.') +output name string = tokenUsageWorkbook.name diff --git a/src/frontend/Dockerfile b/src/frontend/Dockerfile index 0ad16303..c7c40439 100644 --- a/src/frontend/Dockerfile +++ b/src/frontend/Dockerfile @@ -13,6 +13,7 @@ RUN npm install COPY . . 
# Build the app +ENV NODE_OPTIONS="--max-old-space-size=4096" RUN npm run build # Runtime stage diff --git a/src/processor/Dockerfile b/src/processor/Dockerfile index afe293a5..317c8e03 100644 --- a/src/processor/Dockerfile +++ b/src/processor/Dockerfile @@ -36,7 +36,8 @@ RUN curl -fsSLo /tmp/node.tar.gz "https://nodejs.org/dist/v${NODE_VERSION}/node- COPY pyproject.toml uv.lock ./ # Install dependencies using UV -RUN uv sync --frozen --python 3.12 +# Re-lock to pick up any pyproject.toml changes (e.g. new deps), then install. +RUN uv lock --python 3.12 && uv sync --frozen --python 3.12 # Copy the entire source code COPY src/ ./src/ diff --git a/src/processor/pyproject.toml b/src/processor/pyproject.toml index 846621b5..80a0768a 100644 --- a/src/processor/pyproject.toml +++ b/src/processor/pyproject.toml @@ -13,6 +13,8 @@ dependencies = [ "azure-ai-projects==2.0.0b3", "azure-appconfiguration==1.7.2", "azure-core==1.38.0", + "azure-monitor-events-extension==0.1.0", + "azure-monitor-opentelemetry==1.8.7", "azure-cosmos==4.15.0", "azure-identity==1.26.0b1", "azure-storage-blob==12.28.0", diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py index 5851b809..bc7d4e4b 100644 --- a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py +++ b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py @@ -23,6 +23,140 @@ logger = logging.getLogger(__name__) +def _extract_tokens_from_dict_or_obj(ud: Any) -> tuple[int, int, int]: + """Extract (input, output, total) token counts from a dict or object.""" + inp = out = tot = 0 + if isinstance(ud, dict): + inp = ud.get("input_token_count", 0) or ud.get("input_tokens", 0) or 0 + out = ud.get("output_token_count", 0) or ud.get("output_tokens", 0) or 0 + tot = ud.get("total_token_count", 0) or ud.get("total_tokens", 0) or 0 + else: + inp = getattr(ud, "input_token_count", 0) or getattr(ud, "input_tokens", 
0) or 0 + out = getattr(ud, "output_token_count", 0) or getattr(ud, "output_tokens", 0) or 0 + tot = getattr(ud, "total_token_count", 0) or getattr(ud, "total_tokens", 0) or 0 + if not tot: + tot = int(inp) + int(out) + return int(inp), int(out), int(tot) + + +def _try_emit_token_event(inp: int, out: int, tot: int, source: str) -> None: + """Log token usage found in stream/response for diagnostics. + + The actual LLM_Token_Usage event is emitted by TokenUsageTracker.record() + in the orchestrator, which has full context (agent, step, model, user). + This function only logs for debugging to avoid duplicate events. + """ + if tot > 0 or inp > 0 or out > 0: + logger.info( + "[TOKEN_STREAM] usage found: input=%s output=%s total=%s source=%s", + inp, out, tot, source, + ) + + +def _emit_usage_from_stream_item(item: Any) -> None: + """Check a streamed ChatResponseUpdate for usage Content and emit an App Insights event. + + Checks multiple locations where usage data may appear: + 1. item.contents[] with type="usage" and usage_details + 2. item.usage (direct attribute - some SDK versions) + 3. 
item.metadata with usage keys + """ + try: + item_type = type(item).__name__ + + # --- Path 1: contents list with Content(type="usage") --- + contents = getattr(item, "contents", None) + if contents: + for content in contents: + ctype = getattr(content, "type", None) + if ctype == "usage": + # SDK UsageContent uses "details"; fall back to "usage_details" + ud = getattr(content, "details", None) or getattr(content, "usage_details", None) + if ud: + inp, out, tot = _extract_tokens_from_dict_or_obj(ud) + _try_emit_token_event(inp, out, tot, "stream_contents") + return + + # --- Path 2: direct .usage attribute --- + usage = getattr(item, "usage", None) + if usage is not None: + inp, out, tot = _extract_tokens_from_dict_or_obj(usage) + _try_emit_token_event(inp, out, tot, "stream_usage_attr") + return + + # --- Path 3: .metadata dict with usage keys --- + metadata = getattr(item, "metadata", None) + if isinstance(metadata, dict): + if any(k in metadata for k in ("input_tokens", "input_token_count", "usage")): + usage_data = metadata.get("usage", metadata) + inp, out, tot = _extract_tokens_from_dict_or_obj(usage_data) + _try_emit_token_event(inp, out, tot, "stream_metadata") + return + + # --- Diagnostic: log item shape for debugging (only for non-text items) --- + if contents: + content_types = [getattr(c, "type", "?") for c in contents] + if any(t not in ("text",) for t in content_types): + logger.debug( + "[TOKEN_DIAG] item_type=%s content_types=%s attrs=%s", + item_type, + content_types, + [a for a in dir(item) if not a.startswith("_")], + ) + except Exception as e: + logger.debug("[TOKEN_STREAM] error in emit: %s", e) + + +def _emit_usage_from_response(response: Any) -> None: + """Extract and emit token usage from a non-streaming ChatResponse. + + Checks usage_details (SDK attribute) and contents for UsageContent items. 
+ """ + try: + # Path 1: response.usage_details (ChatResponse from SDK) + ud = getattr(response, "usage_details", None) or getattr(response, "details", None) + if ud is not None: + inp, out, tot = _extract_tokens_from_dict_or_obj(ud) + _try_emit_token_event(inp, out, tot, "response_usage_details") + return + + # Path 2: response.usage direct attribute + usage = getattr(response, "usage", None) + if usage is not None: + inp, out, tot = _extract_tokens_from_dict_or_obj(usage) + _try_emit_token_event(inp, out, tot, "response_usage_attr") + return + + # Path 3: contents list with UsageContent + contents = getattr(response, "contents", None) + if contents: + for content in contents: + ctype = getattr(content, "type", None) + if ctype == "usage": + ud = getattr(content, "details", None) or getattr(content, "usage_details", None) + if ud: + inp, out, tot = _extract_tokens_from_dict_or_obj(ud) + _try_emit_token_event(inp, out, tot, "response_contents") + return + + # Path 4: messages list with usage content + messages = getattr(response, "messages", None) + if messages: + for msg in messages: + msg_contents = getattr(msg, "contents", None) + if not msg_contents: + continue + for item in msg_contents: + if getattr(item, "type", None) == "usage": + ud = getattr(item, "details", None) or getattr(item, "usage_details", None) + if ud: + inp, out, tot = _extract_tokens_from_dict_or_obj(ud) + _try_emit_token_event(inp, out, tot, "response_msg_contents") + return + except Exception as e: + logger.debug("[TOKEN_RESPONSE] error in emit: %s", e) + + def _format_exc_brief(exc: BaseException) -> str: name = type(exc).__name__ msg = str(exc) @@ -78,6 +212,20 @@ def _looks_like_rate_limit(error: BaseException) -> bool: if isinstance(status, int) and 500 <= status < 600: return True + # "The model produced invalid content" is a transient error from Azure OpenAI + # when the model output fails content/schema validation — worth retrying. 
+ # "No tool call found" is a 400 error when the conversation has orphaned + # function call outputs with no matching tool call request. + if any( + s in msg + for s in [ + "model produced invalid content", + "invalid content", + "no tool call found", + ] + ): + return True + cause = getattr(error, "__cause__", None) if cause and cause is not error: return _looks_like_rate_limit(cause) @@ -548,12 +696,15 @@ async def _inner_get_response( ) try: - return await _retry_call( + response = await _retry_call( lambda: parent_inner_get_response( messages=effective_messages, chat_options=chat_options, **kwargs ), config=self._retry_config, ) + # Extract and emit token usage from non-streaming response + _emit_usage_from_response(response) + return response except Exception as e: if not ( self._context_trim_config.enabled @@ -643,8 +794,36 @@ async def _tail(): async for item in iterator: yield item + _item_count = 0 + _last_item = None async for item in _tail(): + _item_count += 1 + _last_item = item + _emit_usage_from_stream_item(item) yield item + + # After stream completes, log diagnostic about the last item + if _last_item is not None: + try: + _attrs = [a for a in dir(_last_item) if not a.startswith("_")] + _contents = getattr(_last_item, "contents", None) + _content_info = [] + if _contents: + for _c in _contents: + _ct = getattr(_c, "type", "?") + _ca = [a for a in dir(_c) if not a.startswith("_")] + _content_info.append({"type": _ct, "attrs": _ca}) + _usage_attr = getattr(_last_item, "usage", None) + logger.info( + "[TOKEN_DIAG_FINAL] stream_items=%d last_item_type=%s attrs=%s contents=%s usage_attr=%s", + _item_count, + type(_last_item).__name__, + _attrs, + _content_info, + repr(_usage_attr) if _usage_attr is not None else "None", + ) + except Exception: + pass return except StopAsyncIteration: return diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index 
5cb63938..50711fd3 100644 --- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -36,6 +36,8 @@ from mem0 import AsyncMemory from pydantic import BaseModel, ValidationError +from utils.token_usage_tracker import TokenUsageTracker, extract_usage_from_response, _parse_usage_object + logger = logging.getLogger(__name__) @@ -93,6 +95,7 @@ class OrchestrationResult(Generic[TOutput]): result: TOutput | None = None error: str | None = None execution_time_seconds: float = 0.0 + token_usage_summary: dict[str, Any] | None = None @staticmethod def _to_jsonable(value: Any) -> Any: @@ -156,6 +159,7 @@ def model_dump(self) -> dict[str, Any]: "result": self._to_jsonable(self.result), "error": self.error, "execution_time_seconds": self.execution_time_seconds, + "token_usage_summary": self.token_usage_summary, } def to_json(self, *, indent: int = 2) -> str: @@ -195,6 +199,7 @@ def __init__( max_rounds: int = 100, max_seconds: float | None = None, result_output_format: type[TOutput] | None = None, + token_usage_tracker: TokenUsageTracker | None = None, ): """ Initialize the orchestrator. @@ -224,11 +229,15 @@ def __init__( self.max_seconds = max_seconds self.result_format = result_output_format + # Token usage tracker (optional — provided by OrchestratorBase) + self.token_usage_tracker = token_usage_tracker + # Runtime state self.agents: dict[str, ChatAgent] = participants self.agent_tool_usage: dict[str, list[dict[str, Any]]] = {} self.agent_responses: list[AgentResponse] = [] self._initialized: bool = False + self._streaming_captured_usage: bool = False # Streaming response buffer self._last_executor_id: str | None = None @@ -546,6 +555,11 @@ async def run_stream( # items inside ChatMessage.contents. self._backfill_tool_usage_from_conversation(conversation) + # Backfill token usage from conversation messages. 
+ # Streaming events may not surface usage Content items, but the final + # conversation messages reliably carry them. + self._backfill_token_usage_from_conversation(conversation) + # Post-workflow analysis (optional) final_analysis = None result_format = self.result_format @@ -606,6 +620,11 @@ async def run_stream( execution_time = (datetime.now() - start_time).total_seconds() # Build result + # Collect token usage summary if tracker is active + token_summary = None + if self.token_usage_tracker is not None: + token_summary = self.token_usage_tracker.get_summary() + result = OrchestrationResult[TOutput]( success=True, conversation=conversation, @@ -614,6 +633,7 @@ async def run_stream( result=final_analysis, error=None, execution_time_seconds=execution_time, + token_usage_summary=token_summary, ) # Callback for completion with Typed Result @@ -625,6 +645,10 @@ async def run_stream( except Exception as e: execution_time = (datetime.now() - start_time).total_seconds() + token_summary = None + if self.token_usage_tracker is not None: + token_summary = self.token_usage_tracker.get_summary() + error_result = OrchestrationResult[TOutput]( success=False, conversation=[], @@ -633,6 +657,7 @@ async def run_stream( result=None, error=str(e), execution_time_seconds=execution_time, + token_usage_summary=token_summary, ) if on_workflow_complete: @@ -660,6 +685,78 @@ async def _handle_agent_update( self._append_text_chunk(event) await self._process_tool_calls(event, agent_name, stream_callback) + # Extract token usage from the streaming update if tracker is active. + # Check multiple paths where usage data may appear. 
+ if self.token_usage_tracker is not None: + try: + data = event.data + # Path 1: data.contents with Content(type="usage") + contents = getattr(data, "contents", None) + if contents: + for item in contents: + ctype = getattr(item, "type", None) + if ctype == "usage": + # SDK UsageContent uses "details"; fall back to "usage_details" + ud = getattr(item, "details", None) or getattr(item, "usage_details", None) + if ud: + record = _parse_usage_object(ud) + if record and record.total_tokens > 0: + self.token_usage_tracker.record( + input_tokens=record.input_tokens, + output_tokens=record.output_tokens, + total_tokens=record.total_tokens, + agent_name=agent_name, + step_name=self.name, + ) + self._streaming_captured_usage = True + logger.info( + "[TOKEN_ORCH] recorded from contents: agent=%s step=%s tokens=%s", + agent_name, self.name, record.total_tokens, + ) + return + # Path 2: data.usage direct attribute + usage = getattr(data, "usage", None) + if usage is not None: + record = _parse_usage_object(usage) + if record and record.total_tokens > 0: + self.token_usage_tracker.record( + input_tokens=record.input_tokens, + output_tokens=record.output_tokens, + total_tokens=record.total_tokens, + agent_name=agent_name, + step_name=self.name, + ) + self._streaming_captured_usage = True + logger.info( + "[TOKEN_ORCH] recorded from usage attr: agent=%s step=%s tokens=%s", + agent_name, self.name, record.total_tokens, + ) + return + # Path 3: event itself may carry usage + event_usage = getattr(event, "usage", None) + if event_usage is not None: + record = _parse_usage_object(event_usage) + if record and record.total_tokens > 0: + self.token_usage_tracker.record( + input_tokens=record.input_tokens, + output_tokens=record.output_tokens, + total_tokens=record.total_tokens, + agent_name=agent_name, + step_name=self.name, + ) + self._streaming_captured_usage = True + logger.info( + "[TOKEN_ORCH] recorded from event.usage: agent=%s step=%s tokens=%s", + agent_name, self.name, 
record.total_tokens, + ) + return + except Exception: + logger.debug( + "Failed to extract token usage from update (agent=%s)", + agent_name, + exc_info=True, + ) + def _normalize_executor_id(self, executor_id: str) -> str: """Normalize executor id to agent name. @@ -930,6 +1027,73 @@ def _backfill_tool_usage_from_conversation( # Best effort only; don't break orchestration continue + def _backfill_token_usage_from_conversation( + self, conversation: list[ChatMessage] + ) -> None: + """Extract token usage from the final conversation messages. + + The agent_framework attaches ``Content(type="usage")`` items to + assistant messages when the underlying LLM response completes. + Streaming updates may not surface these, so we scan the final + conversation as a **fallback only when streaming did not capture + any usage** to avoid double-counting. + """ + if self.token_usage_tracker is None: + return + + # Skip backfill if streaming already captured token usage for this orchestrator run. + if getattr(self, "_streaming_captured_usage", False): + logger.info( + "[TOKEN] Skipping backfill — streaming already captured usage (step=%s)", + self.name, + ) + return + + found_any = False + for msg in conversation: + try: + role = getattr(msg, "role", None) + author = getattr(msg, "author_name", None) or "unknown" + contents = getattr(msg, "contents", None) + if not contents: + continue + + for item in contents: + item_type = getattr(item, "type", None) + if item_type != "usage": + continue + + # SDK UsageContent uses "details"; fall back to "usage_details" + ud = getattr(item, "details", None) or getattr(item, "usage_details", None) + if ud is None: + continue + + record = _parse_usage_object(ud) + if record and record.total_tokens > 0: + agent_name = self._normalize_executor_id(author) + self.token_usage_tracker.record( + input_tokens=record.input_tokens, + output_tokens=record.output_tokens, + total_tokens=record.total_tokens, + agent_name=agent_name, + step_name=self.name, + ) + 
found_any = True + except Exception: + continue + + if found_any: + logger.info( + "[TOKEN] Backfilled token usage from conversation (step=%s)", + self.name, + ) + else: + logger.warning( + "[TOKEN] No usage Content found in conversation messages (step=%s, msgs=%d)", + self.name, + len(conversation), + ) + async def _complete_agent_response( self, agent_id: str, diff --git a/src/processor/src/libs/base/orchestrator_base.py b/src/processor/src/libs/base/orchestrator_base.py index 46dce8c6..cfeb8a04 100644 --- a/src/processor/src/libs/base/orchestrator_base.py +++ b/src/processor/src/libs/base/orchestrator_base.py @@ -26,6 +26,7 @@ ) from utils.agent_telemetry import TelemetryManager from utils.console_util import format_agent_message +from utils.token_usage_tracker import TokenUsageTracker from .agent_base import AgentBase @@ -42,6 +43,7 @@ def __init__(self, app_context=None): self.initialized = False self.memory_store: QdrantMemoryStore | None = None self.step_name: str = "" + self.token_tracker: TokenUsageTracker | None = None def is_console_summarization_enabled(self) -> bool: """Return True if console summarization (extra LLM call per turn) is enabled. 
@@ -83,6 +85,23 @@ async def initialize(self, process_id: str): self.agents = await self.create_agents(self.agentinfos, process_id=process_id) self.initialized = True + # Resolve workflow-level token usage tracker from AppContext (if registered) + if self.app_context.is_registered(TokenUsageTracker): + try: + self.token_tracker = self.app_context.get_service(TokenUsageTracker) + # Register model deployment name for all agents so per-model tracking works + try: + deployment_name = self.agent_framework_helper.settings.get_service_config( + "default" + ).chat_deployment_name + if deployment_name and self.token_tracker: + for agent_name in (self.agents or {}): + self.token_tracker.set_agent_model(agent_name, deployment_name) + except Exception: + logger.debug("Could not register agent-model mapping", exc_info=True) + except Exception: + self.token_tracker = None + async def flush_agent_memories(self) -> None: """Flush buffered memories from all agent context providers. @@ -188,10 +207,12 @@ async def create_agents( ) elif agent_info.agent_name == "ResultGenerator": # Structured JSON generation; deterministic and bounded. + # Use 25_000 to prevent truncation of complex nested JSON schemas + # which causes "model produced invalid content" errors. 
builder = ( builder .with_temperature(0.0) - .with_max_tokens(12_000) + .with_max_tokens(25_000) .with_tool_choice("none") ) diff --git a/src/processor/src/main.py b/src/processor/src/main.py index 79531ff4..0c7252ab 100644 --- a/src/processor/src/main.py +++ b/src/processor/src/main.py @@ -45,8 +45,37 @@ def initialize(self): self.application_context.configuration, ) + self._configure_azure_monitor() self.register_services() + def _configure_azure_monitor(self): + """Initialise Azure Monitor OpenTelemetry exporter, if configured.""" + connection_string = os.environ.get( + "APPLICATIONINSIGHTS_CONNECTION_STRING", "" + ).strip() + if not connection_string: + logger.info( + "APPLICATIONINSIGHTS_CONNECTION_STRING not set; " + "skipping Azure Monitor OpenTelemetry configuration." + ) + return + + try: + from azure.monitor.opentelemetry import configure_azure_monitor + + configure_azure_monitor( + connection_string=connection_string, + enable_live_metrics=True, + ) + logger.info( + "Azure Monitor OpenTelemetry configured (live metrics enabled)." + ) + except Exception: + logger.exception( + "Failed to configure Azure Monitor OpenTelemetry; " + "continuing without App Insights export." + ) + def register_services(self): self.application_context.add_singleton( AgentFrameworkHelper, AgentFrameworkHelper() diff --git a/src/processor/src/main_service.py b/src/processor/src/main_service.py index 3b346161..88191403 100644 --- a/src/processor/src/main_service.py +++ b/src/processor/src/main_service.py @@ -105,8 +105,41 @@ def initialize(self): "Application initialized with configuration: %s", self.application_context.configuration, ) + self._configure_azure_monitor() self.register_services() + def _configure_azure_monitor(self): + """Initialise Azure Monitor OpenTelemetry exporter, if configured. + + Required so that ``track_event`` from ``azure-monitor-events-extension`` + has an export pipeline for custom events (e.g. token usage tracking). 
+ """ + connection_string = os.environ.get( + "APPLICATIONINSIGHTS_CONNECTION_STRING", "" + ).strip() + if not connection_string: + logger.info( + "APPLICATIONINSIGHTS_CONNECTION_STRING not set; " + "skipping Azure Monitor OpenTelemetry configuration." + ) + return + + try: + from azure.monitor.opentelemetry import configure_azure_monitor + + configure_azure_monitor( + connection_string=connection_string, + enable_live_metrics=True, + ) + logger.info( + "Azure Monitor OpenTelemetry configured (live metrics enabled)." + ) + except Exception: + logger.exception( + "Failed to configure Azure Monitor OpenTelemetry; " + "continuing without App Insights export." + ) + def register_services(self): """Register application services into the dependency injection container. diff --git a/src/processor/src/services/queue_service.py b/src/processor/src/services/queue_service.py index a6b1be1e..0c65a30f 100644 --- a/src/processor/src/services/queue_service.py +++ b/src/processor/src/services/queue_service.py @@ -1272,6 +1272,7 @@ def _build_task_param(self, queue_message: QueueMessage) -> Analysis_TaskParam: source_file_folder=req["source_file_folder"], workspace_file_folder=req["workspace_file_folder"], output_file_folder=req["output_file_folder"], + user_id=parsed.user_id or req.get("user_id", ""), ) async def _ensure_queues_exist(self): diff --git a/src/processor/src/steps/analysis/models/step_param.py b/src/processor/src/steps/analysis/models/step_param.py index ca358049..3ed75f9a 100644 --- a/src/processor/src/steps/analysis/models/step_param.py +++ b/src/processor/src/steps/analysis/models/step_param.py @@ -16,3 +16,4 @@ class Analysis_TaskParam(BaseModel): source_file_folder: str = Field(description="Path to the source files folder") output_file_folder: str = Field(description="Path to the output files folder") workspace_file_folder: str = Field(description="Path to the workspace files folder") + user_id: str = Field(default="", description="User identifier for token usage 
tracking") diff --git a/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py b/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py index 93f8f2f0..5a182b04 100644 --- a/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py +++ b/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py @@ -84,6 +84,7 @@ async def execute( participants=self.agents, memory_client=None, result_output_format=Analysis_BooleanExtendedResult, + token_usage_tracker=self.token_tracker, ) orchestration_result = await orchestrator.run_stream( diff --git a/src/processor/src/steps/convert/orchestration/yaml_convert_orchestrator.py b/src/processor/src/steps/convert/orchestration/yaml_convert_orchestrator.py index f1fe8b4d..f2ee60ca 100644 --- a/src/processor/src/steps/convert/orchestration/yaml_convert_orchestrator.py +++ b/src/processor/src/steps/convert/orchestration/yaml_convert_orchestrator.py @@ -93,6 +93,7 @@ async def execute( memory_client=None, max_seconds=900, result_output_format=Yaml_ExtendedBooleanResult, + token_usage_tracker=self.token_tracker, ) orchestration_result = await orchestrator.run_stream( diff --git a/src/processor/src/steps/design/orchestration/design_orchestrator.py b/src/processor/src/steps/design/orchestration/design_orchestrator.py index d2dd47f0..68fea0c5 100644 --- a/src/processor/src/steps/design/orchestration/design_orchestrator.py +++ b/src/processor/src/steps/design/orchestration/design_orchestrator.py @@ -84,6 +84,7 @@ async def execute( participants=self.agents, memory_client=None, result_output_format=Design_ExtendedBooleanResult, + token_usage_tracker=self.token_tracker, ) orchestration_result = await orchestrator.run_stream( diff --git a/src/processor/src/steps/documentation/orchestration/documentation_orchestrator.py b/src/processor/src/steps/documentation/orchestration/documentation_orchestrator.py index 0aa6c443..cb995f91 100644 --- 
a/src/processor/src/steps/documentation/orchestration/documentation_orchestrator.py +++ b/src/processor/src/steps/documentation/orchestration/documentation_orchestrator.py @@ -98,6 +98,7 @@ async def execute( participants=self.agents, memory_client=None, result_output_format=Documentation_ExtendedBooleanResult, + token_usage_tracker=self.token_tracker, ) orchestration_result = await orchestrator.run_stream( diff --git a/src/processor/src/steps/migration_processor.py b/src/processor/src/steps/migration_processor.py index 0ded130f..549038e3 100644 --- a/src/processor/src/steps/migration_processor.py +++ b/src/processor/src/steps/migration_processor.py @@ -56,6 +56,7 @@ from libs.reporting.models.failure_context import FailureType from utils.agent_telemetry import TelemetryManager from utils.credential_util import get_bearer_token_provider +from utils.token_usage_tracker import TokenUsageTracker from .analysis.models.step_param import Analysis_TaskParam from .analysis.workflow.analysis_executor import AnalysisExecutor @@ -297,6 +298,14 @@ async def run(self, input_data: Analysis_TaskParam) -> Any: self.app_context._instances.pop(QdrantMemoryStore, None) self.app_context.add_singleton(QdrantMemoryStore, memory_store) + # Create workflow-level token usage tracker and register in app context + token_tracker = TokenUsageTracker( + process_id=input_data.process_id, + user_id=getattr(input_data, "user_id", "") or "", + ) + self.app_context._instances.pop(TokenUsageTracker, None) + self.app_context.add_singleton(TokenUsageTracker, token_tracker) + try: telemetry: TelemetryManager = await self.app_context.get_service_async( TelemetryManager @@ -714,6 +723,19 @@ async def _generate_report_summary( # print(f"{event.__class__.__name__} ({event.origin.value}): {event}") pass finally: + # Emit token usage summary events to Application Insights and persist to Cosmos + try: + token_tracker.emit_summary_events() + telemetry_mgr: TelemetryManager = ( + await 
self.app_context.get_service_async(TelemetryManager) + ) + await telemetry_mgr.persist_token_usage( + process_id=input_data.process_id, + token_summary=token_tracker.get_summary(), + ) + except Exception as e: + logger.warning("Failed to emit/persist token usage: %s", e) + # Clean up shared memory store if memory_store is not None: try: diff --git a/src/processor/src/utils/agent_telemetry.py b/src/processor/src/utils/agent_telemetry.py index 9e574377..15a0d188 100644 --- a/src/processor/src/utils/agent_telemetry.py +++ b/src/processor/src/utils/agent_telemetry.py @@ -296,6 +296,23 @@ class ProcessStatus(RootEntityBase["ProcessStatus", str]): description="Comprehensive UI data including file manifests, dashboard metrics, and downloadable artifacts", ) + # Token Usage Tracking + total_input_tokens: int = 0 + total_output_tokens: int = 0 + total_tokens: int = 0 + token_usage_by_agent: dict[str, dict[str, Any]] = Field( + default_factory=dict, + description="Token usage per agent: {agent_name: {input_tokens, output_tokens, total_tokens, call_count, model_deployment_name}}", + ) + token_usage_by_model: dict[str, dict[str, Any]] = Field( + default_factory=dict, + description="Token usage per model: {model_deployment_name: {input_tokens, output_tokens, total_tokens, call_count}}", + ) + token_usage_by_step: dict[str, dict[str, Any]] = Field( + default_factory=dict, + description="Token usage per step: {step_name: {input_tokens, output_tokens, total_tokens, call_count}}", + ) + class AgentActivityRepository(RepositoryBase[ProcessStatus, str]): def __init__(self, app_context: AppContext): @@ -1625,3 +1642,52 @@ async def get_ui_telemetry_data(self, process_id: str) -> dict[str, Any]: except Exception as e: logger.error(f"[UI-TELEMETRY] Failed to retrieve UI data: {e}") return {} + + async def persist_token_usage( + self, + process_id: str, + token_summary: dict[str, Any], + ) -> None: + """Persist aggregated token usage data to the ProcessStatus in Cosmos DB. 
+ + Parameters + ---------- + process_id: + The process whose telemetry record should be updated. + token_summary: + The output of ``TokenUsageTracker.get_summary()``, containing + ``total``, ``by_agent``, ``by_model``, and ``by_step`` dictionaries. + """ + if not self.repository: + logger.info("[TELEMETRY] Development mode — token usage not persisted") + return + + try: + current_process = await self.repository.get_async(process_id) + if not current_process: + logger.warning( + "[TOKEN] Process %s not found — cannot persist token usage", + process_id, + ) + return + + total = token_summary.get("total", {}) + current_process.total_input_tokens = total.get("input_tokens", 0) + current_process.total_output_tokens = total.get("output_tokens", 0) + current_process.total_tokens = total.get("total_tokens", 0) + current_process.token_usage_by_agent = token_summary.get("by_agent", {}) + current_process.token_usage_by_model = token_summary.get("by_model", {}) + current_process.token_usage_by_step = token_summary.get("by_step", {}) + current_process.last_update_time = _get_utc_timestamp() + + await self.repository.update_async(current_process) + logger.info( + "[TOKEN] Persisted token usage for process %s: total=%d", + process_id, + current_process.total_tokens, + ) + except Exception: + logger.exception( + "[TOKEN] Failed to persist token usage (process_id=%s)", + process_id, + ) diff --git a/src/processor/src/utils/event_utils.py b/src/processor/src/utils/event_utils.py new file mode 100644 index 00000000..abbc1a89 --- /dev/null +++ b/src/processor/src/utils/event_utils.py @@ -0,0 +1,64 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +"""Lightweight helpers for emitting Application Insights custom events from the processor service. + +Mirrors the backend-api ``track_event_if_configured`` pattern so the processor +can emit structured custom events (e.g. token usage) to the same Application +Insights workspace. 
+""" +from __future__ import annotations + +import logging +import os +from typing import Any, Mapping + +logger = logging.getLogger(__name__) + +APP_INSIGHTS_CONN_STRING_ENV = "APPLICATIONINSIGHTS_CONNECTION_STRING" + +_warned_unconfigured: bool = False + + +def _is_app_insights_configured() -> bool: + value = os.environ.get(APP_INSIGHTS_CONN_STRING_ENV) + return bool(value and value.strip()) + + +def track_event_if_configured( + name: str, properties: Mapping[str, Any] | None = None +) -> None: + """Emit an Application Insights custom event, gated on configuration. + + No-op when ``APPLICATIONINSIGHTS_CONNECTION_STRING`` is unset. + Swallows export failures so telemetry never breaks processing. + """ + global _warned_unconfigured + + if not _is_app_insights_configured(): + if not _warned_unconfigured: + logger.warning( + "APPLICATIONINSIGHTS_CONNECTION_STRING is not set; " + "track_event_if_configured(name=%s) is a no-op.", + name, + ) + _warned_unconfigured = True + return + + safe_properties: dict[str, Any] = dict(properties) if properties else {} + + try: + from azure.monitor.events.extension import track_event # type: ignore[import-not-found] + except ImportError: + logger.warning( + "azure-monitor-events-extension is not installed; " + "skipping track_event(name=%s).", + name, + ) + return + + try: + track_event(name, safe_properties) + except Exception: + logger.exception( + "Failed to publish App Insights custom event name=%s.", name + ) diff --git a/src/processor/src/utils/token_usage_tracker.py b/src/processor/src/utils/token_usage_tracker.py new file mode 100644 index 00000000..ae10a68d --- /dev/null +++ b/src/processor/src/utils/token_usage_tracker.py @@ -0,0 +1,403 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +"""LLM Token Usage Tracker for comprehensive consumption monitoring. + +Tracks token usage across four dimensions: +- Per agent (e.g. 
Chief_Architect, EKS_Expert) +- Per team/step (analysis, design, yaml, documentation) +- Per user/process +- Per model deployment + +Usage data is emitted to Application Insights as custom events and can be +persisted to Cosmos DB via the TelemetryManager. +""" +from __future__ import annotations + +import logging +import threading +from dataclasses import dataclass, field +from typing import Any + +from utils.event_utils import track_event_if_configured + +logger = logging.getLogger(__name__) + + +@dataclass +class TokenUsageRecord: + """Token counts for a single LLM interaction.""" + + input_tokens: int = 0 + output_tokens: int = 0 + total_tokens: int = 0 + + +@dataclass +class AggregatedTokenUsage: + """Accumulated token usage with call count.""" + + input_tokens: int = 0 + output_tokens: int = 0 + total_tokens: int = 0 + call_count: int = 0 + + def add(self, record: TokenUsageRecord) -> None: + self.input_tokens += record.input_tokens + self.output_tokens += record.output_tokens + self.total_tokens += record.total_tokens + self.call_count += 1 + + def to_dict(self) -> dict[str, int]: + return { + "input_tokens": self.input_tokens, + "output_tokens": self.output_tokens, + "total_tokens": self.total_tokens, + "call_count": self.call_count, + } + + +class TokenUsageTracker: + """Thread-safe tracker that aggregates LLM token usage across multiple dimensions. + + Accumulates usage per agent, per step (team), per model, and overall per process. + Emits Application Insights custom events for each recorded interaction and + provides summary emission at process completion. 
+ """ + + def __init__(self, process_id: str, user_id: str = ""): + self.process_id = process_id + self.user_id = user_id + self._lock = threading.Lock() + + # Aggregation buckets + self._by_agent: dict[str, AggregatedTokenUsage] = {} + self._by_step: dict[str, AggregatedTokenUsage] = {} + self._by_model: dict[str, AggregatedTokenUsage] = {} + self._total = AggregatedTokenUsage() + + # Agent-to-model mapping for richer telemetry + self._agent_model_map: dict[str, str] = {} + + def set_agent_model(self, agent_name: str, model_deployment_name: str) -> None: + """Register the model deployment used by a specific agent.""" + with self._lock: + self._agent_model_map[agent_name] = model_deployment_name + + def record( + self, + *, + input_tokens: int, + output_tokens: int, + total_tokens: int, + agent_name: str = "", + step_name: str = "", + model_deployment_name: str = "", + ) -> None: + """Record a single LLM call's token usage. + + Accumulates into all relevant dimensions and emits a per-call + Application Insights event. 
+ """ + if total_tokens <= 0 and input_tokens <= 0 and output_tokens <= 0: + return + + if total_tokens <= 0: + total_tokens = input_tokens + output_tokens + + record = TokenUsageRecord( + input_tokens=input_tokens, + output_tokens=output_tokens, + total_tokens=total_tokens, + ) + + # Resolve model from agent map if not provided + if not model_deployment_name and agent_name: + model_deployment_name = self._agent_model_map.get(agent_name, "") + + with self._lock: + self._total.add(record) + + if agent_name: + if agent_name not in self._by_agent: + self._by_agent[agent_name] = AggregatedTokenUsage() + self._by_agent[agent_name].add(record) + + if step_name: + if step_name not in self._by_step: + self._by_step[step_name] = AggregatedTokenUsage() + self._by_step[step_name].add(record) + + if model_deployment_name: + if model_deployment_name not in self._by_model: + self._by_model[model_deployment_name] = AggregatedTokenUsage() + self._by_model[model_deployment_name].add(record) + + # Emit per-call event to Application Insights + try: + track_event_if_configured( + "LLM_Token_Usage", + { + "process_id": self.process_id, + "user_id": self.user_id, + "agent_name": agent_name, + "step_name": step_name, + "model_deployment_name": model_deployment_name, + "input_tokens": str(input_tokens), + "output_tokens": str(output_tokens), + "total_tokens": str(total_tokens), + }, + ) + except Exception: + logger.debug("Failed to emit per-call token usage event", exc_info=True) + + logger.info( + "[TOKEN] Recorded: agent=%s step=%s model=%s input=%d output=%d total=%d | cumulative=%d", + agent_name, + step_name, + model_deployment_name, + input_tokens, + output_tokens, + total_tokens, + self._total.total_tokens, + ) + + def get_summary(self) -> dict[str, Any]: + """Return a snapshot of all accumulated token usage.""" + with self._lock: + return { + "process_id": self.process_id, + "user_id": self.user_id, + "total": self._total.to_dict(), + "by_agent": {k: v.to_dict() for k, v in 
self._by_agent.items()}, + "by_step": {k: v.to_dict() for k, v in self._by_step.items()}, + "by_model": {k: v.to_dict() for k, v in self._by_model.items()}, + } + + def emit_summary_events(self) -> None: + """Emit summary-level Application Insights custom events. + + Call this at the end of a process/workflow to produce aggregated events + that are easy to query in KQL. + """ + summary = self.get_summary() + + try: + # Overall summary + track_event_if_configured( + "LLM_Token_Usage_Summary", + { + "process_id": self.process_id, + "user_id": self.user_id, + "total_input_tokens": str(summary["total"]["input_tokens"]), + "total_output_tokens": str(summary["total"]["output_tokens"]), + "total_tokens": str(summary["total"]["total_tokens"]), + "total_calls": str(summary["total"]["call_count"]), + "agent_count": str(len(summary["by_agent"])), + "model_count": str(len(summary["by_model"])), + "step_count": str(len(summary["by_step"])), + }, + ) + + # Per-agent events + for agent_name, usage in summary["by_agent"].items(): + model = self._agent_model_map.get(agent_name, "") + track_event_if_configured( + "LLM_Agent_Token_Usage", + { + "process_id": self.process_id, + "user_id": self.user_id, + "agent_name": agent_name, + "model_deployment_name": model, + "input_tokens": str(usage["input_tokens"]), + "output_tokens": str(usage["output_tokens"]), + "total_tokens": str(usage["total_tokens"]), + "call_count": str(usage["call_count"]), + }, + ) + + # Per-model events + for model_name, usage in summary["by_model"].items(): + track_event_if_configured( + "LLM_Model_Token_Usage", + { + "process_id": self.process_id, + "user_id": self.user_id, + "model_deployment_name": model_name, + "input_tokens": str(usage["input_tokens"]), + "output_tokens": str(usage["output_tokens"]), + "total_tokens": str(usage["total_tokens"]), + "call_count": str(usage["call_count"]), + }, + ) + + # Per-step (team) events + for step_name, usage in summary["by_step"].items(): + track_event_if_configured( + 
"LLM_Step_Token_Usage", + { + "process_id": self.process_id, + "user_id": self.user_id, + "step_name": step_name, + "input_tokens": str(usage["input_tokens"]), + "output_tokens": str(usage["output_tokens"]), + "total_tokens": str(usage["total_tokens"]), + "call_count": str(usage["call_count"]), + }, + ) + + logger.info( + "[TOKEN] Emitted summary events: total=%d agents=%d models=%d steps=%d", + summary["total"]["total_tokens"], + len(summary["by_agent"]), + len(summary["by_model"]), + len(summary["by_step"]), + ) + except Exception: + logger.exception("[TOKEN] Failed to emit summary events") + + +def extract_usage_from_response(response: Any) -> TokenUsageRecord | None: + """Extract token usage from an agent_framework or OpenAI SDK response object. + + Handles multiple response shapes: + 1. response.usage (OpenAI SDK ChatCompletion) + 2. response.usage_details (agent_framework Content objects) + 3. response dict with usage keys + 4. AgentResponseUpdate with contents containing usage + """ + if response is None: + return None + + # 1. Direct .usage attribute (OpenAI ChatCompletion, Responses API) + usage = getattr(response, "usage", None) + if usage is not None: + record = _parse_usage_object(usage) + if record: + return record + + # 2. .usage_details or .details attribute + usage_details = getattr(response, "details", None) or getattr(response, "usage_details", None) + if usage_details is not None: + record = _parse_usage_object(usage_details) + if record: + return record + + # 3. raw_representation with usage + raw = getattr(response, "raw_representation", None) + if raw is not None: + raw_usage = getattr(raw, "usage", None) + if raw_usage is not None: + record = _parse_usage_object(raw_usage) + if record: + return record + if isinstance(raw, dict) and "usage" in raw: + record = _parse_usage_object(raw["usage"]) + if record: + return record + + # 4. 
contents list with usage items (AgentResponseUpdate) + contents = getattr(response, "contents", None) + if contents: + for item in contents: + item_type = getattr(item, "type", None) + if item_type == "usage": + # SDK UsageContent uses "details"; fall back to "usage_details" + ud = getattr(item, "details", None) or getattr(item, "usage_details", None) + if isinstance(ud, dict): + record = _parse_usage_object(ud) + if record: + return record + elif ud is not None: + record = _parse_usage_object(ud) + if record: + return record + # Direct details/usage_details on content item + ud = getattr(item, "details", None) or getattr(item, "usage_details", None) + if isinstance(ud, dict) and ud: + record = _parse_usage_object(ud) + if record: + return record + # Dict content item + if isinstance(item, dict): + if "details" in item: + record = _parse_usage_object(item["details"]) + if record: + return record + if "usage_details" in item: + record = _parse_usage_object(item["usage_details"]) + if record: + return record + if "input_token_count" in item or "total_token_count" in item: + record = _parse_usage_object(item) + if record: + return record + + # 5. additional_properties + addl = getattr(response, "additional_properties", None) + if isinstance(addl, dict) and "usage" in addl: + record = _parse_usage_object(addl["usage"]) + if record: + return record + + # 6. 
Dict response + if isinstance(response, dict): + if "usage" in response: + record = _parse_usage_object(response["usage"]) + if record: + return record + record = _parse_usage_object(response) + if record: + return record + + return None + + +def _parse_usage_object(usage: Any) -> TokenUsageRecord | None: + """Parse a usage object (dict or object with attrs) into a TokenUsageRecord.""" + if usage is None: + return None + + if isinstance(usage, dict): + inp = ( + usage.get("input_token_count", 0) + or usage.get("prompt_tokens", 0) + or usage.get("input_tokens", 0) + or 0 + ) + out = ( + usage.get("output_token_count", 0) + or usage.get("completion_tokens", 0) + or usage.get("output_tokens", 0) + or 0 + ) + tot = ( + usage.get("total_token_count", 0) + or usage.get("total_tokens", 0) + or (inp + out) + ) + else: + inp = ( + getattr(usage, "input_token_count", 0) + or getattr(usage, "prompt_tokens", 0) + or getattr(usage, "input_tokens", 0) + or 0 + ) + out = ( + getattr(usage, "output_token_count", 0) + or getattr(usage, "completion_tokens", 0) + or getattr(usage, "output_tokens", 0) + or 0 + ) + tot = ( + getattr(usage, "total_token_count", 0) + or getattr(usage, "total_tokens", 0) + or (inp + out) + ) + + if tot > 0 or inp > 0 or out > 0: + return TokenUsageRecord( + input_tokens=int(inp), + output_tokens=int(out), + total_tokens=int(tot) if tot > 0 else int(inp) + int(out), + ) + return None From a31f8f035338a366d9a23eb8162a96034830a41f Mon Sep 17 00:00:00 2001 From: Priyanka-Microsoft Date: Mon, 25 May 2026 15:27:11 +0530 Subject: [PATCH 2/5] optimize the code --- .../agent_framework/groupchat_orchestrator.py | 111 ++++++++---------- src/processor/src/utils/event_utils.py | 17 ++- .../src/utils/token_usage_tracker.py | 91 +++++--------- 3 files changed, 91 insertions(+), 128 deletions(-) diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index 50711fd3..1607b752 100644 
--- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -484,6 +484,7 @@ async def run_stream( self._tool_call_emitted.clear() self._tool_call_recorded.clear() self._tool_call_index.clear() + self._streaming_captured_usage = False self._conversation: list[ChatMessage] = [] # Track conversation during workflow try: @@ -686,70 +687,9 @@ async def _handle_agent_update( await self._process_tool_calls(event, agent_name, stream_callback) # Extract token usage from the streaming update if tracker is active. - # Check multiple paths where usage data may appear. if self.token_usage_tracker is not None: try: - data = event.data - # Path 1: data.contents with Content(type="usage") - contents = getattr(data, "contents", None) - if contents: - for item in contents: - ctype = getattr(item, "type", None) - if ctype == "usage": - # SDK UsageContent uses "details"; fall back to "usage_details" - ud = getattr(item, "details", None) or getattr(item, "usage_details", None) - if ud: - record = _parse_usage_object(ud) - if record and record.total_tokens > 0: - self.token_usage_tracker.record( - input_tokens=record.input_tokens, - output_tokens=record.output_tokens, - total_tokens=record.total_tokens, - agent_name=agent_name, - step_name=self.name, - ) - self._streaming_captured_usage = True - logger.info( - "[TOKEN_ORCH] recorded from contents: agent=%s step=%s tokens=%s", - agent_name, self.name, record.total_tokens, - ) - return - # Path 2: data.usage direct attribute - usage = getattr(data, "usage", None) - if usage is not None: - record = _parse_usage_object(usage) - if record and record.total_tokens > 0: - self.token_usage_tracker.record( - input_tokens=record.input_tokens, - output_tokens=record.output_tokens, - total_tokens=record.total_tokens, - agent_name=agent_name, - step_name=self.name, - ) - self._streaming_captured_usage = True - logger.info( - "[TOKEN_ORCH] recorded from usage attr: 
agent=%s step=%s tokens=%s", - agent_name, self.name, record.total_tokens, - ) - return - # Path 3: event itself may carry usage - event_usage = getattr(event, "usage", None) - if event_usage is not None: - record = _parse_usage_object(event_usage) - if record and record.total_tokens > 0: - self.token_usage_tracker.record( - input_tokens=record.input_tokens, - output_tokens=record.output_tokens, - total_tokens=record.total_tokens, - agent_name=agent_name, - step_name=self.name, - ) - self._streaming_captured_usage = True - logger.info( - "[TOKEN_ORCH] recorded from event.usage: agent=%s step=%s tokens=%s", - agent_name, self.name, record.total_tokens, - ) - return + self._try_record_streaming_usage(event, agent_name) except Exception: logger.debug( "Failed to extract token usage from update (agent=%s)", @@ -757,6 +697,53 @@ async def _handle_agent_update( exc_info=True, ) + def _try_record_streaming_usage(self, event: Any, agent_name: str) -> None: + """Try to extract and record token usage from a streaming event. + + Checks three paths in priority order: + 1. event.data.contents with Content(type="usage") + 2. event.data.usage direct attribute + 3. 
event.usage top-level attribute + """ + candidates: list[tuple[Any, str]] = [] + data = event.data + + # Path 1: data.contents with Content(type="usage") + contents = getattr(data, "contents", None) + if contents: + for item in contents: + if getattr(item, "type", None) == "usage": + ud = getattr(item, "details", None) or getattr(item, "usage_details", None) + if ud: + candidates.append((ud, "contents")) + + # Path 2: data.usage + usage = getattr(data, "usage", None) + if usage is not None: + candidates.append((usage, "data.usage")) + + # Path 3: event.usage + event_usage = getattr(event, "usage", None) + if event_usage is not None: + candidates.append((event_usage, "event.usage")) + + for candidate, source in candidates: + record = _parse_usage_object(candidate) + if record and record.total_tokens > 0: + self.token_usage_tracker.record( + input_tokens=record.input_tokens, + output_tokens=record.output_tokens, + total_tokens=record.total_tokens, + agent_name=agent_name, + step_name=self.name, + ) + self._streaming_captured_usage = True + logger.info( + "[TOKEN_ORCH] recorded from %s: agent=%s step=%s tokens=%s", + source, agent_name, self.name, record.total_tokens, + ) + return + def _normalize_executor_id(self, executor_id: str) -> str: """Normalize executor id to agent name. diff --git a/src/processor/src/utils/event_utils.py b/src/processor/src/utils/event_utils.py index abbc1a89..dc43e8fd 100644 --- a/src/processor/src/utils/event_utils.py +++ b/src/processor/src/utils/event_utils.py @@ -16,6 +16,11 @@ APP_INSIGHTS_CONN_STRING_ENV = "APPLICATIONINSIGHTS_CONNECTION_STRING" +_UNCONFIGURED_WARNING = ( + "APPLICATIONINSIGHTS_CONNECTION_STRING is not set; " + "track_event_if_configured(name=%s) is a no-op." 
+) + _warned_unconfigured: bool = False @@ -24,6 +29,12 @@ def _is_app_insights_configured() -> bool: return bool(value and value.strip()) +def reset_unconfigured_warning_for_tests() -> None: + """Test-only helper: reset the once-per-process warning latch.""" + global _warned_unconfigured + _warned_unconfigured = False + + def track_event_if_configured( name: str, properties: Mapping[str, Any] | None = None ) -> None: @@ -36,11 +47,7 @@ def track_event_if_configured( if not _is_app_insights_configured(): if not _warned_unconfigured: - logger.warning( - "APPLICATIONINSIGHTS_CONNECTION_STRING is not set; " - "track_event_if_configured(name=%s) is a no-op.", - name, - ) + logger.warning(_UNCONFIGURED_WARNING, name) _warned_unconfigured = True return diff --git a/src/processor/src/utils/token_usage_tracker.py b/src/processor/src/utils/token_usage_tracker.py index ae10a68d..25e5e0b6 100644 --- a/src/processor/src/utils/token_usage_tracker.py +++ b/src/processor/src/utils/token_usage_tracker.py @@ -299,34 +299,23 @@ def extract_usage_from_response(response: Any) -> TokenUsageRecord | None: contents = getattr(response, "contents", None) if contents: for item in contents: - item_type = getattr(item, "type", None) - if item_type == "usage": - # SDK UsageContent uses "details"; fall back to "usage_details" + # Try usage-typed content items first, then any item with details + ud = None + if getattr(item, "type", None) == "usage": ud = getattr(item, "details", None) or getattr(item, "usage_details", None) - if isinstance(ud, dict): - record = _parse_usage_object(ud) - if record: - return record - elif ud is not None: - record = _parse_usage_object(ud) - if record: - return record - # Direct details/usage_details on content item - ud = getattr(item, "details", None) or getattr(item, "usage_details", None) - if isinstance(ud, dict) and ud: + if ud is None: + ud = getattr(item, "details", None) or getattr(item, "usage_details", None) + if ud is not None: record = 
_parse_usage_object(ud) if record: return record # Dict content item if isinstance(item, dict): - if "details" in item: - record = _parse_usage_object(item["details"]) - if record: - return record - if "usage_details" in item: - record = _parse_usage_object(item["usage_details"]) - if record: - return record + for key in ("details", "usage_details"): + if key in item: + record = _parse_usage_object(item[key]) + if record: + return record if "input_token_count" in item or "total_token_count" in item: record = _parse_usage_object(item) if record: @@ -352,52 +341,32 @@ def extract_usage_from_response(response: Any) -> TokenUsageRecord | None: return None +def _get_field(obj: Any, *names: str) -> int: + """Read the first non-zero value from *obj* for the given field names. + + Works uniformly for dicts (via ``get``) and objects (via ``getattr``). + """ + getter = obj.get if isinstance(obj, dict) else lambda k, d=0: getattr(obj, k, d) + for name in names: + val = getter(name, 0) + if val: + return int(val) + return 0 + + def _parse_usage_object(usage: Any) -> TokenUsageRecord | None: """Parse a usage object (dict or object with attrs) into a TokenUsageRecord.""" if usage is None: return None - if isinstance(usage, dict): - inp = ( - usage.get("input_token_count", 0) - or usage.get("prompt_tokens", 0) - or usage.get("input_tokens", 0) - or 0 - ) - out = ( - usage.get("output_token_count", 0) - or usage.get("completion_tokens", 0) - or usage.get("output_tokens", 0) - or 0 - ) - tot = ( - usage.get("total_token_count", 0) - or usage.get("total_tokens", 0) - or (inp + out) - ) - else: - inp = ( - getattr(usage, "input_token_count", 0) - or getattr(usage, "prompt_tokens", 0) - or getattr(usage, "input_tokens", 0) - or 0 - ) - out = ( - getattr(usage, "output_token_count", 0) - or getattr(usage, "completion_tokens", 0) - or getattr(usage, "output_tokens", 0) - or 0 - ) - tot = ( - getattr(usage, "total_token_count", 0) - or getattr(usage, "total_tokens", 0) - or (inp + out) - 
) + inp = _get_field(usage, "input_token_count", "prompt_tokens", "input_tokens") + out = _get_field(usage, "output_token_count", "completion_tokens", "output_tokens") + tot = _get_field(usage, "total_token_count", "total_tokens") or (inp + out) if tot > 0 or inp > 0 or out > 0: return TokenUsageRecord( - input_tokens=int(inp), - output_tokens=int(out), - total_tokens=int(tot) if tot > 0 else int(inp) + int(out), + input_tokens=inp, + output_tokens=out, + total_tokens=tot if tot > 0 else inp + out, ) return None From f11248b55a3ff81ceb66f735148688dbb7061b83 Mon Sep 17 00:00:00 2001 From: Priyanka-Microsoft Date: Wed, 27 May 2026 17:45:42 +0530 Subject: [PATCH 3/5] optimize the code --- .../app/libs/logging/llm_token_telemetry.py | 935 ++++++++++++++++++ .../src/tests/test_llm_token_telemetry.py | 572 +++++++++++ .../src/utils/llm_token_telemetry.py | 935 ++++++++++++++++++ .../src/utils/token_usage_tracker.py | 287 ++---- 4 files changed, 2548 insertions(+), 181 deletions(-) create mode 100644 src/backend-api/src/app/libs/logging/llm_token_telemetry.py create mode 100644 src/processor/src/tests/test_llm_token_telemetry.py create mode 100644 src/processor/src/utils/llm_token_telemetry.py diff --git a/src/backend-api/src/app/libs/logging/llm_token_telemetry.py b/src/backend-api/src/app/libs/logging/llm_token_telemetry.py new file mode 100644 index 00000000..b3035fc8 --- /dev/null +++ b/src/backend-api/src/app/libs/logging/llm_token_telemetry.py @@ -0,0 +1,935 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +"""Cross-accelerator LLM token-usage telemetry helpers. + +A single, dependency-light helper module that can be dropped into any Microsoft +Solution Accelerator to capture LLM token usage and emit standardized custom +events to Application Insights. 
+ +Why this file exists +-------------------- +Seven solution accelerators have independently shipped near-identical +``token_usage_utils.py`` modules (see PRs: content-generation #860, CKM #933, +content-processing #586, Container-Migration #257, agentic-data-foundation +#383, customer-chatbot #218, MACAE #1003). They all: + +* extract token counts from agent_framework / Azure OpenAI responses, +* emit the same three custom events (``LLM_Token_Usage_Summary``, + ``LLM_Agent_Token_Usage``, ``LLM_Model_Token_Usage``), +* defensively swallow telemetry errors, +* duplicate the same KQL queries and Azure Workbook. + +This module consolidates the union of those behaviours behind one stable API +so each accelerator can replace its bespoke helper with an import. + +Public API +---------- +- ``TokenUsage`` -- immutable dataclass for counts +- ``extract_usage(obj)`` -- agent_framework run result / message +- ``extract_usage_from_dict(d)`` -- raw dict from any SDK +- ``extract_usage_from_stream_chunk`` -- streaming chunks +- ``extract_realtime_usage(resp)`` -- Azure AI Voice Live response.done +- ``TokenUsageEmitter`` -- emits the three events + optional + per-user / per-team / speech events +- ``TokenUsageScope`` -- context-manager that accumulates and + auto-emits on exit +- ``track_tokens`` -- decorator wrapper around the scope + +Design rules +------------ +* Telemetry NEVER raises. Extraction failures return ``None``; emission + failures are logged at WARNING. +* No hard dependency on ``azure-monitor-events-extension``; if absent the + emitter degrades to logging only. +* Arbitrary correlation dimensions are passed as ``**dimensions`` kwargs and + surface verbatim as custom-event properties. This is how each accelerator + attaches its own keys (``conversation_id``, ``process_id``, ``team_name``, + ``file_name``, ``tenant``, etc.) without forking the helper. 
+""" +from __future__ import annotations + +import asyncio +import functools +import logging +import os +import random +from contextlib import AbstractContextManager +from dataclasses import dataclass, field +from typing import Any, Callable, Iterable, Mapping, Optional +from unittest.mock import NonCallableMock + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Event-name constants -- keep these stable; KQL queries and workbooks bind +# to these exact strings. +# --------------------------------------------------------------------------- +EVENT_SUMMARY = "LLM_Token_Usage_Summary" +EVENT_AGENT = "LLM_Agent_Token_Usage" +EVENT_MODEL = "LLM_Model_Token_Usage" +EVENT_USER = "LLM_User_Token_Usage" +EVENT_TEAM = "LLM_Team_Token_Usage" +EVENT_SPEECH = "Speech_Usage" + + +# Token-count field aliases observed across model providers / SDK versions. +_INPUT_KEYS = ( + "input_token_count", + "input_tokens", + "prompt_tokens", + "promptTokens", +) +_OUTPUT_KEYS = ( + "output_token_count", + "output_tokens", + "completion_tokens", + "completionTokens", +) +_TOTAL_KEYS = ( + "total_token_count", + "total_tokens", + "totalTokens", +) + + +# --------------------------------------------------------------------------- +# Data model +# --------------------------------------------------------------------------- +@dataclass(frozen=True) +class TokenUsage: + """Normalized token-usage record.""" + + input_tokens: int = 0 + output_tokens: int = 0 + total_tokens: int = 0 + + # Optional realtime / voice fields (None unless populated) + input_audio_tokens: Optional[int] = None + input_text_tokens: Optional[int] = None + input_cached_tokens: Optional[int] = None + output_audio_tokens: Optional[int] = None + output_text_tokens: Optional[int] = None + + @property + def has_any(self) -> bool: + return bool(self.input_tokens or self.output_tokens or self.total_tokens) + + def __add__(self, other: "TokenUsage") -> 
"TokenUsage": + if not isinstance(other, TokenUsage): + return NotImplemented + + def _sum(a: Optional[int], b: Optional[int]) -> Optional[int]: + if a is None and b is None: + return None + return (a or 0) + (b or 0) + + return TokenUsage( + input_tokens=self.input_tokens + other.input_tokens, + output_tokens=self.output_tokens + other.output_tokens, + total_tokens=self.total_tokens + other.total_tokens, + input_audio_tokens=_sum(self.input_audio_tokens, other.input_audio_tokens), + input_text_tokens=_sum(self.input_text_tokens, other.input_text_tokens), + input_cached_tokens=_sum(self.input_cached_tokens, other.input_cached_tokens), + output_audio_tokens=_sum(self.output_audio_tokens, other.output_audio_tokens), + output_text_tokens=_sum(self.output_text_tokens, other.output_text_tokens), + ) + + def to_event_props(self) -> dict[str, str]: + """Stringified property bag suitable for App Insights custom events.""" + props: dict[str, str] = { + "input_tokens": str(self.input_tokens), + "output_tokens": str(self.output_tokens), + "total_tokens": str(self.total_tokens), + } + for name in ( + "input_audio_tokens", + "input_text_tokens", + "input_cached_tokens", + "output_audio_tokens", + "output_text_tokens", + ): + value = getattr(self, name) + if value is not None: + props[name] = str(value) + return props + + +# --------------------------------------------------------------------------- +# Low-level coercion helpers +# --------------------------------------------------------------------------- +def _to_int(value: Any, default: int = 0) -> int: + """Best-effort int conversion; bool excluded; never raises.""" + if value is None or isinstance(value, bool): + return default + if isinstance(value, int): + return value + if isinstance(value, float): + return int(value) + if isinstance(value, str): + s = value.strip() + if s.isdigit(): + return int(s) + try: + return int(value) + except (TypeError, ValueError): + return default + + +def _get(obj: Any, key: str, default: 
Any = None) -> Any: + """Read an attribute or dict key uniformly.""" + if obj is None: + return default + if isinstance(obj, Mapping): + return obj.get(key, default) + return getattr(obj, key, default) + + +def _is_iterable(obj: Any) -> bool: + """True only for real iterables (lists/tuples/sets/generators), NOT for + arbitrary objects (e.g. ``unittest.mock.Mock``) that happen to expose + ``__iter__`` but blow up on iteration.""" + if obj is None: + return False + if isinstance(obj, (list, tuple, set, frozenset)): + return True + # Strings are iterable but never the right answer for "messages". + if isinstance(obj, (str, bytes, bytearray, Mapping)): + return False + # Fall back to a duck-typed check, but reject Mock instances which would + # otherwise pretend to support iteration. + if isinstance(obj, NonCallableMock): + return False + return hasattr(obj, "__iter__") + + +def _read_counts(usage_obj: Any) -> Optional[TokenUsage]: + """Read ``input/output/total`` from any usage-bearing object/dict.""" + if usage_obj is None: + return None + + inp = out = tot = 0 + for k in _INPUT_KEYS: + v = _get(usage_obj, k) + if v: + inp = _to_int(v) + break + for k in _OUTPUT_KEYS: + v = _get(usage_obj, k) + if v: + out = _to_int(v) + break + for k in _TOTAL_KEYS: + v = _get(usage_obj, k) + if v: + tot = _to_int(v) + break + + if tot == 0 and (inp or out): + tot = inp + out + if not (inp or out or tot): + return None + return TokenUsage(input_tokens=inp, output_tokens=out, total_tokens=tot) + + +# --------------------------------------------------------------------------- +# Extraction -- public +# --------------------------------------------------------------------------- +def extract_usage(result: Any) -> Optional[TokenUsage]: + """Extract usage from an agent_framework run result, ChatMessage, or + OpenAI-style ChatCompletion. + + Checks (in order): + 1. ``result.usage_details`` or ``result.usage`` + 2. ``result.raw_representation.usage`` (OpenAI ChatCompletion shape) + 3. 
Aggregated ``result.messages[*].contents[*].usage_details`` + + Never raises -- returns ``None`` on any unexpected shape. + """ + if result is None: + return None + + try: + for attr in ("usage_details", "usage"): + found = _read_counts(_get(result, attr)) + if found: + return found + + raw = _get(result, "raw_representation") + if raw is not None: + found = _read_counts(_get(raw, "usage")) + if found: + return found + + aggregated = TokenUsage() + found_any = False + messages = _get(result, "messages") + if not _is_iterable(messages): + return None + for msg in messages: + contents = _get(msg, "contents") + if not _is_iterable(contents): + continue + for content in contents: + usage = _get(content, "usage_details") or _get(content, "usage") + piece = _read_counts(usage) + if piece: + aggregated = aggregated + piece + found_any = True + return aggregated if found_any else None + except Exception as exc: + logger.debug("extract_usage failed: %s", exc, exc_info=True) + return None + + +def extract_usage_from_dict(data: Any) -> Optional[TokenUsage]: + """Extract from a raw dict / SDK usage object.""" + return _read_counts(data) + + +def extract_usage_from_stream_chunk(chunk: Any) -> Optional[TokenUsage]: + """Streaming chunks: try the top-level shape, then ``chunk.metadata.usage``.""" + found = extract_usage(chunk) + if found: + return found + metadata = _get(chunk, "metadata") + if metadata is not None: + return _read_counts(_get(metadata, "usage")) + return None + + +def extract_realtime_usage(response_obj: Any) -> Optional[TokenUsage]: + """Azure AI Voice Live ``response.done`` payload extractor. + + Includes audio / text / cached sub-counts when present. 
+ """ + usage = _get(response_obj, "usage") + if usage is None: + return None + + inp = _to_int(_get(usage, "input_tokens")) + out = _to_int(_get(usage, "output_tokens")) + tot = _to_int(_get(usage, "total_tokens")) + if tot == 0 and (inp or out): + tot = inp + out + + in_details = _get(usage, "input_token_details") or {} + out_details = _get(usage, "output_token_details") or {} + + record = TokenUsage( + input_tokens=inp, + output_tokens=out, + total_tokens=tot, + input_audio_tokens=_to_int(_get(in_details, "audio_tokens")), + input_text_tokens=_to_int(_get(in_details, "text_tokens")), + input_cached_tokens=_to_int(_get(in_details, "cached_tokens")), + output_audio_tokens=_to_int(_get(out_details, "audio_tokens")), + output_text_tokens=_to_int(_get(out_details, "text_tokens")), + ) + # Only return if at least one non-zero count surfaced. + if record.has_any or any( + v for v in ( + record.input_audio_tokens, + record.input_text_tokens, + record.input_cached_tokens, + record.output_audio_tokens, + record.output_text_tokens, + ) + ): + return record + return None + + +# --------------------------------------------------------------------------- +# Tool / sub-agent attribution +# --------------------------------------------------------------------------- +def detect_invoked_tools(result: Any) -> set[str]: + """Return the set of tool/function names invoked in an agent result, + inferred from ``function_call`` content items. + + Used by orchestrators that expose sub-agents via ``.as_tool()`` to attribute + token usage only to the sub-agents that were actually called. Never raises. 
+ """ + invoked: set[str] = set() + try: + messages = _get(result, "messages") + if not _is_iterable(messages): + return invoked + for msg in messages: + contents = _get(msg, "contents") + if not _is_iterable(contents): + continue + for content in contents: + if _get(content, "type") == "function_call": + name = _get(content, "name") + if name: + invoked.add(str(name)) + except Exception as exc: + logger.debug("detect_invoked_tools failed: %s", exc, exc_info=True) + return invoked + + +# --------------------------------------------------------------------------- +# Event sink (optional Application Insights dependency) +# --------------------------------------------------------------------------- +EventSink = Callable[[str, Mapping[str, str]], None] + + +def _default_event_sink() -> Optional[EventSink]: + """Return ``azure.monitor.events.extension.track_event`` if importable, + else ``None``. Resolved lazily so the helper still works in unit tests + without the dependency installed.""" + try: + from azure.monitor.events.extension import track_event # type: ignore + except Exception: # pragma: no cover - optional dep + return None + return track_event + + +# --------------------------------------------------------------------------- +# Emitter +# --------------------------------------------------------------------------- +class TokenUsageEmitter: + """Emit standardized token-usage custom events. + + Parameters + ---------- + connection_string: + Application Insights connection string. If ``None`` (default), the + ``APPLICATIONINSIGHTS_CONNECTION_STRING`` env var is consulted. When + no connection string is configured the emitter logs and skips the + ``track_event`` call. + static_dimensions: + Properties merged into every event (e.g. ``{"app": "customer-chatbot"}``). + event_sink: + Callable ``(event_name, props_dict) -> None``. Defaults to + ``azure.monitor.events.extension.track_event``. Override in tests. 
+ pricing: + Optional mapping ``{model_deployment_name -> (usd_per_1k_input, + usd_per_1k_output)}``. When provided, an ``estimated_cost_usd`` + property is attached to agent / model / summary events. Model lookup + is case-insensitive. Use this to avoid hard-coding rates in KQL. + user_id_hasher: + Optional callable ``str -> str`` applied to any ``user_id`` value + before it leaves the emitter. Use this to satisfy PII / GDPR + requirements (e.g. HMAC-SHA256 with a tenant-scoped salt). Applied + to both ``static_dimensions['user_id']`` (at construction) and + per-call ``user_id`` kwargs. + sample_rate: + Fraction of high-cardinality events (agent / model / user / team / + speech) actually shipped, in ``[0.0, 1.0]``. The cheap **summary + event always fires** regardless of sample_rate so per-request totals + remain accurate; only the per-dimension breakdown is sampled. + Defaults to ``1.0`` (no sampling). + logger: + Override the module logger. + """ + + def __init__( + self, + *, + connection_string: Optional[str] = None, + static_dimensions: Optional[Mapping[str, Any]] = None, + event_sink: Optional[EventSink] = None, + pricing: Optional[Mapping[str, tuple[float, float]]] = None, + user_id_hasher: Optional[Callable[[str], str]] = None, + sample_rate: float = 1.0, + logger: Optional[logging.Logger] = None, + ) -> None: + self._cs = connection_string if connection_string is not None else os.getenv( + "APPLICATIONINSIGHTS_CONNECTION_STRING" + ) + self._sink = event_sink if event_sink is not None else _default_event_sink() + self._log = logger or logging.getLogger(__name__) + + # PII hashing applied to user_id everywhere. + self._user_id_hasher = user_id_hasher + + # Sampling clamp to [0, 1]. + try: + sr = float(sample_rate) + except (TypeError, ValueError): + sr = 1.0 + self._sample_rate = max(0.0, min(1.0, sr)) + + # Case-insensitive pricing lookup. Values stored as a (in, out) tuple. 
+ self._pricing: dict[str, tuple[float, float]] = {} + for model, rates in (pricing or {}).items(): + if not model or rates is None: + continue + try: + inp, out = rates + self._pricing[str(model).lower()] = (float(inp), float(out)) + except (TypeError, ValueError): + self._log.warning("Ignoring malformed pricing entry: %s=%r", model, rates) + + # Pre-stringify static dims once. user_id (if present) is hashed here + # so the raw value is never retained on the emitter. + raw_static = dict(static_dimensions or {}) + if "user_id" in raw_static: + raw_static["user_id"] = self._apply_user_id_hash(raw_static["user_id"]) + self._static: dict[str, str] = { + k: ("" if v is None else str(v)) for k, v in raw_static.items() + } + + # -- public surface --------------------------------------------------- + @property + def enabled(self) -> bool: + return bool(self._cs) and self._sink is not None + + @property + def sample_rate(self) -> float: + return self._sample_rate + + # -- internal helpers ------------------------------------------------- + def _apply_user_id_hash(self, value: Any) -> Any: + """Apply the configured user_id_hasher; never raises.""" + if value is None or value == "" or self._user_id_hasher is None: + return value + try: + return self._user_id_hasher(str(value)) + except Exception as exc: # never let hashing break telemetry + self._log.warning("user_id_hasher raised: %s", exc) + return value + + def _should_sample(self) -> bool: + """Sampling decision for high-cardinality events.""" + if self._sample_rate >= 1.0: + return True + if self._sample_rate <= 0.0: + return False + return random.random() < self._sample_rate + + def _cost_props( + self, model_deployment_name: Optional[str], usage: TokenUsage + ) -> dict[str, str]: + """Return ``{'estimated_cost_usd': '...'}`` when pricing is configured + for the given model, else ``{}``. 
6-decimal formatting.""" + if not self._pricing or not model_deployment_name: + return {} + rate = self._pricing.get(model_deployment_name.lower()) + if not rate: + return {} + inp_rate, out_rate = rate + cost = (usage.input_tokens * inp_rate + usage.output_tokens * out_rate) / 1000.0 + return {"estimated_cost_usd": f"{cost:.6f}"} + + def _summary_cost_props( + self, + primary_model: Optional[str], + additional_agents: Mapping[str, str], + usage: TokenUsage, + ) -> dict[str, str]: + """Best-effort cost for the summary event: charge full usage at the + primary model's rate (the SDK aggregates sub-agent tokens to the + orchestrator, so apportioning is not possible without per-agent + usage). Falls back to silent skip when no rate is known.""" + if primary_model: + cost = self._cost_props(primary_model, usage) + if cost: + return cost + for m in additional_agents.values(): + cost = self._cost_props(m, usage) + if cost: + return cost + return {} + + def emit(self, event_name: str, **dimensions: Any) -> None: + """Low-level: emit an event with arbitrary properties. + + Non-string values are stringified. ``None`` values are dropped. Any + ``user_id`` value is passed through the configured hasher. + Never raises. 
+ """ + props = dict(self._static) # cheap shallow copy of pre-stringified dims + for k, v in dimensions.items(): + if v is None: + continue + if k == "user_id": + v = self._apply_user_id_hash(v) + if v is None or v == "": + continue + props[k] = v if isinstance(v, str) else str(v) + + if not self.enabled: + self._log.debug( + "App Insights not configured -- skipping event %s (%s)", + event_name, props, + ) + return + try: + self._sink(event_name, props) # type: ignore[misc] + except Exception as exc: # never break the caller + self._log.warning("track_event(%s) failed: %s", event_name, exc) + + # -- typed convenience emitters -------------------------------------- + def emit_agent( + self, + *, + agent_name: str, + model_deployment_name: str, + usage: TokenUsage, + **dimensions: Any, + ) -> None: + if not usage.has_any or not self._should_sample(): + return + self.emit( + EVENT_AGENT, + agent_name=agent_name, + model_deployment_name=model_deployment_name, + **usage.to_event_props(), + **self._cost_props(model_deployment_name, usage), + **dimensions, + ) + + def emit_model( + self, + *, + model_deployment_name: str, + usage: TokenUsage, + **dimensions: Any, + ) -> None: + if not usage.has_any or not self._should_sample(): + return + self.emit( + EVENT_MODEL, + model_deployment_name=model_deployment_name, + **usage.to_event_props(), + **self._cost_props(model_deployment_name, usage), + **dimensions, + ) + + def emit_user( + self, + *, + user_id: str, + usage: TokenUsage, + **dimensions: Any, + ) -> None: + if not usage.has_any or not user_id or not self._should_sample(): + return + self.emit( + EVENT_USER, + user_id=user_id, + **usage.to_event_props(), + **dimensions, + ) + + def emit_team( + self, + *, + team_name: str, + usage: TokenUsage, + **dimensions: Any, + ) -> None: + if not usage.has_any or not team_name or not self._should_sample(): + return + self.emit( + EVENT_TEAM, + team_name=team_name, + **usage.to_event_props(), + **dimensions, + ) + + def 
emit_summary( + self, + *, + usage: TokenUsage, + agent_count: int = 1, + model_count: int = 1, + primary_model: Optional[str] = None, + additional_agents: Optional[Mapping[str, str]] = None, + **dimensions: Any, + ) -> None: + """The summary event always fires (ignores ``sample_rate``) so per- + request totals remain accurate even when high-cardinality events are + sampled.""" + if not usage.has_any: + return + # Summary historically uses ``total_input_tokens`` / ``total_output_tokens`` + # field names; preserve that wire format for backward compatibility. + props = { + "total_input_tokens": str(usage.input_tokens), + "total_output_tokens": str(usage.output_tokens), + "total_tokens": str(usage.total_tokens), + "agent_count": str(agent_count), + "model_count": str(model_count), + "sample_rate": f"{self._sample_rate:.4f}", + } + # Carry over realtime sub-counts if present. + for k, v in usage.to_event_props().items(): + props.setdefault(k, v) + # Optional total cost. + props.update(self._summary_cost_props(primary_model, additional_agents or {}, usage)) + self.emit(EVENT_SUMMARY, **props, **dimensions) + + def emit_speech( + self, + *, + model_deployment_name: str, + source: str, + usage: TokenUsage, + **dimensions: Any, + ) -> None: + """Voice-Live / realtime speech usage event.""" + if not self._should_sample(): + return + self.emit( + EVENT_SPEECH, + model_deployment_name=model_deployment_name, + source=source, + **usage.to_event_props(), + **self._cost_props(model_deployment_name, usage), + **dimensions, + ) + + # -- combined emit: summary + agent + per-distinct-model --------------- + def emit_all( + self, + *, + agent_name: str, + model_deployment_name: str, + usage: TokenUsage, + additional_agents: Optional[Mapping[str, str]] = None, + emit_user_event: bool = False, + emit_team_event: bool = False, + **dimensions: Any, + ) -> None: + """Convenience: emit summary, agent, and one model event per distinct + model deployment in one shot. 
+ + ``additional_agents`` maps sub-agent name -> its model deployment name + so callers can describe orchestrators that involve multiple agents. + + ``emit_user_event`` / ``emit_team_event`` opt in to the user/team + events; ``user_id`` / ``team_name`` must be present in dimensions for + those to fire. + """ + if not usage.has_any: + return + + agents = {agent_name: model_deployment_name} + if additional_agents: + agents.update({k: v for k, v in additional_agents.items() if k}) + models = {m for m in agents.values() if m} + + self.emit_summary( + usage=usage, + agent_count=len(agents), + model_count=len(models) or 1, + primary_model=model_deployment_name, + additional_agents=additional_agents, + **dimensions, + ) + self.emit_agent( + agent_name=agent_name, + model_deployment_name=model_deployment_name, + usage=usage, + **dimensions, + ) + for model in models: + self.emit_model( + model_deployment_name=model, + usage=usage, + **dimensions, + ) + if emit_user_event and dimensions.get("user_id"): + self.emit_user( + user_id=str(dimensions["user_id"]), + usage=usage, + agent_name=agent_name, + model_deployment_name=model_deployment_name, + ) + if emit_team_event and dimensions.get("team_name"): + self.emit_team( + team_name=str(dimensions["team_name"]), + usage=usage, + agent_name=agent_name, + model_deployment_name=model_deployment_name, + ) + + self._log.info( + "[TOKEN USAGE] agent=%s model=%s input=%d output=%d total=%d %s", + agent_name, + model_deployment_name, + usage.input_tokens, + usage.output_tokens, + usage.total_tokens, + " ".join(f"{k}={v}" for k, v in dimensions.items() if v), + ) + + +# --------------------------------------------------------------------------- +# Scope / decorator sugar +# --------------------------------------------------------------------------- +@dataclass +class TokenUsageScope(AbstractContextManager): + """Accumulate usage across multiple results, then emit on exit. 
+ + Example:: + + with TokenUsageScope(emitter, + agent_name="chat", + model_deployment_name=cfg.model, + user_id=user_id) as scope: + result = await agent.run(prompt) + scope.add(result) # extracts and accumulates + """ + + emitter: TokenUsageEmitter + agent_name: str + model_deployment_name: str + dimensions: dict[str, Any] = field(default_factory=dict) + additional_agents: dict[str, str] = field(default_factory=dict) + emit_user_event: bool = False + emit_team_event: bool = False + usage: TokenUsage = field(default_factory=TokenUsage) + + def __init__( + self, + emitter: TokenUsageEmitter, + *, + agent_name: str, + model_deployment_name: str, + additional_agents: Optional[Mapping[str, str]] = None, + emit_user_event: bool = False, + emit_team_event: bool = False, + **dimensions: Any, + ) -> None: + self.emitter = emitter + self.agent_name = agent_name + self.model_deployment_name = model_deployment_name + self.additional_agents = dict(additional_agents or {}) + self.emit_user_event = emit_user_event + self.emit_team_event = emit_team_event + self.dimensions = dict(dimensions) + self.usage = TokenUsage() + + # -- accumulation ----------------------------------------------------- + def add(self, source: Any) -> Optional[TokenUsage]: + """Extract usage from any supported shape and add to the running total. + + Never raises -- extraction failures return ``None`` and are logged + at DEBUG. 
+ """ + try: + found = extract_usage(source) or extract_usage_from_stream_chunk(source) + except Exception as exc: # belt + braces; extractors are already safe + logger.debug("TokenUsageScope.add failed: %s", exc, exc_info=True) + return None + if found: + self.usage = self.usage + found + return found + + def add_usage(self, usage: TokenUsage) -> None: + self.usage = self.usage + usage + + def add_chunks(self, chunks: Iterable[Any]) -> None: + for c in chunks: + self.add(c) + + # -- context manager -------------------------------------------------- + def __exit__(self, exc_type, exc, tb) -> None: + # Always emit (best-effort) regardless of exception status. + try: + self.emitter.emit_all( + agent_name=self.agent_name, + model_deployment_name=self.model_deployment_name, + usage=self.usage, + additional_agents=self.additional_agents, + emit_user_event=self.emit_user_event, + emit_team_event=self.emit_team_event, + **self.dimensions, + ) + except Exception as emit_exc: # pragma: no cover - belt + braces + logger.warning("TokenUsageScope emit failed: %s", emit_exc) + return None # do not suppress exceptions + + +def track_tokens( + emitter: TokenUsageEmitter, + *, + agent_name: str, + model_deployment_name: str, + dimension_args: Optional[Mapping[str, str]] = None, + additional_agents: Optional[Mapping[str, str]] = None, + emit_user_event: bool = False, + emit_team_event: bool = False, +): + """Decorator: wrap an async or sync function that returns an LLM result. + + ``dimension_args`` maps emitted-property-name -> callable-keyword-argument + name so per-call values (e.g. ``user_id``) are forwarded to the event. + + Example:: + + @track_tokens(emitter, + agent_name="chat", + model_deployment_name=settings.model, + dimension_args={"user_id": "user_id", + "session_id": "session_id"}) + async def run_chat(prompt, *, user_id, session_id): ... 
+ """ + + dim_args = dict(dimension_args or {}) + + def _decorator(fn: Callable[..., Any]): + is_coro = _is_coroutine_function(fn) + + if is_coro: + @functools.wraps(fn) + async def _aw(*args, **kwargs) -> Any: + with _scope_for(kwargs) as scope: + result = await fn(*args, **kwargs) + scope.add(result) + return result + return _aw + + @functools.wraps(fn) + def _sw(*args, **kwargs) -> Any: + with _scope_for(kwargs) as scope: + result = fn(*args, **kwargs) + scope.add(result) + return result + return _sw + + def _scope_for(call_kwargs: Mapping[str, Any]) -> TokenUsageScope: + dimensions = { + prop: call_kwargs.get(kw) + for prop, kw in dim_args.items() + if call_kwargs.get(kw) is not None + } + return TokenUsageScope( + emitter, + agent_name=agent_name, + model_deployment_name=model_deployment_name, + additional_agents=additional_agents, + emit_user_event=emit_user_event, + emit_team_event=emit_team_event, + **dimensions, + ) + + return _decorator + + +def _is_coroutine_function(fn: Callable[..., Any]) -> bool: + return asyncio.iscoroutinefunction(fn) + + +__all__ = [ + "EVENT_SUMMARY", + "EVENT_AGENT", + "EVENT_MODEL", + "EVENT_USER", + "EVENT_TEAM", + "EVENT_SPEECH", + "TokenUsage", + "TokenUsageEmitter", + "TokenUsageScope", + "track_tokens", + "extract_usage", + "extract_usage_from_dict", + "extract_usage_from_stream_chunk", + "extract_realtime_usage", + "detect_invoked_tools", +] diff --git a/src/processor/src/tests/test_llm_token_telemetry.py b/src/processor/src/tests/test_llm_token_telemetry.py new file mode 100644 index 00000000..6a24f5b1 --- /dev/null +++ b/src/processor/src/tests/test_llm_token_telemetry.py @@ -0,0 +1,572 @@ +"""Unit tests for app.utils.llm_token_telemetry. 
# NOTE(review): the imports above target ``app.utils.llm_token_telemetry``
# while this patch adds the module under ``src/processor/src/utils/`` --
# confirm that package path resolves in this repository's test layout.


# ---------------------------------------------------------------------------
# TokenUsage
# ---------------------------------------------------------------------------
class TestTokenUsage:
    """Arithmetic and serialization behaviour of the ``TokenUsage`` record."""

    def test_has_any_false_when_zero(self):
        empty = TokenUsage()
        assert empty.has_any is False

    def test_has_any_true_when_any_nonzero(self):
        only_input = TokenUsage(input_tokens=1)
        only_total = TokenUsage(total_tokens=5)
        assert only_input.has_any is True
        assert only_total.has_any is True

    def test_addition_basic(self):
        left, right = TokenUsage(1, 2, 3), TokenUsage(4, 5, 9)
        assert left + right == TokenUsage(5, 7, 12)

    def test_addition_realtime_subfields(self):
        left = TokenUsage(1, 2, 3, input_audio_tokens=10)
        right = TokenUsage(4, 5, 9, input_audio_tokens=20, output_audio_tokens=7)
        combined = left + right
        assert combined.input_audio_tokens == 30
        # A missing (None) operand counts as zero: None + 7 -> 7
        assert combined.output_audio_tokens == 7

    def test_addition_returns_notimplemented_for_other_types(self):
        outcome = TokenUsage(1).__add__("nope")
        assert outcome is NotImplemented

    def test_to_event_props_omits_none_subfields(self):
        expected = {"input_tokens": "1", "output_tokens": "2", "total_tokens": "3"}
        assert TokenUsage(1, 2, 3).to_event_props() == expected

    def test_to_event_props_includes_realtime_when_present(self):
        record = TokenUsage(1, 2, 3, input_audio_tokens=4)
        assert record.to_event_props()["input_audio_tokens"] == "4"


# ---------------------------------------------------------------------------
# extract_usage_from_dict
# ---------------------------------------------------------------------------
class TestExtractFromDict:
    """Key-alias handling and value coercion for raw usage dicts."""

    @pytest.mark.parametrize("data,expected", [
        ({"prompt_tokens": 12, "completion_tokens": 8}, (12, 8, 20)),
        ({"input_tokens": 5, "output_tokens": 7, "total_tokens": 12}, (5, 7, 12)),
        ({"input_token_count": 3, "output_token_count": 4}, (3, 4, 7)),
        ({"promptTokens": 1, "completionTokens": 2, "totalTokens": 3}, (1, 2, 3)),
    ])
    def test_aliases(self, data, expected):
        usage = extract_usage_from_dict(data)
        observed = (usage.input_tokens, usage.output_tokens, usage.total_tokens)
        assert observed == expected

    def test_none_returns_none(self):
        assert extract_usage_from_dict(None) is None

    def test_empty_returns_none(self):
        assert extract_usage_from_dict({}) is None

    def test_total_falls_back_to_sum(self):
        usage = extract_usage_from_dict({"input_tokens": 4, "output_tokens": 6})
        assert usage.total_tokens == 10

    def test_string_digits_coerced(self):
        usage = extract_usage_from_dict({"input_tokens": "10", "output_tokens": "20"})
        assert usage.input_tokens == 10
        assert usage.output_tokens == 20


# ---------------------------------------------------------------------------
# extract_usage (object shapes)
# ---------------------------------------------------------------------------
class _Bag:
    """Minimal attribute bag (acts like an SDK model object)."""


class TestExtractUsage:
    """``extract_usage`` against the object shapes it probes, in order."""

    def test_usage_details_dict(self):
        result = _Bag()
        result.usage_details = {"input_token_count": 5, "output_token_count": 7}
        assert extract_usage(result).total_tokens == 12

    def test_usage_details_object(self):
        details = _Bag()
        details.input_token_count = 5
        details.output_token_count = 7
        details.total_token_count = 12
        result = _Bag()
        result.usage_details = details
        assert extract_usage(result).total_tokens == 12

    def test_raw_representation_openai_shape(self):
        raw = _Bag()
        raw.usage = {"prompt_tokens": 3, "completion_tokens": 4, "total_tokens": 7}
        result = _Bag()
        result.raw_representation = raw
        usage = extract_usage(result)
        assert (usage.input_tokens, usage.output_tokens, usage.total_tokens) == (3, 4, 7)

    def test_aggregated_messages(self):
        first, second = _Bag(), _Bag()
        first.usage_details = {"input_tokens": 2, "output_tokens": 3}
        second.usage_details = {"input_tokens": 4, "output_tokens": 1}
        message = _Bag()
        message.contents = [first, second]
        result = _Bag()
        result.messages = [message]
        usage = extract_usage(result)
        assert usage.input_tokens == 6
        assert usage.output_tokens == 4

    def test_none_input_returns_none(self):
        assert extract_usage(None) is None

    def test_no_usage_returns_none(self):
        assert extract_usage(_Bag()) is None

    def test_mock_input_does_not_raise(self):
        """A ``Mock`` answers every attribute lookup with another Mock;
        iterating its ``.messages`` used to raise TypeError."""
        stand_in = Mock()
        # Must come back as None without raising.
        assert extract_usage(stand_in) is None


# ---------------------------------------------------------------------------
# extract_usage_from_stream_chunk
# ---------------------------------------------------------------------------
class TestStreamChunk:
    """Streaming chunks carrying usage under ``metadata.usage``."""

    def test_chunk_with_metadata_usage(self):
        chunk = _Bag()
        chunk.metadata = {"usage": {"input_tokens": 1, "output_tokens": 2}}
        usage = extract_usage_from_stream_chunk(chunk)
        assert usage.input_tokens == 1
        assert usage.output_tokens == 2

    def test_no_usage_returns_none(self):
        assert extract_usage_from_stream_chunk(_Bag()) is None


# ---------------------------------------------------------------------------
# extract_realtime_usage
# ---------------------------------------------------------------------------
class TestRealtime:
    """Realtime payloads with audio/text/cached sub-counts."""

    def test_basic(self):
        response = _Bag()
        response.usage = {
            "input_tokens": 3, "output_tokens": 4, "total_tokens": 7,
            "input_token_details": {"audio_tokens": 2, "text_tokens": 1, "cached_tokens": 0},
            "output_token_details": {"audio_tokens": 4, "text_tokens": 0},
        }
        usage = extract_realtime_usage(response)
        assert usage.input_audio_tokens == 2
        assert usage.output_audio_tokens == 4
        assert usage.total_tokens == 7

    def test_total_derived_when_missing(self):
        response = _Bag()
        response.usage = {"input_tokens": 3, "output_tokens": 4}
        assert extract_realtime_usage(response).total_tokens == 7

    def test_no_usage_returns_none(self):
        assert extract_realtime_usage(_Bag()) is None
# ---------------------------------------------------------------------------
# detect_invoked_tools
# ---------------------------------------------------------------------------
class TestDetectInvokedTools:
    """Tool names are collected from ``function_call`` content items only."""

    def test_finds_function_calls(self):
        call_a, text_item, call_b = _Bag(), _Bag(), _Bag()
        call_a.type = "function_call"
        call_a.name = "product_agent"
        text_item.type = "text"
        text_item.name = "n/a"
        call_b.type = "function_call"
        call_b.name = "policy_agent"
        message = _Bag()
        message.contents = [call_a, text_item, call_b]
        result = _Bag()
        result.messages = [message]
        assert detect_invoked_tools(result) == {"product_agent", "policy_agent"}

    def test_empty_when_no_messages(self):
        assert detect_invoked_tools(_Bag()) == set()

    def test_mock_input_safe(self):
        assert detect_invoked_tools(Mock()) == set()

    def test_skips_function_calls_without_name(self):
        nameless = _Bag()
        nameless.type = "function_call"
        nameless.name = None
        message = _Bag()
        message.contents = [nameless]
        result = _Bag()
        result.messages = [message]
        assert detect_invoked_tools(result) == set()


# ---------------------------------------------------------------------------
# TokenUsageEmitter
# ---------------------------------------------------------------------------
class TestEmitter:
    """Core emitter behaviour: enablement, dimension merging, typed events."""

    def _make(self, **kw):
        """Build an emitter whose sink records ``(name, props)`` tuples."""
        captured: list[tuple[str, dict]] = []

        def _record(name, props):
            captured.append((name, dict(props)))

        kw.setdefault("connection_string", "fake-conn")
        kw.setdefault("event_sink", _record)
        return TokenUsageEmitter(**kw), captured

    def test_disabled_when_no_connection_string(self):
        emitter = TokenUsageEmitter(connection_string="", event_sink=lambda *a: None)
        assert emitter.enabled is False

    def test_disabled_when_no_sink(self):
        emitter = TokenUsageEmitter(connection_string="x", event_sink=None)
        # The default sink may or may not be importable here; force it off.
        emitter._sink = None
        assert emitter.enabled is False

    def test_static_dimensions_prestringified_and_merged(self):
        emitter, captured = self._make(static_dimensions={"app": "x", "tenant": 42})
        emitter.emit("X", user_id="u1")
        event_name, props = captured[0]
        assert event_name == "X"
        assert props["app"] == "x"
        assert props["tenant"] == "42"  # stringified
        assert props["user_id"] == "u1"

    def test_call_dimension_overrides_static(self):
        emitter, captured = self._make(static_dimensions={"app": "default"})
        emitter.emit("X", app="override")
        _, props = captured[0]
        assert props["app"] == "override"

    def test_none_dimension_dropped(self):
        emitter, captured = self._make()
        emitter.emit("X", user_id=None, session_id="s1")
        _, props = captured[0]
        assert "user_id" not in props
        assert props["session_id"] == "s1"

    def test_sink_exception_does_not_propagate(self, caplog):
        def boom(_n, _p):
            raise RuntimeError("sink broken")

        emitter = TokenUsageEmitter(connection_string="x", event_sink=boom)
        with caplog.at_level(logging.WARNING):
            emitter.emit("X")  # must not raise

    def test_emit_agent_skips_zero_usage(self):
        emitter, captured = self._make()
        emitter.emit_agent(agent_name="a", model_deployment_name="m", usage=TokenUsage())
        assert captured == []

    def test_emit_agent_populates_props(self):
        emitter, captured = self._make()
        emitter.emit_agent(
            agent_name="chat",
            model_deployment_name="gpt-4o",
            usage=TokenUsage(10, 20, 30),
            user_id="u",
        )
        event_name, props = captured[0]
        assert event_name == EVENT_AGENT
        assert props["agent_name"] == "chat"
        assert props["model_deployment_name"] == "gpt-4o"
        assert props["total_tokens"] == "30"
        assert props["user_id"] == "u"

    def test_emit_all_emits_summary_agent_and_per_distinct_model(self):
        emitter, captured = self._make()
        emitter.emit_all(
            agent_name="orchestrator",
            model_deployment_name="gpt-4o",
            usage=TokenUsage(10, 20, 30),
            additional_agents={"tool_a": "gpt-4o", "tool_b": "gpt-35"},
            user_id="u1",
        )
        event_names = [name for name, _ in captured]
        # exactly one summary + one agent + two model events (gpt-4o, gpt-35)
        assert event_names.count(EVENT_SUMMARY) == 1
        assert event_names.count(EVENT_AGENT) == 1
        assert event_names.count(EVENT_MODEL) == 2
        # summary records agent + model counts
        summary = next(props for name, props in captured if name == EVENT_SUMMARY)
        assert summary["agent_count"] == "3"
        assert summary["model_count"] == "2"
        assert summary["total_input_tokens"] == "10"

    def test_emit_speech_includes_audio_subfields(self):
        emitter, captured = self._make()
        emitter.emit_speech(
            model_deployment_name="gpt-4o-realtime",
            source="voice_chat",
            usage=TokenUsage(1, 2, 3, input_audio_tokens=5, output_audio_tokens=6),
        )
        event_name, props = captured[0]
        assert event_name == EVENT_SPEECH
        assert props["source"] == "voice_chat"
        assert props["input_audio_tokens"] == "5"
        assert props["output_audio_tokens"] == "6"


# ---------------------------------------------------------------------------
# Pricing / cost computation
# ---------------------------------------------------------------------------
class TestPricing:
    """``estimated_cost_usd`` is attached only for models with known rates."""

    def _make(self, pricing):
        """Build an emitter with the given pricing table and a recording sink."""
        captured: list[tuple[str, dict]] = []

        def _record(name, props):
            captured.append((name, dict(props)))

        emitter = TokenUsageEmitter(
            connection_string="x",
            event_sink=_record,
            pricing=pricing,
        )
        return emitter, captured

    def test_cost_attached_to_agent_event(self):
        emitter, captured = self._make({"gpt-4o": (0.0025, 0.01)})
        emitter.emit_agent(
            agent_name="a",
            model_deployment_name="gpt-4o",
            usage=TokenUsage(1000, 500, 1500),
        )
        # 1000 * 0.0025/1k + 500 * 0.01/1k = 0.0025 + 0.005 = 0.0075
        _, props = captured[0]
        assert props["estimated_cost_usd"] == "0.007500"

    def test_cost_case_insensitive_model_lookup(self):
        emitter, captured = self._make({"GPT-4o": (0.001, 0.001)})
        emitter.emit_model(
            model_deployment_name="gpt-4o",
            usage=TokenUsage(1000, 1000, 2000),
        )
        _, props = captured[0]
        assert "estimated_cost_usd" in props

    def test_no_cost_when_model_unknown(self):
        emitter, captured = self._make({"gpt-4o": (0.001, 0.001)})
        emitter.emit_agent(
            agent_name="a",
            model_deployment_name="gpt-mystery",
            usage=TokenUsage(10, 10, 20),
        )
        _, props = captured[0]
        assert "estimated_cost_usd" not in props

    def test_summary_picks_up_cost_via_emit_all(self):
        emitter, captured = self._make({"gpt-4o": (0.0025, 0.01)})
        emitter.emit_all(
            agent_name="chat",
            model_deployment_name="gpt-4o",
            usage=TokenUsage(1000, 500, 1500),
        )
        summary = next(props for name, props in captured if name == EVENT_SUMMARY)
        assert summary["estimated_cost_usd"] == "0.007500"

    def test_malformed_pricing_entry_ignored(self, caplog):
        with caplog.at_level(logging.WARNING):
            emitter = TokenUsageEmitter(
                connection_string="x",
                event_sink=lambda *a: None,
                pricing={"bad-model": "not-a-tuple"},  # type: ignore[dict-item]
            )
        # Construction still succeeds; the bad entry is simply skipped.
        assert "bad-model" not in emitter._pricing
# ---------------------------------------------------------------------------
# user_id PII hashing
# ---------------------------------------------------------------------------
class TestUserIdHasher:
    """The optional ``user_id_hasher`` is applied wherever user_id appears."""

    def _make(self, hasher):
        """Build an emitter with the given hasher and a recording sink."""
        captured: list[tuple[str, dict]] = []

        def _record(name, props):
            captured.append((name, dict(props)))

        emitter = TokenUsageEmitter(
            connection_string="x",
            event_sink=_record,
            user_id_hasher=hasher,
        )
        return emitter, captured

    def test_hasher_applied_to_call_kwargs(self):
        emitter, captured = self._make(lambda v: f"H({v})")
        emitter.emit("X", user_id="alice")
        _, props = captured[0]
        assert props["user_id"] == "H(alice)"

    def test_hasher_applied_to_static_dimensions_at_construction(self):
        emitter = TokenUsageEmitter(
            connection_string="x",
            event_sink=lambda *a: None,
            user_id_hasher=lambda v: f"H({v})",
            static_dimensions={"user_id": "bob"},
        )
        assert emitter._static["user_id"] == "H(bob)"

    def test_hasher_exception_falls_back_to_raw(self, caplog):
        def boom(_v):
            raise RuntimeError("hasher broken")

        emitter, captured = self._make(boom)
        with caplog.at_level(logging.WARNING):
            emitter.emit("X", user_id="alice")
        # A failing hasher leaves the original value in place rather than
        # breaking telemetry.
        _, props = captured[0]
        assert props["user_id"] == "alice"

    def test_no_hasher_passes_through(self):
        emitter, captured = self._make(None)
        emitter.emit("X", user_id="alice")
        _, props = captured[0]
        assert props["user_id"] == "alice"

    def test_empty_user_id_not_hashed_or_emitted(self):
        emitter, captured = self._make(lambda v: f"H({v})")
        emitter.emit("X", user_id="")
        # Empty user_id should be dropped, not hashed to "H()".
        _, props = captured[0]
        assert "user_id" not in props


# ---------------------------------------------------------------------------
# Sampling
# ---------------------------------------------------------------------------
class TestSampling:
    """``sample_rate`` gates breakdown events but never the summary event."""

    def _make(self, rate):
        """Build an emitter with the given sample rate and a recording sink."""
        captured: list[tuple[str, dict]] = []

        def _record(name, props):
            captured.append((name, dict(props)))

        emitter = TokenUsageEmitter(
            connection_string="x",
            event_sink=_record,
            sample_rate=rate,
        )
        return emitter, captured

    def test_rate_clamped_to_unit_interval(self):
        too_low = TokenUsageEmitter(
            connection_string="x", sample_rate=-0.5, event_sink=lambda *a: None
        )
        too_high = TokenUsageEmitter(
            connection_string="x", sample_rate=2.0, event_sink=lambda *a: None
        )
        assert too_low.sample_rate == 0.0
        assert too_high.sample_rate == 1.0

    def test_invalid_rate_defaults_to_one(self):
        emitter = TokenUsageEmitter(
            connection_string="x",
            sample_rate="nope",  # type: ignore[arg-type]
            event_sink=lambda *a: None,
        )
        assert emitter.sample_rate == 1.0

    def test_zero_rate_drops_agent_event(self):
        emitter, captured = self._make(0.0)
        emitter.emit_agent(
            agent_name="a", model_deployment_name="m", usage=TokenUsage(1, 2, 3)
        )
        assert captured == []

    def test_zero_rate_still_emits_summary(self):
        emitter, captured = self._make(0.0)
        emitter.emit_summary(usage=TokenUsage(1, 2, 3))
        assert captured and captured[0][0] == EVENT_SUMMARY

    def test_summary_records_sample_rate(self):
        emitter, captured = self._make(0.25)
        emitter.emit_summary(usage=TokenUsage(1, 2, 3))
        _, props = captured[0]
        assert props["sample_rate"] == "0.2500"

    def test_emit_all_with_zero_rate_only_emits_summary(self):
        emitter, captured = self._make(0.0)
        emitter.emit_all(
            agent_name="chat",
            model_deployment_name="gpt-4o",
            usage=TokenUsage(10, 20, 30),
        )
        assert [name for name, _ in captured] == [EVENT_SUMMARY]

    def test_full_rate_emits_everything(self):
        emitter, captured = self._make(1.0)
        emitter.emit_all(
            agent_name="chat",
            model_deployment_name="gpt-4o",
            usage=TokenUsage(10, 20, 30),
            additional_agents={"a2": "gpt-35"},
        )
        event_names = [name for name, _ in captured]
        assert EVENT_SUMMARY in event_names
        assert EVENT_AGENT in event_names
        assert event_names.count(EVENT_MODEL) == 2


# ---------------------------------------------------------------------------
# TokenUsageScope (continued)
# ---------------------------------------------------------------------------
class TestScope:
    """Context-manager accumulation and emit-on-exit behaviour."""

    def _emitter(self):
        """Build an enabled emitter with a recording sink."""
        captured: list[tuple[str, dict]] = []

        def _record(name, props):
            captured.append((name, dict(props)))

        emitter = TokenUsageEmitter(connection_string="x", event_sink=_record)
        return emitter, captured

    def test_happy_path_emits_on_exit(self):
        emitter, captured = self._emitter()
        result = _Bag()
        result.usage_details = {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}
        with TokenUsageScope(emitter, agent_name="a", model_deployment_name="m") as scope:
            scope.add(result)
        event_names = [name for name, _ in captured]
        assert any(name == EVENT_SUMMARY for name in event_names)
        assert any(name == EVENT_AGENT for name in event_names)

    def test_multi_add_accumulates(self):
        emitter, captured = self._emitter()
        first, second = _Bag(), _Bag()
        first.usage_details = {"input_tokens": 1, "output_tokens": 2}
        second.usage_details = {"input_tokens": 4, "output_tokens": 5}
        with TokenUsageScope(emitter, agent_name="a", model_deployment_name="m") as scope:
            scope.add(first)
            scope.add(second)
        agent_props = next(props for name, props in captured if name == EVENT_AGENT)
        assert agent_props["input_tokens"] == "5"
        assert agent_props["output_tokens"] == "7"
        assert agent_props["total_tokens"] == "12"

    def test_exception_in_body_still_emits(self):
        emitter, captured = self._emitter()
        result = _Bag()
        result.usage_details = {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}
        with pytest.raises(ValueError):
            with TokenUsageScope(emitter, agent_name="a", model_deployment_name="m") as scope:
                scope.add(result)
                raise ValueError("boom")
        # The scope emitted even though the body raised.
        assert any(name == EVENT_AGENT for name, _ in captured)

    def test_add_with_mock_does_not_raise(self):
        emitter, _ = self._emitter()
        with TokenUsageScope(emitter, agent_name="a", model_deployment_name="m") as scope:
            assert scope.add(Mock()) is None

    def test_zero_usage_does_not_emit(self):
        emitter, captured = self._emitter()
        with TokenUsageScope(emitter, agent_name="a", model_deployment_name="m"):
            pass
        assert captured == []

    def test_dimensions_flow_to_events(self):
        emitter, captured = self._emitter()
        result = _Bag()
        result.usage_details = {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}
        with TokenUsageScope(
            emitter,
            agent_name="a",
            model_deployment_name="m",
            user_id="u1",
            session_id="s1",
        ) as scope:
            scope.add(result)
        for _, props in captured:
            assert props["user_id"] == "u1"
            assert props["session_id"] == "s1"

    def test_additional_agents_after_scope_open(self):
        emitter, captured = self._emitter()
        result = _Bag()
        result.usage_details = {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}
        with TokenUsageScope(
            emitter, agent_name="orchestrator", model_deployment_name="gpt-4o"
        ) as scope:
            scope.add(result)
            # Mutate additional_agents after the call -- mirrors the
            # detect_invoked_tools usage pattern.
            scope.additional_agents["tool_a"] = "gpt-35"
        models = {
            props["model_deployment_name"]
            for name, props in captured
            if name == EVENT_MODEL
        }
        assert models == {"gpt-4o", "gpt-35"}
They all: + +* extract token counts from agent_framework / Azure OpenAI responses, +* emit the same three custom events (``LLM_Token_Usage_Summary``, + ``LLM_Agent_Token_Usage``, ``LLM_Model_Token_Usage``), +* defensively swallow telemetry errors, +* duplicate the same KQL queries and Azure Workbook. + +This module consolidates the union of those behaviours behind one stable API +so each accelerator can replace its bespoke helper with an import. + +Public API +---------- +- ``TokenUsage`` -- immutable dataclass for counts +- ``extract_usage(obj)`` -- agent_framework run result / message +- ``extract_usage_from_dict(d)`` -- raw dict from any SDK +- ``extract_usage_from_stream_chunk`` -- streaming chunks +- ``extract_realtime_usage(resp)`` -- Azure AI Voice Live response.done +- ``TokenUsageEmitter`` -- emits the three events + optional + per-user / per-team / speech events +- ``TokenUsageScope`` -- context-manager that accumulates and + auto-emits on exit +- ``track_tokens`` -- decorator wrapper around the scope + +Design rules +------------ +* Telemetry NEVER raises. Extraction failures return ``None``; emission + failures are logged at WARNING. +* No hard dependency on ``azure-monitor-events-extension``; if absent the + emitter degrades to logging only. +* Arbitrary correlation dimensions are passed as ``**dimensions`` kwargs and + surface verbatim as custom-event properties. This is how each accelerator + attaches its own keys (``conversation_id``, ``process_id``, ``team_name``, + ``file_name``, ``tenant``, etc.) without forking the helper. 
logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Event-name constants -- keep these stable; KQL queries and workbooks bind
# to these exact strings.
# ---------------------------------------------------------------------------
EVENT_SUMMARY = "LLM_Token_Usage_Summary"
EVENT_AGENT = "LLM_Agent_Token_Usage"
EVENT_MODEL = "LLM_Model_Token_Usage"
EVENT_USER = "LLM_User_Token_Usage"
EVENT_TEAM = "LLM_Team_Token_Usage"
EVENT_SPEECH = "Speech_Usage"


# Token-count field aliases observed across model providers / SDK versions.
# Each tuple is probed in order; the first truthy value wins.
_INPUT_KEYS = (
    "input_token_count",
    "input_tokens",
    "prompt_tokens",
    "promptTokens",
)
_OUTPUT_KEYS = (
    "output_token_count",
    "output_tokens",
    "completion_tokens",
    "completionTokens",
)
_TOTAL_KEYS = (
    "total_token_count",
    "total_tokens",
    "totalTokens",
)


# ---------------------------------------------------------------------------
# Data model
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class TokenUsage:
    """Normalized, immutable token-usage record.

    Instances support ``+``; the three core counts are summed directly and
    each optional sub-count stays ``None`` only while both operands are
    ``None``.
    """

    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0

    # Optional realtime / voice fields (None unless populated)
    input_audio_tokens: Optional[int] = None
    input_text_tokens: Optional[int] = None
    input_cached_tokens: Optional[int] = None
    output_audio_tokens: Optional[int] = None
    output_text_tokens: Optional[int] = None

    @property
    def has_any(self) -> bool:
        """True when any of the three core counts is non-zero."""
        return bool(self.input_tokens or self.output_tokens or self.total_tokens)

    def __add__(self, other: "TokenUsage") -> "TokenUsage":
        """Return the field-wise sum; ``NotImplemented`` for foreign types."""
        if not isinstance(other, TokenUsage):
            return NotImplemented

        def _sum(a: Optional[int], b: Optional[int]) -> Optional[int]:
            # Keep "never populated" distinguishable from "populated with 0".
            if a is None and b is None:
                return None
            return (a or 0) + (b or 0)

        return TokenUsage(
            input_tokens=self.input_tokens + other.input_tokens,
            output_tokens=self.output_tokens + other.output_tokens,
            total_tokens=self.total_tokens + other.total_tokens,
            input_audio_tokens=_sum(self.input_audio_tokens, other.input_audio_tokens),
            input_text_tokens=_sum(self.input_text_tokens, other.input_text_tokens),
            input_cached_tokens=_sum(self.input_cached_tokens, other.input_cached_tokens),
            output_audio_tokens=_sum(self.output_audio_tokens, other.output_audio_tokens),
            output_text_tokens=_sum(self.output_text_tokens, other.output_text_tokens),
        )

    def to_event_props(self) -> dict[str, str]:
        """Stringified property bag suitable for App Insights custom events.

        The three core counts are always present; optional sub-counts are
        included only when they are not ``None``.
        """
        props: dict[str, str] = {
            "input_tokens": str(self.input_tokens),
            "output_tokens": str(self.output_tokens),
            "total_tokens": str(self.total_tokens),
        }
        for name in (
            "input_audio_tokens",
            "input_text_tokens",
            "input_cached_tokens",
            "output_audio_tokens",
            "output_text_tokens",
        ):
            value = getattr(self, name)
            if value is not None:
                props[name] = str(value)
        return props


# ---------------------------------------------------------------------------
# Low-level coercion helpers
# ---------------------------------------------------------------------------
def _to_int(value: Any, default: int = 0) -> int:
    """Best-effort int conversion; bool excluded; never raises.

    ``None`` and ``bool`` map to *default*; ``int`` is returned unchanged;
    everything else goes through ``int()`` (strings are stripped first).
    Any conversion failure yields *default*. That includes NaN / infinite
    floats and strings such as ``"²"`` that ``str.isdigit`` accepts but
    ``int`` rejects, all of which previously escaped as exceptions.
    """
    if value is None or isinstance(value, bool):
        return default
    if isinstance(value, int):
        return value
    try:
        if isinstance(value, str):
            value = value.strip()
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # ValueError: non-numeric text or NaN; OverflowError: +/-inf.
        return default


def _get(obj: Any, key: str, default: Any = None) -> Any:
    """Read an attribute or dict key uniformly."""
    if obj is None:
        return default
    if isinstance(obj, Mapping):
        return obj.get(key, default)
    return getattr(obj, key, default)


def _is_iterable(obj: Any) -> bool:
    """True only for real iterables (lists/tuples/sets/generators), NOT for
    arbitrary objects (e.g. ``unittest.mock.Mock``) that happen to expose
    ``__iter__`` but blow up on iteration."""
    if obj is None:
        return False
    if isinstance(obj, (list, tuple, set, frozenset)):
        return True
    # Strings are iterable but never the right answer for "messages".
    if isinstance(obj, (str, bytes, bytearray, Mapping)):
        return False
    # Fall back to a duck-typed check, but reject Mock instances which would
    # otherwise pretend to support iteration.
    if isinstance(obj, NonCallableMock):
        return False
    return hasattr(obj, "__iter__")


def _first_count(usage_obj: Any, keys: tuple) -> int:
    """Return the first truthy value found under *keys*, coerced to int."""
    for key in keys:
        value = _get(usage_obj, key)
        if value:
            return _to_int(value)
    return 0


def _read_counts(usage_obj: Any) -> Optional[TokenUsage]:
    """Read ``input/output/total`` from any usage-bearing object/dict.

    Returns ``None`` when *usage_obj* is ``None`` or every count is zero.
    A missing total is derived as ``input + output``.
    """
    if usage_obj is None:
        return None

    inp = _first_count(usage_obj, _INPUT_KEYS)
    out = _first_count(usage_obj, _OUTPUT_KEYS)
    tot = _first_count(usage_obj, _TOTAL_KEYS)

    if tot == 0 and (inp or out):
        tot = inp + out
    if not (inp or out or tot):
        return None
    return TokenUsage(input_tokens=inp, output_tokens=out, total_tokens=tot)


# ---------------------------------------------------------------------------
# Extraction -- public
# ---------------------------------------------------------------------------
def extract_usage(result: Any) -> Optional[TokenUsage]:
    """Extract usage from an agent_framework run result, ChatMessage, or
    OpenAI-style ChatCompletion.

    Checks (in order):
      1. ``result.usage_details`` or ``result.usage``
      2. ``result.raw_representation.usage`` (OpenAI ChatCompletion shape)
      3. Aggregated ``result.messages[*].contents[*].usage_details``

    Never raises -- returns ``None`` on any unexpected shape.
    """
    if result is None:
        return None

    try:
        for attr in ("usage_details", "usage"):
            found = _read_counts(_get(result, attr))
            if found:
                return found

        raw = _get(result, "raw_representation")
        if raw is not None:
            found = _read_counts(_get(raw, "usage"))
            if found:
                return found

        aggregated = TokenUsage()
        found_any = False
        messages = _get(result, "messages")
        if not _is_iterable(messages):
            return None
        for msg in messages:
            contents = _get(msg, "contents")
            if not _is_iterable(contents):
                continue
            for content in contents:
                usage = _get(content, "usage_details") or _get(content, "usage")
                piece = _read_counts(usage)
                if piece:
                    aggregated = aggregated + piece
                    found_any = True
        return aggregated if found_any else None
    except Exception as exc:
        logger.debug("extract_usage failed: %s", exc, exc_info=True)
        return None


def extract_usage_from_dict(data: Any) -> Optional[TokenUsage]:
    """Extract from a raw dict / SDK usage object.

    Never raises -- returns ``None`` when no counts are found or the input
    has an unexpected shape.
    """
    try:
        return _read_counts(data)
    except Exception as exc:
        logger.debug("extract_usage_from_dict failed: %s", exc, exc_info=True)
        return None


def extract_usage_from_stream_chunk(chunk: Any) -> Optional[TokenUsage]:
    """Streaming chunks: try the top-level shape, then ``chunk.metadata.usage``.

    Never raises -- returns ``None`` when neither location carries counts.
    """
    found = extract_usage(chunk)
    if found:
        return found
    try:
        metadata = _get(chunk, "metadata")
        if metadata is not None:
            return _read_counts(_get(metadata, "usage"))
    except Exception as exc:
        logger.debug(
            "extract_usage_from_stream_chunk failed: %s", exc, exc_info=True
        )
    return None
def extract_realtime_usage(response_obj: Any) -> Optional[TokenUsage]:
    """Azure AI Voice Live ``response.done`` payload extractor.

    Reads ``response_obj.usage`` and, when present, the audio / text /
    cached sub-counts under ``input_token_details`` and
    ``output_token_details``. Returns ``None`` when there is no ``usage``
    or every count is zero.
    """
    usage = _get(response_obj, "usage")
    if usage is None:
        return None

    input_count = _to_int(_get(usage, "input_tokens"))
    output_count = _to_int(_get(usage, "output_tokens"))
    total_count = _to_int(_get(usage, "total_tokens"))
    if total_count == 0 and (input_count or output_count):
        # Payload omitted the total; derive it from the two halves.
        total_count = input_count + output_count

    input_breakdown = _get(usage, "input_token_details") or {}
    output_breakdown = _get(usage, "output_token_details") or {}

    record = TokenUsage(
        input_tokens=input_count,
        output_tokens=output_count,
        total_tokens=total_count,
        input_audio_tokens=_to_int(_get(input_breakdown, "audio_tokens")),
        input_text_tokens=_to_int(_get(input_breakdown, "text_tokens")),
        input_cached_tokens=_to_int(_get(input_breakdown, "cached_tokens")),
        output_audio_tokens=_to_int(_get(output_breakdown, "audio_tokens")),
        output_text_tokens=_to_int(_get(output_breakdown, "text_tokens")),
    )

    # Only return a record when at least one non-zero count surfaced.
    if record.has_any:
        return record
    sub_counts = (
        record.input_audio_tokens,
        record.input_text_tokens,
        record.input_cached_tokens,
        record.output_audio_tokens,
        record.output_text_tokens,
    )
    if any(sub_counts):
        return record
    return None


# ---------------------------------------------------------------------------
# Tool / sub-agent attribution
# ---------------------------------------------------------------------------
def detect_invoked_tools(result: Any) -> set[str]:
    """Return the set of tool/function names invoked in an agent result,
    inferred from ``function_call`` content items.

    Walks ``result.messages[*].contents[*]`` and collects the ``name`` of
    every item whose ``type`` is ``"function_call"``; items without a name
    are skipped. Never raises -- on an unexpected shape the names gathered
    so far (possibly none) are returned.
    """
    names: set[str] = set()
    try:
        messages = _get(result, "messages")
        if _is_iterable(messages):
            for message in messages:
                items = _get(message, "contents")
                if not _is_iterable(items):
                    continue
                for item in items:
                    if _get(item, "type") != "function_call":
                        continue
                    tool_name = _get(item, "name")
                    if tool_name:
                        names.add(str(tool_name))
    except Exception as exc:
        logger.debug("detect_invoked_tools failed: %s", exc, exc_info=True)
    return names


# ---------------------------------------------------------------------------
# Event sink (optional Application Insights dependency)
# ---------------------------------------------------------------------------
EventSink = Callable[[str, Mapping[str, str]], None]


def _default_event_sink() -> Optional[EventSink]:
    """Return ``azure.monitor.events.extension.track_event`` if importable,
    else ``None``.

    The import happens on each call rather than at module import time, so
    the module loads without the optional dependency installed.
    """
    try:
        from azure.monitor.events.extension import track_event  # type: ignore
    except Exception:  # pragma: no cover - optional dep
        return None
    else:
        return track_event
+ pricing: + Optional mapping ``{model_deployment_name -> (usd_per_1k_input, + usd_per_1k_output)}``. When provided, an ``estimated_cost_usd`` + property is attached to agent / model / summary events. Model lookup + is case-insensitive. Use this to avoid hard-coding rates in KQL. + user_id_hasher: + Optional callable ``str -> str`` applied to any ``user_id`` value + before it leaves the emitter. Use this to satisfy PII / GDPR + requirements (e.g. HMAC-SHA256 with a tenant-scoped salt). Applied + to both ``static_dimensions['user_id']`` (at construction) and + per-call ``user_id`` kwargs. + sample_rate: + Fraction of high-cardinality events (agent / model / user / team / + speech) actually shipped, in ``[0.0, 1.0]``. The cheap **summary + event always fires** regardless of sample_rate so per-request totals + remain accurate; only the per-dimension breakdown is sampled. + Defaults to ``1.0`` (no sampling). + logger: + Override the module logger. + """ + + def __init__( + self, + *, + connection_string: Optional[str] = None, + static_dimensions: Optional[Mapping[str, Any]] = None, + event_sink: Optional[EventSink] = None, + pricing: Optional[Mapping[str, tuple[float, float]]] = None, + user_id_hasher: Optional[Callable[[str], str]] = None, + sample_rate: float = 1.0, + logger: Optional[logging.Logger] = None, + ) -> None: + self._cs = connection_string if connection_string is not None else os.getenv( + "APPLICATIONINSIGHTS_CONNECTION_STRING" + ) + self._sink = event_sink if event_sink is not None else _default_event_sink() + self._log = logger or logging.getLogger(__name__) + + # PII hashing applied to user_id everywhere. + self._user_id_hasher = user_id_hasher + + # Sampling clamp to [0, 1]. + try: + sr = float(sample_rate) + except (TypeError, ValueError): + sr = 1.0 + self._sample_rate = max(0.0, min(1.0, sr)) + + # Case-insensitive pricing lookup. Values stored as a (in, out) tuple. 
+ self._pricing: dict[str, tuple[float, float]] = {} + for model, rates in (pricing or {}).items(): + if not model or rates is None: + continue + try: + inp, out = rates + self._pricing[str(model).lower()] = (float(inp), float(out)) + except (TypeError, ValueError): + self._log.warning("Ignoring malformed pricing entry: %s=%r", model, rates) + + # Pre-stringify static dims once. user_id (if present) is hashed here + # so the raw value is never retained on the emitter. + raw_static = dict(static_dimensions or {}) + if "user_id" in raw_static: + raw_static["user_id"] = self._apply_user_id_hash(raw_static["user_id"]) + self._static: dict[str, str] = { + k: ("" if v is None else str(v)) for k, v in raw_static.items() + } + + # -- public surface --------------------------------------------------- + @property + def enabled(self) -> bool: + return bool(self._cs) and self._sink is not None + + @property + def sample_rate(self) -> float: + return self._sample_rate + + # -- internal helpers ------------------------------------------------- + def _apply_user_id_hash(self, value: Any) -> Any: + """Apply the configured user_id_hasher; never raises.""" + if value is None or value == "" or self._user_id_hasher is None: + return value + try: + return self._user_id_hasher(str(value)) + except Exception as exc: # never let hashing break telemetry + self._log.warning("user_id_hasher raised: %s", exc) + return value + + def _should_sample(self) -> bool: + """Sampling decision for high-cardinality events.""" + if self._sample_rate >= 1.0: + return True + if self._sample_rate <= 0.0: + return False + return random.random() < self._sample_rate + + def _cost_props( + self, model_deployment_name: Optional[str], usage: TokenUsage + ) -> dict[str, str]: + """Return ``{'estimated_cost_usd': '...'}`` when pricing is configured + for the given model, else ``{}``. 
6-decimal formatting.""" + if not self._pricing or not model_deployment_name: + return {} + rate = self._pricing.get(model_deployment_name.lower()) + if not rate: + return {} + inp_rate, out_rate = rate + cost = (usage.input_tokens * inp_rate + usage.output_tokens * out_rate) / 1000.0 + return {"estimated_cost_usd": f"{cost:.6f}"} + + def _summary_cost_props( + self, + primary_model: Optional[str], + additional_agents: Mapping[str, str], + usage: TokenUsage, + ) -> dict[str, str]: + """Best-effort cost for the summary event: charge full usage at the + primary model's rate (the SDK aggregates sub-agent tokens to the + orchestrator, so apportioning is not possible without per-agent + usage). Falls back to silent skip when no rate is known.""" + if primary_model: + cost = self._cost_props(primary_model, usage) + if cost: + return cost + for m in additional_agents.values(): + cost = self._cost_props(m, usage) + if cost: + return cost + return {} + + def emit(self, event_name: str, **dimensions: Any) -> None: + """Low-level: emit an event with arbitrary properties. + + Non-string values are stringified. ``None`` values are dropped. Any + ``user_id`` value is passed through the configured hasher. + Never raises. 
+ """ + props = dict(self._static) # cheap shallow copy of pre-stringified dims + for k, v in dimensions.items(): + if v is None: + continue + if k == "user_id": + v = self._apply_user_id_hash(v) + if v is None or v == "": + continue + props[k] = v if isinstance(v, str) else str(v) + + if not self.enabled: + self._log.debug( + "App Insights not configured -- skipping event %s (%s)", + event_name, props, + ) + return + try: + self._sink(event_name, props) # type: ignore[misc] + except Exception as exc: # never break the caller + self._log.warning("track_event(%s) failed: %s", event_name, exc) + + # -- typed convenience emitters -------------------------------------- + def emit_agent( + self, + *, + agent_name: str, + model_deployment_name: str, + usage: TokenUsage, + **dimensions: Any, + ) -> None: + if not usage.has_any or not self._should_sample(): + return + self.emit( + EVENT_AGENT, + agent_name=agent_name, + model_deployment_name=model_deployment_name, + **usage.to_event_props(), + **self._cost_props(model_deployment_name, usage), + **dimensions, + ) + + def emit_model( + self, + *, + model_deployment_name: str, + usage: TokenUsage, + **dimensions: Any, + ) -> None: + if not usage.has_any or not self._should_sample(): + return + self.emit( + EVENT_MODEL, + model_deployment_name=model_deployment_name, + **usage.to_event_props(), + **self._cost_props(model_deployment_name, usage), + **dimensions, + ) + + def emit_user( + self, + *, + user_id: str, + usage: TokenUsage, + **dimensions: Any, + ) -> None: + if not usage.has_any or not user_id or not self._should_sample(): + return + self.emit( + EVENT_USER, + user_id=user_id, + **usage.to_event_props(), + **dimensions, + ) + + def emit_team( + self, + *, + team_name: str, + usage: TokenUsage, + **dimensions: Any, + ) -> None: + if not usage.has_any or not team_name or not self._should_sample(): + return + self.emit( + EVENT_TEAM, + team_name=team_name, + **usage.to_event_props(), + **dimensions, + ) + + def 
emit_summary( + self, + *, + usage: TokenUsage, + agent_count: int = 1, + model_count: int = 1, + primary_model: Optional[str] = None, + additional_agents: Optional[Mapping[str, str]] = None, + **dimensions: Any, + ) -> None: + """The summary event always fires (ignores ``sample_rate``) so per- + request totals remain accurate even when high-cardinality events are + sampled.""" + if not usage.has_any: + return + # Summary historically uses ``total_input_tokens`` / ``total_output_tokens`` + # field names; preserve that wire format for backward compatibility. + props = { + "total_input_tokens": str(usage.input_tokens), + "total_output_tokens": str(usage.output_tokens), + "total_tokens": str(usage.total_tokens), + "agent_count": str(agent_count), + "model_count": str(model_count), + "sample_rate": f"{self._sample_rate:.4f}", + } + # Carry over realtime sub-counts if present. + for k, v in usage.to_event_props().items(): + props.setdefault(k, v) + # Optional total cost. + props.update(self._summary_cost_props(primary_model, additional_agents or {}, usage)) + self.emit(EVENT_SUMMARY, **props, **dimensions) + + def emit_speech( + self, + *, + model_deployment_name: str, + source: str, + usage: TokenUsage, + **dimensions: Any, + ) -> None: + """Voice-Live / realtime speech usage event.""" + if not self._should_sample(): + return + self.emit( + EVENT_SPEECH, + model_deployment_name=model_deployment_name, + source=source, + **usage.to_event_props(), + **self._cost_props(model_deployment_name, usage), + **dimensions, + ) + + # -- combined emit: summary + agent + per-distinct-model --------------- + def emit_all( + self, + *, + agent_name: str, + model_deployment_name: str, + usage: TokenUsage, + additional_agents: Optional[Mapping[str, str]] = None, + emit_user_event: bool = False, + emit_team_event: bool = False, + **dimensions: Any, + ) -> None: + """Convenience: emit summary, agent, and one model event per distinct + model deployment in one shot. 
+ + ``additional_agents`` maps sub-agent name -> its model deployment name + so callers can describe orchestrators that involve multiple agents. + + ``emit_user_event`` / ``emit_team_event`` opt in to the user/team + events; ``user_id`` / ``team_name`` must be present in dimensions for + those to fire. + """ + if not usage.has_any: + return + + agents = {agent_name: model_deployment_name} + if additional_agents: + agents.update({k: v for k, v in additional_agents.items() if k}) + models = {m for m in agents.values() if m} + + self.emit_summary( + usage=usage, + agent_count=len(agents), + model_count=len(models) or 1, + primary_model=model_deployment_name, + additional_agents=additional_agents, + **dimensions, + ) + self.emit_agent( + agent_name=agent_name, + model_deployment_name=model_deployment_name, + usage=usage, + **dimensions, + ) + for model in models: + self.emit_model( + model_deployment_name=model, + usage=usage, + **dimensions, + ) + if emit_user_event and dimensions.get("user_id"): + self.emit_user( + user_id=str(dimensions["user_id"]), + usage=usage, + agent_name=agent_name, + model_deployment_name=model_deployment_name, + ) + if emit_team_event and dimensions.get("team_name"): + self.emit_team( + team_name=str(dimensions["team_name"]), + usage=usage, + agent_name=agent_name, + model_deployment_name=model_deployment_name, + ) + + self._log.info( + "[TOKEN USAGE] agent=%s model=%s input=%d output=%d total=%d %s", + agent_name, + model_deployment_name, + usage.input_tokens, + usage.output_tokens, + usage.total_tokens, + " ".join(f"{k}={v}" for k, v in dimensions.items() if v), + ) + + +# --------------------------------------------------------------------------- +# Scope / decorator sugar +# --------------------------------------------------------------------------- +@dataclass +class TokenUsageScope(AbstractContextManager): + """Accumulate usage across multiple results, then emit on exit. 
+ + Example:: + + with TokenUsageScope(emitter, + agent_name="chat", + model_deployment_name=cfg.model, + user_id=user_id) as scope: + result = await agent.run(prompt) + scope.add(result) # extracts and accumulates + """ + + emitter: TokenUsageEmitter + agent_name: str + model_deployment_name: str + dimensions: dict[str, Any] = field(default_factory=dict) + additional_agents: dict[str, str] = field(default_factory=dict) + emit_user_event: bool = False + emit_team_event: bool = False + usage: TokenUsage = field(default_factory=TokenUsage) + + def __init__( + self, + emitter: TokenUsageEmitter, + *, + agent_name: str, + model_deployment_name: str, + additional_agents: Optional[Mapping[str, str]] = None, + emit_user_event: bool = False, + emit_team_event: bool = False, + **dimensions: Any, + ) -> None: + self.emitter = emitter + self.agent_name = agent_name + self.model_deployment_name = model_deployment_name + self.additional_agents = dict(additional_agents or {}) + self.emit_user_event = emit_user_event + self.emit_team_event = emit_team_event + self.dimensions = dict(dimensions) + self.usage = TokenUsage() + + # -- accumulation ----------------------------------------------------- + def add(self, source: Any) -> Optional[TokenUsage]: + """Extract usage from any supported shape and add to the running total. + + Never raises -- extraction failures return ``None`` and are logged + at DEBUG. 
+ """ + try: + found = extract_usage(source) or extract_usage_from_stream_chunk(source) + except Exception as exc: # belt + braces; extractors are already safe + logger.debug("TokenUsageScope.add failed: %s", exc, exc_info=True) + return None + if found: + self.usage = self.usage + found + return found + + def add_usage(self, usage: TokenUsage) -> None: + self.usage = self.usage + usage + + def add_chunks(self, chunks: Iterable[Any]) -> None: + for c in chunks: + self.add(c) + + # -- context manager -------------------------------------------------- + def __exit__(self, exc_type, exc, tb) -> None: + # Always emit (best-effort) regardless of exception status. + try: + self.emitter.emit_all( + agent_name=self.agent_name, + model_deployment_name=self.model_deployment_name, + usage=self.usage, + additional_agents=self.additional_agents, + emit_user_event=self.emit_user_event, + emit_team_event=self.emit_team_event, + **self.dimensions, + ) + except Exception as emit_exc: # pragma: no cover - belt + braces + logger.warning("TokenUsageScope emit failed: %s", emit_exc) + return None # do not suppress exceptions + + +def track_tokens( + emitter: TokenUsageEmitter, + *, + agent_name: str, + model_deployment_name: str, + dimension_args: Optional[Mapping[str, str]] = None, + additional_agents: Optional[Mapping[str, str]] = None, + emit_user_event: bool = False, + emit_team_event: bool = False, +): + """Decorator: wrap an async or sync function that returns an LLM result. + + ``dimension_args`` maps emitted-property-name -> callable-keyword-argument + name so per-call values (e.g. ``user_id``) are forwarded to the event. + + Example:: + + @track_tokens(emitter, + agent_name="chat", + model_deployment_name=settings.model, + dimension_args={"user_id": "user_id", + "session_id": "session_id"}) + async def run_chat(prompt, *, user_id, session_id): ... 
+ """ + + dim_args = dict(dimension_args or {}) + + def _decorator(fn: Callable[..., Any]): + is_coro = _is_coroutine_function(fn) + + if is_coro: + @functools.wraps(fn) + async def _aw(*args, **kwargs) -> Any: + with _scope_for(kwargs) as scope: + result = await fn(*args, **kwargs) + scope.add(result) + return result + return _aw + + @functools.wraps(fn) + def _sw(*args, **kwargs) -> Any: + with _scope_for(kwargs) as scope: + result = fn(*args, **kwargs) + scope.add(result) + return result + return _sw + + def _scope_for(call_kwargs: Mapping[str, Any]) -> TokenUsageScope: + dimensions = { + prop: call_kwargs.get(kw) + for prop, kw in dim_args.items() + if call_kwargs.get(kw) is not None + } + return TokenUsageScope( + emitter, + agent_name=agent_name, + model_deployment_name=model_deployment_name, + additional_agents=additional_agents, + emit_user_event=emit_user_event, + emit_team_event=emit_team_event, + **dimensions, + ) + + return _decorator + + +def _is_coroutine_function(fn: Callable[..., Any]) -> bool: + return asyncio.iscoroutinefunction(fn) + + +__all__ = [ + "EVENT_SUMMARY", + "EVENT_AGENT", + "EVENT_MODEL", + "EVENT_USER", + "EVENT_TEAM", + "EVENT_SPEECH", + "TokenUsage", + "TokenUsageEmitter", + "TokenUsageScope", + "track_tokens", + "extract_usage", + "extract_usage_from_dict", + "extract_usage_from_stream_chunk", + "extract_realtime_usage", + "detect_invoked_tools", +] diff --git a/src/processor/src/utils/token_usage_tracker.py b/src/processor/src/utils/token_usage_tracker.py index 25e5e0b6..0799058a 100644 --- a/src/processor/src/utils/token_usage_tracker.py +++ b/src/processor/src/utils/token_usage_tracker.py @@ -8,20 +8,30 @@ - Per user/process - Per model deployment -Usage data is emitted to Application Insights as custom events and can be -persisted to Cosmos DB via the TelemetryManager. 
+Uses the cross-accelerator ``llm_token_telemetry`` module for extraction +and emission, keeping this tracker as a thin orchestration-specific layer +that adds thread-safe aggregation and per-step tracking. """ from __future__ import annotations import logging import threading -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Any -from utils.event_utils import track_event_if_configured +from utils.llm_token_telemetry import ( + TokenUsage, + TokenUsageEmitter, + extract_usage, + extract_usage_from_dict, + extract_usage_from_stream_chunk, +) logger = logging.getLogger(__name__) +# Module-level emitter instance shared across the processor service. +_emitter = TokenUsageEmitter() + @dataclass class TokenUsageRecord: @@ -60,8 +70,8 @@ class TokenUsageTracker: """Thread-safe tracker that aggregates LLM token usage across multiple dimensions. Accumulates usage per agent, per step (team), per model, and overall per process. - Emits Application Insights custom events for each recorded interaction and - provides summary emission at process completion. + Emits Application Insights custom events via ``TokenUsageEmitter`` from the + cross-accelerator ``llm_token_telemetry`` module. """ def __init__(self, process_id: str, user_id: str = ""): @@ -95,8 +105,8 @@ def record( ) -> None: """Record a single LLM call's token usage. - Accumulates into all relevant dimensions and emits a per-call - Application Insights event. + Accumulates into all relevant dimensions and emits per-call + Application Insights events via ``TokenUsageEmitter``. 
""" if total_tokens <= 0 and input_tokens <= 0 and output_tokens <= 0: return @@ -132,23 +142,20 @@ def record( self._by_model[model_deployment_name] = AggregatedTokenUsage() self._by_model[model_deployment_name].add(record) - # Emit per-call event to Application Insights - try: - track_event_if_configured( - "LLM_Token_Usage", - { - "process_id": self.process_id, - "user_id": self.user_id, - "agent_name": agent_name, - "step_name": step_name, - "model_deployment_name": model_deployment_name, - "input_tokens": str(input_tokens), - "output_tokens": str(output_tokens), - "total_tokens": str(total_tokens), - }, - ) - except Exception: - logger.debug("Failed to emit per-call token usage event", exc_info=True) + # Emit per-call events via the cross-accelerator emitter + usage = TokenUsage( + input_tokens=input_tokens, + output_tokens=output_tokens, + total_tokens=total_tokens, + ) + _emitter.emit_all( + agent_name=agent_name or "unknown", + model_deployment_name=model_deployment_name or "unknown", + usage=usage, + process_id=self.process_id, + user_id=self.user_id, + step_name=step_name, + ) logger.info( "[TOKEN] Recorded: agent=%s step=%s model=%s input=%d output=%d total=%d | cumulative=%d", @@ -177,72 +184,75 @@ def emit_summary_events(self) -> None: """Emit summary-level Application Insights custom events. Call this at the end of a process/workflow to produce aggregated events - that are easy to query in KQL. + that are easy to query in KQL. Uses ``TokenUsageEmitter`` from + ``llm_token_telemetry`` for all event emission. 
""" summary = self.get_summary() try: + total = summary["total"] + total_usage = TokenUsage( + input_tokens=total["input_tokens"], + output_tokens=total["output_tokens"], + total_tokens=total["total_tokens"], + ) + # Overall summary - track_event_if_configured( - "LLM_Token_Usage_Summary", - { - "process_id": self.process_id, - "user_id": self.user_id, - "total_input_tokens": str(summary["total"]["input_tokens"]), - "total_output_tokens": str(summary["total"]["output_tokens"]), - "total_tokens": str(summary["total"]["total_tokens"]), - "total_calls": str(summary["total"]["call_count"]), - "agent_count": str(len(summary["by_agent"])), - "model_count": str(len(summary["by_model"])), - "step_count": str(len(summary["by_step"])), - }, + _emitter.emit_summary( + usage=total_usage, + agent_count=len(summary["by_agent"]), + model_count=len(summary["by_model"]), + process_id=self.process_id, + user_id=self.user_id, + total_calls=str(total["call_count"]), + step_count=str(len(summary["by_step"])), ) # Per-agent events - for agent_name, usage in summary["by_agent"].items(): + for agent_name, usage_dict in summary["by_agent"].items(): model = self._agent_model_map.get(agent_name, "") - track_event_if_configured( - "LLM_Agent_Token_Usage", - { - "process_id": self.process_id, - "user_id": self.user_id, - "agent_name": agent_name, - "model_deployment_name": model, - "input_tokens": str(usage["input_tokens"]), - "output_tokens": str(usage["output_tokens"]), - "total_tokens": str(usage["total_tokens"]), - "call_count": str(usage["call_count"]), - }, + agent_usage = TokenUsage( + input_tokens=usage_dict["input_tokens"], + output_tokens=usage_dict["output_tokens"], + total_tokens=usage_dict["total_tokens"], + ) + _emitter.emit_agent( + agent_name=agent_name, + model_deployment_name=model, + usage=agent_usage, + process_id=self.process_id, + user_id=self.user_id, + call_count=str(usage_dict["call_count"]), ) # Per-model events - for model_name, usage in summary["by_model"].items(): 
- track_event_if_configured( - "LLM_Model_Token_Usage", - { - "process_id": self.process_id, - "user_id": self.user_id, - "model_deployment_name": model_name, - "input_tokens": str(usage["input_tokens"]), - "output_tokens": str(usage["output_tokens"]), - "total_tokens": str(usage["total_tokens"]), - "call_count": str(usage["call_count"]), - }, + for model_name, usage_dict in summary["by_model"].items(): + model_usage = TokenUsage( + input_tokens=usage_dict["input_tokens"], + output_tokens=usage_dict["output_tokens"], + total_tokens=usage_dict["total_tokens"], + ) + _emitter.emit_model( + model_deployment_name=model_name, + usage=model_usage, + process_id=self.process_id, + user_id=self.user_id, + call_count=str(usage_dict["call_count"]), ) # Per-step (team) events - for step_name, usage in summary["by_step"].items(): - track_event_if_configured( - "LLM_Step_Token_Usage", - { - "process_id": self.process_id, - "user_id": self.user_id, - "step_name": step_name, - "input_tokens": str(usage["input_tokens"]), - "output_tokens": str(usage["output_tokens"]), - "total_tokens": str(usage["total_tokens"]), - "call_count": str(usage["call_count"]), - }, + for step_name, usage_dict in summary["by_step"].items(): + step_usage = TokenUsage( + input_tokens=usage_dict["input_tokens"], + output_tokens=usage_dict["output_tokens"], + total_tokens=usage_dict["total_tokens"], + ) + _emitter.emit_team( + team_name=step_name, + usage=step_usage, + process_id=self.process_id, + user_id=self.user_id, + call_count=str(usage_dict["call_count"]), ) logger.info( @@ -259,114 +269,29 @@ def emit_summary_events(self) -> None: def extract_usage_from_response(response: Any) -> TokenUsageRecord | None: """Extract token usage from an agent_framework or OpenAI SDK response object. - Handles multiple response shapes: - 1. response.usage (OpenAI SDK ChatCompletion) - 2. response.usage_details (agent_framework Content objects) - 3. response dict with usage keys - 4. 
AgentResponseUpdate with contents containing usage + Delegates to ``llm_token_telemetry.extract_usage()`` and converts the + result to a ``TokenUsageRecord`` for backward compatibility. """ - if response is None: + usage = extract_usage(response) + if usage is None: return None - - # 1. Direct .usage attribute (OpenAI ChatCompletion, Responses API) - usage = getattr(response, "usage", None) - if usage is not None: - record = _parse_usage_object(usage) - if record: - return record - - # 2. .usage_details or .details attribute - usage_details = getattr(response, "details", None) or getattr(response, "usage_details", None) - if usage_details is not None: - record = _parse_usage_object(usage_details) - if record: - return record - - # 3. raw_representation with usage - raw = getattr(response, "raw_representation", None) - if raw is not None: - raw_usage = getattr(raw, "usage", None) - if raw_usage is not None: - record = _parse_usage_object(raw_usage) - if record: - return record - if isinstance(raw, dict) and "usage" in raw: - record = _parse_usage_object(raw["usage"]) - if record: - return record - - # 4. contents list with usage items (AgentResponseUpdate) - contents = getattr(response, "contents", None) - if contents: - for item in contents: - # Try usage-typed content items first, then any item with details - ud = None - if getattr(item, "type", None) == "usage": - ud = getattr(item, "details", None) or getattr(item, "usage_details", None) - if ud is None: - ud = getattr(item, "details", None) or getattr(item, "usage_details", None) - if ud is not None: - record = _parse_usage_object(ud) - if record: - return record - # Dict content item - if isinstance(item, dict): - for key in ("details", "usage_details"): - if key in item: - record = _parse_usage_object(item[key]) - if record: - return record - if "input_token_count" in item or "total_token_count" in item: - record = _parse_usage_object(item) - if record: - return record - - # 5. 
additional_properties - addl = getattr(response, "additional_properties", None) - if isinstance(addl, dict) and "usage" in addl: - record = _parse_usage_object(addl["usage"]) - if record: - return record - - # 6. Dict response - if isinstance(response, dict): - if "usage" in response: - record = _parse_usage_object(response["usage"]) - if record: - return record - record = _parse_usage_object(response) - if record: - return record - - return None - - -def _get_field(obj: Any, *names: str) -> int: - """Read the first non-zero value from *obj* for the given field names. - - Works uniformly for dicts (via ``get``) and objects (via ``getattr``). - """ - getter = obj.get if isinstance(obj, dict) else lambda k, d=0: getattr(obj, k, d) - for name in names: - val = getter(name, 0) - if val: - return int(val) - return 0 + return TokenUsageRecord( + input_tokens=usage.input_tokens, + output_tokens=usage.output_tokens, + total_tokens=usage.total_tokens, + ) def _parse_usage_object(usage: Any) -> TokenUsageRecord | None: - """Parse a usage object (dict or object with attrs) into a TokenUsageRecord.""" - if usage is None: - return None - - inp = _get_field(usage, "input_token_count", "prompt_tokens", "input_tokens") - out = _get_field(usage, "output_token_count", "completion_tokens", "output_tokens") - tot = _get_field(usage, "total_token_count", "total_tokens") or (inp + out) + """Parse a usage object (dict or object with attrs) into a TokenUsageRecord. - if tot > 0 or inp > 0 or out > 0: - return TokenUsageRecord( - input_tokens=inp, - output_tokens=out, - total_tokens=tot if tot > 0 else inp + out, - ) - return None + Delegates to ``llm_token_telemetry.extract_usage_from_dict()``. 
+ """ + result = extract_usage_from_dict(usage) + if result is None: + return None + return TokenUsageRecord( + input_tokens=result.input_tokens, + output_tokens=result.output_tokens, + total_tokens=result.total_tokens, + ) From 73ef8a080626c93d086209e1b1ab5b05273fe476 Mon Sep 17 00:00:00 2001 From: Priyanka-Microsoft Date: Mon, 1 Jun 2026 13:50:53 +0530 Subject: [PATCH 4/5] refactor: update llm_token_telemetry.py to latest cross-accelerator version and remove deployment/workbook changes - Updated llm_token_telemetry.py (processor + backend-api) to match Ajit's latest from customer-chatbot PR #236 (adds perf counters, emit timing, slow-emit warnings, batch overhead tracking) - Removed workbook/dashboard files (deploy-workbooks.ps1, KQL queries, workbook JSON files, tokenUsageWorkbook.bicep) - Reverted infra changes (main.bicep, main_custom.bicep, main.parameters.json) - Reverted Dockerfile changes (frontend, processor) - Removed test file (test_llm_token_telemetry.py) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- infra/dashboards/deploy-workbooks.ps1 | 86 --- infra/dashboards/token-usage-queries.kql | 113 ---- infra/dashboards/workbook-eks-content.json | 1 - infra/dashboards/workbook-gke-content.json | 1 - infra/main.bicep | 12 +- infra/main.parameters.json | 23 +- infra/main_custom.bicep | 12 +- infra/modules/tokenUsageWorkbook.bicep | 458 -------------- .../app/libs/logging/llm_token_telemetry.py | 156 ++++- src/frontend/Dockerfile | 1 - src/processor/Dockerfile | 3 +- .../src/tests/test_llm_token_telemetry.py | 572 ------------------ .../src/utils/llm_token_telemetry.py | 156 ++++- 13 files changed, 265 insertions(+), 1329 deletions(-) delete mode 100644 infra/dashboards/deploy-workbooks.ps1 delete mode 100644 infra/dashboards/token-usage-queries.kql delete mode 100644 infra/dashboards/workbook-eks-content.json delete mode 100644 infra/dashboards/workbook-gke-content.json delete mode 100644 infra/modules/tokenUsageWorkbook.bicep delete 
mode 100644 src/processor/src/tests/test_llm_token_telemetry.py diff --git a/infra/dashboards/deploy-workbooks.ps1 b/infra/dashboards/deploy-workbooks.ps1 deleted file mode 100644 index caa88198..00000000 --- a/infra/dashboards/deploy-workbooks.ps1 +++ /dev/null @@ -1,86 +0,0 @@ -# ============================================================= -# LLM Token Usage Workbook Deployment Script -# ============================================================= -# Usage: -# .\deploy-workbooks.ps1 -ResourceGroup -AppInsightsResourceId [-Location ] -# -# Example: -# .\deploy-workbooks.ps1 ` -# -ResourceGroup "rg-my-permanent-rg" ` -# -AppInsightsResourceId "/subscriptions//resourcegroups//providers/microsoft.insights/components/" ` -# -Location "australiaeast" -# ============================================================= - -param( - [Parameter(Mandatory=$true)] - [string]$ResourceGroup, - - [Parameter(Mandatory=$true)] - [string]$AppInsightsResourceId, - - [string]$Location = "australiaeast" -) - -$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path - -# Deploy GKE workbook -$gkeContent = Get-Content "$scriptDir\workbook-gke-content.json" -Raw -$gkeId = [guid]::NewGuid().ToString() - -$body = @{ - location = $Location - kind = "shared" - properties = @{ - displayName = "LLM Token Usage Dashboard - GKE" - serializedData = $gkeContent - version = "Notebook/1.0" - sourceId = $AppInsightsResourceId - category = "workbook" - } - tags = @{ - "hidden-title" = "LLM Token Usage Dashboard - GKE" - } -} | ConvertTo-Json -Depth 5 - -$bodyFile = [System.IO.Path]::GetTempFileName() -$body | Set-Content $bodyFile -Encoding UTF8 - -az rest --method PUT ` - --url "https://management.azure.com/subscriptions/$(az account show --query id -o tsv)/resourceGroups/$ResourceGroup/providers/microsoft.insights/workbooks/$($gkeId)?api-version=2022-04-01" ` - --body "@$bodyFile" ` - --headers "Content-Type=application/json" 2>&1 | Out-Null - -Write-Host "Deployed GKE workbook: $gkeId" 
-Remove-Item $bodyFile - -# Deploy EKS workbook -$eksContent = Get-Content "$scriptDir\workbook-eks-content.json" -Raw -$eksId = [guid]::NewGuid().ToString() - -$body = @{ - location = $Location - kind = "shared" - properties = @{ - displayName = "LLM Token Usage Dashboard - EKS" - serializedData = $eksContent - version = "Notebook/1.0" - sourceId = $AppInsightsResourceId - category = "workbook" - } - tags = @{ - "hidden-title" = "LLM Token Usage Dashboard - EKS" - } -} | ConvertTo-Json -Depth 5 - -$bodyFile = [System.IO.Path]::GetTempFileName() -$body | Set-Content $bodyFile -Encoding UTF8 - -az rest --method PUT ` - --url "https://management.azure.com/subscriptions/$(az account show --query id -o tsv)/resourceGroups/$ResourceGroup/providers/microsoft.insights/workbooks/$($eksId)?api-version=2022-04-01" ` - --body "@$bodyFile" ` - --headers "Content-Type=application/json" 2>&1 | Out-Null - -Write-Host "Deployed EKS workbook: $eksId" -Remove-Item $bodyFile - -Write-Host "`nDone! Both workbooks deployed to $ResourceGroup" diff --git a/infra/dashboards/token-usage-queries.kql b/infra/dashboards/token-usage-queries.kql deleted file mode 100644 index 38a0fc19..00000000 --- a/infra/dashboards/token-usage-queries.kql +++ /dev/null @@ -1,113 +0,0 @@ -// ============================================================================= -// LLM Token Usage Dashboard Queries for Application Insights -// ============================================================================= -// These KQL queries can be used in Azure Application Insights / Log Analytics -// to visualize token usage across agents, models, steps, and users. -// ============================================================================= - -// ---- 1. 
Overall Token Usage Summary (last 24h) ---- -customEvents -| where name == "LLM_Token_Usage_Summary" -| where timestamp > ago(24h) -| extend process_id = tostring(customDimensions.process_id), - total_input = toint(customDimensions.total_input_tokens), - total_output = toint(customDimensions.total_output_tokens), - total = toint(customDimensions.total_tokens), - call_count = toint(customDimensions.total_calls) -| project timestamp, process_id, total_input, total_output, total, call_count -| order by timestamp desc - -// ---- 2. Per-Agent Token Usage ---- -customEvents -| where name == "LLM_Agent_Token_Usage" -| where timestamp > ago(24h) -| extend agent_name = tostring(customDimensions.agent_name), - input_tokens = toint(customDimensions.input_tokens), - output_tokens = toint(customDimensions.output_tokens), - total_tokens = toint(customDimensions.total_tokens), - calls = toint(customDimensions.call_count), - process_id = tostring(customDimensions.process_id) -| summarize total_input = sum(input_tokens), - total_output = sum(output_tokens), - total = sum(total_tokens), - total_calls = sum(calls) - by agent_name -| order by total desc - -// ---- 3. Per-Model Token Usage ---- -customEvents -| where name == "LLM_Model_Token_Usage" -| where timestamp > ago(24h) -| extend model_name = tostring(customDimensions.model_deployment_name), - input_tokens = toint(customDimensions.input_tokens), - output_tokens = toint(customDimensions.output_tokens), - total_tokens = toint(customDimensions.total_tokens), - calls = toint(customDimensions.call_count), - process_id = tostring(customDimensions.process_id) -| summarize total_input = sum(input_tokens), - total_output = sum(output_tokens), - total = sum(total_tokens), - total_calls = sum(calls) - by model_name -| order by total desc - -// ---- 4. 
Per-Step (Team) Token Usage ---- -customEvents -| where name == "LLM_Step_Token_Usage" -| where timestamp > ago(24h) -| extend step_name = tostring(customDimensions.step_name), - input_tokens = toint(customDimensions.input_tokens), - output_tokens = toint(customDimensions.output_tokens), - total_tokens = toint(customDimensions.total_tokens), - calls = toint(customDimensions.call_count), - process_id = tostring(customDimensions.process_id) -| summarize total_input = sum(input_tokens), - total_output = sum(output_tokens), - total = sum(total_tokens), - total_calls = sum(calls) - by step_name -| order by total desc - -// ---- 5. Per-User Token Usage (requires user_id in process telemetry) ---- -customEvents -| where name == "LLM_Token_Usage_Summary" -| where timestamp > ago(24h) -| extend process_id = tostring(customDimensions.process_id), - total_tokens = toint(customDimensions.total_tokens), - user_id = tostring(customDimensions.user_id) -| summarize total = sum(total_tokens), runs = count() by user_id -| order by total desc - -// ---- 6. Individual LLM Call Log ---- -customEvents -| where name == "LLM_Token_Usage" -| where timestamp > ago(24h) -| extend agent_name = tostring(customDimensions.agent_name), - step_name = tostring(customDimensions.step_name), - model = tostring(customDimensions.model_deployment_name), - input_tokens = toint(customDimensions.input_tokens), - output_tokens = toint(customDimensions.output_tokens), - total_tokens = toint(customDimensions.total_tokens), - process_id = tostring(customDimensions.process_id) -| project timestamp, process_id, agent_name, step_name, model, input_tokens, output_tokens, total_tokens -| order by timestamp desc - -// ---- 7. 
Hourly Token Usage Trend ---- -customEvents -| where name == "LLM_Token_Usage" -| where timestamp > ago(7d) -| extend total_tokens = toint(customDimensions.total_tokens) -| summarize hourly_tokens = sum(total_tokens), calls = count() by bin(timestamp, 1h) -| order by timestamp asc -| render timechart - -// ---- 8. Estimated Cost (GPT-4o pricing: $2.50/1M input, $10/1M output) ---- -customEvents -| where name == "LLM_Token_Usage_Summary" -| where timestamp > ago(24h) -| extend process_id = tostring(customDimensions.process_id), - input_tokens = toint(customDimensions.total_input_tokens), - output_tokens = toint(customDimensions.total_output_tokens) -| extend estimated_cost_usd = (input_tokens / 1000000.0 * 2.50) + (output_tokens / 1000000.0 * 10.0) -| project timestamp, process_id, input_tokens, output_tokens, estimated_cost_usd -| order by estimated_cost_usd desc diff --git a/infra/dashboards/workbook-eks-content.json b/infra/dashboards/workbook-eks-content.json deleted file mode 100644 index 04433e99..00000000 --- a/infra/dashboards/workbook-eks-content.json +++ /dev/null @@ -1 +0,0 @@ -{"version":"Notebook/1.0","items":[{"type":1,"content":{"json":"# LLM Token Usage Dashboard\n\nThis workbook provides comprehensive visibility into LLM token consumption across agents, models, workflow steps, and users.\n\n---"},"name":"header"},{"type":9,"content":{"version":"KqlParameterItem/1.0","parameters":[{"id":"time-range-param","version":"KqlParameterItem/1.0","name":"TimeRange","type":4,"isRequired":true,"value":{"durationMs":1800000,"endTime":"2026-05-21T06:50:00.000Z"},"typeSettings":{"selectableValues":[{"durationMs":3600000},{"durationMs":14400000},{"durationMs":86400000},{"durationMs":259200000},{"durationMs":604800000},{"durationMs":2592000000}],"allowCustom":true},"label":"Time Range"}],"style":"pills","queryType":0,"resourceType":"microsoft.insights/components"},"name":"parameters"},{"type":1,"content":{"json":"## Overall Token Usage 
Summary"},"name":"summary-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| summarize \n total_input = sum(toint(customDimensions.total_input_tokens)),\n total_output = sum(toint(customDimensions.total_output_tokens)),\n total = sum(toint(customDimensions.total_tokens)),\n total_calls = sum(toint(customDimensions.total_calls)),\n processes = dcount(tostring(customDimensions.process_id))","size":4,"title":"Token Usage Totals","queryType":0,"resourceType":"microsoft.insights/components","visualization":"tiles","tileSettings":{"titleContent":{"columnMatch":"Column1","formatter":1},"leftContent":{"columnMatch":"total","formatter":12,"formatOptions":{"palette":"auto"},"numberFormat":{"unit":0,"options":{"style":"decimal","maximumFractionDigits":0}}},"showBorder":true}},"name":"summary-tiles"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_input = toint(customDimensions.total_input_tokens),\n total_output = toint(customDimensions.total_output_tokens),\n total = toint(customDimensions.total_tokens),\n call_count = toint(customDimensions.total_calls)\n| project timestamp, process_id, total_input, total_output, total, call_count\n| order by timestamp desc","size":0,"title":"Token Usage by Process","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"blue"}}]}},"name":"summary-table"},{"type":1,"content":{"json":"## Per-Agent Token Usage"},"name":"agent-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Agent_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n input_tokens = 
toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by agent_name\n| order by total desc","size":0,"title":"Token Consumption by Agent","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"blue"}}]}},"customWidth":"50","name":"agent-table"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Agent_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by agent_name\n| order by total desc","size":0,"title":"Token Distribution by Agent","queryType":0,"resourceType":"microsoft.insights/components","visualization":"piechart"},"customWidth":"50","name":"agent-chart"},{"type":1,"content":{"json":"## Per-Model Token Usage"},"name":"model-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Model_Token_Usage\"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by model_name\n| order by total desc","size":0,"title":"Token Consumption by 
Model","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"green"}}]}},"customWidth":"50","name":"model-table"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Model_Token_Usage\"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by model_name\n| order by total desc","size":0,"title":"Token Distribution by Model","queryType":0,"resourceType":"microsoft.insights/components","visualization":"piechart"},"customWidth":"50","name":"model-chart"},{"type":1,"content":{"json":"## Per-Step (Team) Token Usage"},"name":"step-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Step_Token_Usage\"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc","size":0,"title":"Token Consumption by Workflow Step","queryType":0,"resourceType":"microsoft.insights/components","visualization":"barchart","chartSettings":{"xAxis":"step_name","yAxis":"total","group":"step_name"}},"customWidth":"50","name":"step-chart"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Step_Token_Usage\"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = 
toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc","size":0,"title":"Step Usage Details","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"orange"}}]}},"customWidth":"50","name":"step-table"},{"type":1,"content":{"json":"## Per-User Token Usage"},"name":"user-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_tokens = toint(customDimensions.total_tokens),\n user_id = tostring(customDimensions.user_id)\n| summarize total = sum(total_tokens), runs = count() by user_id\n| order by total desc","size":0,"title":"Token Usage by User","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"purple"}}]}},"name":"user-table"},{"type":1,"content":{"json":"## Token Usage Trends"},"name":"trend-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage\"\n| where timestamp {TimeRange}\n| extend total_tokens = toint(customDimensions.total_tokens)\n| summarize hourly_tokens = sum(total_tokens), calls = count() by bin(timestamp, 1h)\n| order by timestamp asc","size":0,"title":"Hourly Token Consumption","queryType":0,"resourceType":"microsoft.insights/components","visualization":"linechart","chartSettings":{"xAxis":"timestamp","yAxis":"hourly_tokens","showLegend":true}},"name":"trend-chart"},{"type":1,"content":{"json":"## Estimated Cost\n\n> Cost 
estimates use GPT-4o pricing: **$2.50 / 1M input tokens**, **$10.00 / 1M output tokens**. Adjust for your actual model pricing."},"name":"cost-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n input_tokens = toint(customDimensions.total_input_tokens),\n output_tokens = toint(customDimensions.total_output_tokens)\n| extend estimated_cost_usd = round((input_tokens / 1000000.0 * 2.50) + (output_tokens / 1000000.0 * 10.0), 4)\n| project timestamp, process_id, input_tokens, output_tokens, estimated_cost_usd\n| order by estimated_cost_usd desc","size":0,"title":"Estimated Cost per Process (USD)","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"estimated_cost_usd","formatter":3,"formatOptions":{"palette":"redBright"}}]}},"name":"cost-table"},{"type":1,"content":{"json":"## Individual LLM Call Log"},"name":"calls-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n step_name = tostring(customDimensions.step_name),\n model = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n process_id = tostring(customDimensions.process_id)\n| project timestamp, process_id, agent_name, step_name, model, input_tokens, output_tokens, total_tokens\n| order by timestamp desc\n| take 200","size":0,"title":"Recent LLM Calls (last 
200)","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total_tokens","formatter":3,"formatOptions":{"palette":"blue"}}]}},"name":"calls-table"}],"isLocked":false,"fallbackResourceIds":["/subscriptions/1d5876cd-7603-407a-96d2-ae5ca9a9c5f3/resourcegroups/rg-pricmglogp33/providers/microsoft.insights/components/appi-pricmglogp33usmqm"]} diff --git a/infra/dashboards/workbook-gke-content.json b/infra/dashboards/workbook-gke-content.json deleted file mode 100644 index ad05834c..00000000 --- a/infra/dashboards/workbook-gke-content.json +++ /dev/null @@ -1 +0,0 @@ -{"version":"Notebook/1.0","items":[{"type":1,"content":{"json":"# LLM Token Usage Dashboard\n\nThis workbook provides comprehensive visibility into LLM token consumption across agents, models, workflow steps, and users.\n\n---"},"name":"header"},{"type":9,"content":{"version":"KqlParameterItem/1.0","parameters":[{"id":"time-range-param","version":"KqlParameterItem/1.0","name":"TimeRange","type":4,"isRequired":true,"value":{"durationMs":1500000,"endTime":"2026-05-21T06:08:00.000Z"},"typeSettings":{"selectableValues":[{"durationMs":3600000},{"durationMs":14400000},{"durationMs":86400000},{"durationMs":259200000},{"durationMs":604800000},{"durationMs":2592000000}],"allowCustom":true},"label":"Time Range"}],"style":"pills","queryType":0,"resourceType":"microsoft.insights/components"},"name":"parameters"},{"type":1,"content":{"json":"## Overall Token Usage Summary"},"name":"summary-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| summarize \n total_input = sum(toint(customDimensions.total_input_tokens)),\n total_output = sum(toint(customDimensions.total_output_tokens)),\n total = sum(toint(customDimensions.total_tokens)),\n total_calls = sum(toint(customDimensions.total_calls)),\n processes = 
dcount(tostring(customDimensions.process_id))","size":4,"title":"Token Usage Totals","queryType":0,"resourceType":"microsoft.insights/components","visualization":"tiles","tileSettings":{"titleContent":{"columnMatch":"Column1","formatter":1},"leftContent":{"columnMatch":"total","formatter":12,"formatOptions":{"palette":"auto"},"numberFormat":{"unit":0,"options":{"style":"decimal","maximumFractionDigits":0}}},"showBorder":true}},"name":"summary-tiles"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_input = toint(customDimensions.total_input_tokens),\n total_output = toint(customDimensions.total_output_tokens),\n total = toint(customDimensions.total_tokens),\n call_count = toint(customDimensions.total_calls)\n| project timestamp, process_id, total_input, total_output, total, call_count\n| order by timestamp desc","size":0,"title":"Token Usage by Process","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"blue"}}]}},"name":"summary-table"},{"type":1,"content":{"json":"## Per-Agent Token Usage"},"name":"agent-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Agent_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by agent_name\n| order by total desc","size":0,"title":"Token Consumption by 
Agent","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"blue"}}]}},"customWidth":"50","name":"agent-table"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Agent_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by agent_name\n| order by total desc","size":0,"title":"Token Distribution by Agent","queryType":0,"resourceType":"microsoft.insights/components","visualization":"piechart"},"customWidth":"50","name":"agent-chart"},{"type":1,"content":{"json":"## Per-Model Token Usage"},"name":"model-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Model_Token_Usage\"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by model_name\n| order by total desc","size":0,"title":"Token Consumption by Model","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"green"}}]}},"customWidth":"50","name":"model-table"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Model_Token_Usage\"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total 
= sum(total_tokens) by model_name\n| order by total desc","size":0,"title":"Token Distribution by Model","queryType":0,"resourceType":"microsoft.insights/components","visualization":"piechart"},"customWidth":"50","name":"model-chart"},{"type":1,"content":{"json":"## Per-Step (Team) Token Usage"},"name":"step-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Step_Token_Usage\"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc","size":0,"title":"Token Consumption by Workflow Step","queryType":0,"resourceType":"microsoft.insights/components","visualization":"barchart","chartSettings":{"xAxis":"step_name","yAxis":"total","group":"step_name"}},"customWidth":"50","name":"step-chart"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Step_Token_Usage\"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc","size":0,"title":"Step Usage 
Details","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"orange"}}]}},"customWidth":"50","name":"step-table"},{"type":1,"content":{"json":"## Per-User Token Usage"},"name":"user-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_tokens = toint(customDimensions.total_tokens),\n user_id = tostring(customDimensions.user_id)\n| summarize total = sum(total_tokens), runs = count() by user_id\n| order by total desc","size":0,"title":"Token Usage by User","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"purple"}}]}},"name":"user-table"},{"type":1,"content":{"json":"## Token Usage Trends"},"name":"trend-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage\"\n| where timestamp {TimeRange}\n| extend total_tokens = toint(customDimensions.total_tokens)\n| summarize hourly_tokens = sum(total_tokens), calls = count() by bin(timestamp, 1h)\n| order by timestamp asc","size":0,"title":"Hourly Token Consumption","queryType":0,"resourceType":"microsoft.insights/components","visualization":"linechart","chartSettings":{"xAxis":"timestamp","yAxis":"hourly_tokens","showLegend":true}},"name":"trend-chart"},{"type":1,"content":{"json":"## Estimated Cost\n\n> Cost estimates use GPT-4o pricing: **$2.50 / 1M input tokens**, **$10.00 / 1M output tokens**. 
Adjust for your actual model pricing."},"name":"cost-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n input_tokens = toint(customDimensions.total_input_tokens),\n output_tokens = toint(customDimensions.total_output_tokens)\n| extend estimated_cost_usd = round((input_tokens / 1000000.0 * 2.50) + (output_tokens / 1000000.0 * 10.0), 4)\n| project timestamp, process_id, input_tokens, output_tokens, estimated_cost_usd\n| order by estimated_cost_usd desc","size":0,"title":"Estimated Cost per Process (USD)","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"estimated_cost_usd","formatter":3,"formatOptions":{"palette":"redBright"}}]}},"name":"cost-table"},{"type":1,"content":{"json":"## Individual LLM Call Log"},"name":"calls-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n step_name = tostring(customDimensions.step_name),\n model = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n process_id = tostring(customDimensions.process_id)\n| project timestamp, process_id, agent_name, step_name, model, input_tokens, output_tokens, total_tokens\n| order by timestamp desc\n| take 200","size":0,"title":"Recent LLM Calls (last 
200)","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total_tokens","formatter":3,"formatOptions":{"palette":"blue"}}]}},"name":"calls-table"}],"isLocked":false,"fallbackResourceIds":["/subscriptions/1d5876cd-7603-407a-96d2-ae5ca9a9c5f3/resourcegroups/rg-pricmglogp33/providers/microsoft.insights/components/appi-pricmglogp33usmqm"]} diff --git a/infra/main.bicep b/infra/main.bicep index 3306c57a..904fee03 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -91,7 +91,7 @@ param enableTelemetry bool = true param enablePrivateNetworking bool = false @description('Optional. Enable monitoring applicable resources, aligned with the Well Architected Framework recommendations. This setting enables Application Insights and Log Analytics and configures all the resources applicable resources to send logs. Defaults to false.') -param enableMonitoring bool = true +param enableMonitoring bool = false @description('Optional. Enable scalability for applicable resources, aligned with the Well Architected Framework recommendations. 
Defaults to false.') param enableScalability bool = false @@ -310,16 +310,6 @@ module applicationInsights 'br/public:avm/res/insights/component:0.6.0' = if (en } } -// ========== LLM Token Usage Workbook ========== // -module tokenUsageWorkbook './modules/tokenUsageWorkbook.bicep' = if (enableMonitoring) { - name: take('module.token-usage-workbook.${solutionSuffix}', 64) - params: { - location: solutionLocation - applicationInsightsResourceId: applicationInsights!.outputs.resourceId - tags: allTags - } -} - // ========== Virtual Network ========== // module virtualNetwork './modules/virtualNetwork.bicep' = if (enablePrivateNetworking) { name: take('module.virtual-network.${solutionSuffix}', 64) diff --git a/infra/main.parameters.json b/infra/main.parameters.json index 6df293a4..b4a1a7cc 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -26,17 +26,11 @@ "gptDeploymentCapacity": { "value": "${AZURE_ENV_GPT_MODEL_CAPACITY}" }, - "enableTelemetry": { - "value": true - }, - "enableMonitoring": { - "value": true - }, - "enablePrivateNetworking": { - "value": false + "existingLogAnalyticsWorkspaceId": { + "value": "${AZURE_ENV_EXISTING_LOG_ANALYTICS_WORKSPACE_RID}" }, - "enableScalability": { - "value": false + "existingFoundryProjectResourceId": { + "value": "${AZURE_EXISTING_AIPROJECT_RESOURCE_ID}" }, "vmAdminUsername": { "value": "${AZURE_ENV_VM_ADMIN_USERNAME}" @@ -44,17 +38,8 @@ "vmAdminPassword": { "value": "${AZURE_ENV_VM_ADMIN_PASSWORD}" }, - "existingLogAnalyticsWorkspaceId": { - "value": "${AZURE_ENV_EXISTING_LOG_ANALYTICS_WORKSPACE_RID}" - }, - "existingFoundryProjectResourceId": { - "value": "${AZURE_EXISTING_AIPROJECT_RESOURCE_ID}" - }, "imageTag": { "value": "${AZURE_ENV_IMAGE_TAG}" - }, - "vmSize": { - "value": "${AZURE_ENV_VM_SIZE}" } } } diff --git a/infra/main_custom.bicep b/infra/main_custom.bicep index 9e106e59..f93b93ab 100644 --- a/infra/main_custom.bicep +++ b/infra/main_custom.bicep @@ -84,7 +84,7 @@ param enableTelemetry 
bool = true param enablePrivateNetworking bool = false @description('Optional. Enable monitoring applicable resources, aligned with the Well Architected Framework recommendations. This setting enables Application Insights and Log Analytics and configures all the resources applicable resources to send logs. Defaults to false.') -param enableMonitoring bool = true +param enableMonitoring bool = false @description('Optional. Enable scalability for applicable resources, aligned with the Well Architected Framework recommendations. Defaults to false.') param enableScalability bool = false @@ -288,16 +288,6 @@ module applicationInsights 'br/public:avm/res/insights/component:0.6.0' = if (en } } -// ========== LLM Token Usage Workbook ========== // -module tokenUsageWorkbook './modules/tokenUsageWorkbook.bicep' = if (enableMonitoring) { - name: take('module.token-usage-workbook.${solutionSuffix}', 64) - params: { - location: solutionLocation - applicationInsightsResourceId: applicationInsights!.outputs.resourceId - tags: allTags - } -} - // ========== Virtual Network ========== // module virtualNetwork './modules/virtualNetwork.bicep' = if (enablePrivateNetworking) { name: take('module.virtual-network.${solutionSuffix}', 64) diff --git a/infra/modules/tokenUsageWorkbook.bicep b/infra/modules/tokenUsageWorkbook.bicep deleted file mode 100644 index 6531bdda..00000000 --- a/infra/modules/tokenUsageWorkbook.bicep +++ /dev/null @@ -1,458 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -@description('Required. The location for the workbook resource.') -param location string - -@description('Required. The resource ID of the Application Insights instance to query.') -param applicationInsightsResourceId string - -@description('Optional. Tags to apply to the workbook resource.') -param tags object = {} - -@description('Optional. 
Display name for the workbook.') -param workbookDisplayName string = 'LLM Token Usage Dashboard' - -// Generate a deterministic GUID for the workbook based on resource group and name -var workbookId = guid(resourceGroup().id, 'token-usage-workbook') - -var workbookContent = { - version: 'Notebook/1.0' - items: [ - { - type: 1 - content: { - json: '# LLM Token Usage Dashboard\n\nThis workbook provides comprehensive visibility into LLM token consumption across agents, models, workflow steps, and users.\n\n---' - } - name: 'header' - } - { - type: 9 - content: { - version: 'KqlParameterItem/1.0' - parameters: [ - { - id: 'time-range-param' - version: 'KqlParameterItem/1.0' - name: 'TimeRange' - type: 4 - isRequired: true - value: { - durationMs: 86400000 - } - typeSettings: { - selectableValues: [ - { durationMs: 3600000 } - { durationMs: 14400000 } - { durationMs: 86400000 } - { durationMs: 259200000 } - { durationMs: 604800000 } - { durationMs: 2592000000 } - ] - allowCustom: true - } - label: 'Time Range' - } - ] - style: 'pills' - queryType: 0 - resourceType: 'microsoft.insights/components' - } - name: 'parameters' - } - // ===== Row 1: Summary Tiles ===== - { - type: 1 - content: { - json: '## Overall Token Usage Summary' - } - name: 'summary-header' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Token_Usage_Summary"\n| where timestamp {TimeRange}\n| summarize \n total_input = sum(toint(customDimensions.total_input_tokens)),\n total_output = sum(toint(customDimensions.total_output_tokens)),\n total = sum(toint(customDimensions.total_tokens)),\n total_calls = sum(toint(customDimensions.total_calls)),\n processes = dcount(tostring(customDimensions.process_id))' - size: 4 - title: 'Token Usage Totals' - queryType: 0 - resourceType: 'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'tiles' - tileSettings: { - titleContent: { - columnMatch: 'Column1' - 
formatter: 1 - } - leftContent: { - columnMatch: 'total' - formatter: 12 - formatOptions: { - palette: 'auto' - } - numberFormat: { - unit: 0 - options: { - style: 'decimal' - maximumFractionDigits: 0 - } - } - } - showBorder: true - } - } - name: 'summary-tiles' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Token_Usage_Summary"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_input = toint(customDimensions.total_input_tokens),\n total_output = toint(customDimensions.total_output_tokens),\n total = toint(customDimensions.total_tokens),\n call_count = toint(customDimensions.total_calls)\n| project timestamp, process_id, total_input, total_output, total, call_count\n| order by timestamp desc' - size: 0 - title: 'Token Usage by Process' - queryType: 0 - resourceType: 'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'table' - gridSettings: { - formatters: [ - { - columnMatch: 'total' - formatter: 3 - formatOptions: { - palette: 'blue' - } - } - ] - } - } - name: 'summary-table' - } - // ===== Row 2: Per-Agent Token Usage ===== - { - type: 1 - content: { - json: '## Per-Agent Token Usage' - } - name: 'agent-header' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Agent_Token_Usage"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by agent_name\n| order by total desc' - size: 0 - title: 'Token Consumption by Agent' - queryType: 0 - resourceType: 
'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'table' - gridSettings: { - formatters: [ - { - columnMatch: 'total' - formatter: 3 - formatOptions: { - palette: 'blue' - } - } - ] - } - } - customWidth: '50' - name: 'agent-table' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Agent_Token_Usage"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by agent_name\n| order by total desc' - size: 0 - title: 'Token Distribution by Agent' - queryType: 0 - resourceType: 'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'piechart' - } - customWidth: '50' - name: 'agent-chart' - } - // ===== Row 3: Per-Model Token Usage ===== - { - type: 1 - content: { - json: '## Per-Model Token Usage' - } - name: 'model-header' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Model_Token_Usage"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by model_name\n| order by total desc' - size: 0 - title: 'Token Consumption by Model' - queryType: 0 - resourceType: 'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'table' - gridSettings: { - formatters: [ - { - columnMatch: 'total' - formatter: 3 - formatOptions: { - palette: 'green' - } - } - ] - } - } - customWidth: 
'50' - name: 'model-table' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Model_Token_Usage"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by model_name\n| order by total desc' - size: 0 - title: 'Token Distribution by Model' - queryType: 0 - resourceType: 'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'piechart' - } - customWidth: '50' - name: 'model-chart' - } - // ===== Row 4: Per-Step (Team) Token Usage ===== - { - type: 1 - content: { - json: '## Per-Step (Team) Token Usage' - } - name: 'step-header' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Step_Token_Usage"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc' - size: 0 - title: 'Token Consumption by Workflow Step' - queryType: 0 - resourceType: 'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'barchart' - chartSettings: { - xAxis: 'step_name' - yAxis: 'total' - group: 'step_name' - } - } - customWidth: '50' - name: 'step-chart' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Step_Token_Usage"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n 
output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc' - size: 0 - title: 'Step Usage Details' - queryType: 0 - resourceType: 'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'table' - gridSettings: { - formatters: [ - { - columnMatch: 'total' - formatter: 3 - formatOptions: { - palette: 'orange' - } - } - ] - } - } - customWidth: '50' - name: 'step-table' - } - // ===== Row 5: Per-User Token Usage ===== - { - type: 1 - content: { - json: '## Per-User Token Usage' - } - name: 'user-header' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Token_Usage_Summary"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_tokens = toint(customDimensions.total_tokens),\n user_id = tostring(customDimensions.user_id)\n| summarize total = sum(total_tokens), runs = count() by user_id\n| order by total desc' - size: 0 - title: 'Token Usage by User' - queryType: 0 - resourceType: 'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'table' - gridSettings: { - formatters: [ - { - columnMatch: 'total' - formatter: 3 - formatOptions: { - palette: 'purple' - } - } - ] - } - } - name: 'user-table' - } - // ===== Row 6: Hourly Token Usage Trend ===== - { - type: 1 - content: { - json: '## Token Usage Trends' - } - name: 'trend-header' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Token_Usage"\n| where timestamp {TimeRange}\n| extend total_tokens = toint(customDimensions.total_tokens)\n| summarize hourly_tokens = sum(total_tokens), calls = 
count() by bin(timestamp, 1h)\n| order by timestamp asc' - size: 0 - title: 'Hourly Token Consumption' - queryType: 0 - resourceType: 'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'linechart' - chartSettings: { - xAxis: 'timestamp' - yAxis: 'hourly_tokens' - showLegend: true - } - } - name: 'trend-chart' - } - // ===== Row 7: Estimated Cost ===== - { - type: 1 - content: { - json: '## Estimated Cost\n\n> Cost estimates use GPT-4o pricing: **$2.50 / 1M input tokens**, **$10.00 / 1M output tokens**. Adjust for your actual model pricing.' - } - name: 'cost-header' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Token_Usage_Summary"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n input_tokens = toint(customDimensions.total_input_tokens),\n output_tokens = toint(customDimensions.total_output_tokens)\n| extend estimated_cost_usd = round((input_tokens / 1000000.0 * 2.50) + (output_tokens / 1000000.0 * 10.0), 4)\n| project timestamp, process_id, input_tokens, output_tokens, estimated_cost_usd\n| order by estimated_cost_usd desc' - size: 0 - title: 'Estimated Cost per Process (USD)' - queryType: 0 - resourceType: 'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'table' - gridSettings: { - formatters: [ - { - columnMatch: 'estimated_cost_usd' - formatter: 3 - formatOptions: { - palette: 'redBright' - } - } - ] - } - } - name: 'cost-table' - } - // ===== Row 8: Individual LLM Call Log ===== - { - type: 1 - content: { - json: '## Individual LLM Call Log' - } - name: 'calls-header' - } - { - type: 3 - content: { - version: 'KqlItem/1.0' - query: 'customEvents\n| where name == "LLM_Token_Usage"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n step_name = tostring(customDimensions.step_name),\n model = 
tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n process_id = tostring(customDimensions.process_id)\n| project timestamp, process_id, agent_name, step_name, model, input_tokens, output_tokens, total_tokens\n| order by timestamp desc\n| take 200' - size: 0 - title: 'Recent LLM Calls (last 200)' - queryType: 0 - resourceType: 'microsoft.insights/components' - crossComponentResources: [ - applicationInsightsResourceId - ] - visualization: 'table' - gridSettings: { - formatters: [ - { - columnMatch: 'total_tokens' - formatter: 3 - formatOptions: { - palette: 'blue' - } - } - ] - } - } - name: 'calls-table' - } - ] - isLocked: false - fallbackResourceIds: [ - applicationInsightsResourceId - ] -} - -resource tokenUsageWorkbook 'Microsoft.Insights/workbooks@2023-06-01' = { - name: workbookId - location: location - tags: tags - kind: 'shared' - properties: { - displayName: workbookDisplayName - category: 'workbook' - version: '1.0' - serializedData: string(workbookContent) - sourceId: applicationInsightsResourceId - } -} - -@description('The resource ID of the created workbook.') -output resourceId string = tokenUsageWorkbook.id - -@description('The name of the created workbook.') -output name string = tokenUsageWorkbook.name diff --git a/src/backend-api/src/app/libs/logging/llm_token_telemetry.py b/src/backend-api/src/app/libs/logging/llm_token_telemetry.py index b3035fc8..91f670c5 100644 --- a/src/backend-api/src/app/libs/logging/llm_token_telemetry.py +++ b/src/backend-api/src/app/libs/logging/llm_token_telemetry.py @@ -53,6 +53,7 @@ import logging import os import random +import time from contextlib import AbstractContextManager from dataclasses import dataclass, field from typing import Any, Callable, Iterable, Mapping, Optional @@ -472,6 +473,16 @@ def __init__( k: ("" if v is None else str(v)) for 
k, v in raw_static.items() } + # Performance counters. ``perf_*`` accumulate wall-clock nanoseconds + # spent inside ``emit()`` so callers can verify telemetry overhead is + # negligible. ``perf_slow_emit_threshold_ms`` is the soft threshold + # above which a WARNING is logged for an individual emit (default + # 50 ms -- emits should normally take well under 1 ms). + self._perf_total_ns: int = 0 + self._perf_emit_count: int = 0 + self._perf_max_ns: int = 0 + self.perf_slow_emit_threshold_ms: float = 50.0 + # -- public surface --------------------------------------------------- @property def enabled(self) -> bool: @@ -539,28 +550,74 @@ def emit(self, event_name: str, **dimensions: Any) -> None: Non-string values are stringified. ``None`` values are dropped. Any ``user_id`` value is passed through the configured hasher. - Never raises. + Never raises. Wall-clock duration is recorded for performance audit + (see :meth:`perf_stats`). """ - props = dict(self._static) # cheap shallow copy of pre-stringified dims - for k, v in dimensions.items(): - if v is None: - continue - if k == "user_id": - v = self._apply_user_id_hash(v) - if v is None or v == "": + start_ns = time.perf_counter_ns() + try: + props = dict(self._static) # cheap shallow copy of pre-stringified dims + for k, v in dimensions.items(): + if v is None: continue - props[k] = v if isinstance(v, str) else str(v) + if k == "user_id": + v = self._apply_user_id_hash(v) + if v is None or v == "": + continue + props[k] = v if isinstance(v, str) else str(v) + + if not self.enabled: + self._log.debug( + "App Insights not configured -- skipping event %s (%s)", + event_name, props, + ) + return + try: + self._sink(event_name, props) # type: ignore[misc] + except Exception as exc: # never break the caller + self._log.warning("track_event(%s) failed: %s", event_name, exc) + finally: + elapsed_ns = time.perf_counter_ns() - start_ns + self._perf_total_ns += elapsed_ns + self._perf_emit_count += 1 + if elapsed_ns > 
self._perf_max_ns: + self._perf_max_ns = elapsed_ns + elapsed_ms = elapsed_ns / 1_000_000.0 + if elapsed_ms > self.perf_slow_emit_threshold_ms: + self._log.warning( + "Token telemetry emit slow: event=%s duration_ms=%.3f", + event_name, elapsed_ms, + ) + else: + self._log.debug( + "Token telemetry emit: event=%s duration_ms=%.3f", + event_name, elapsed_ms, + ) + + # -- performance audit ------------------------------------------------ + def perf_stats(self) -> dict[str, float]: + """Return cumulative telemetry-overhead stats since process start + (or since :meth:`reset_perf_stats`). + + Keys: + ``emit_count`` -- number of events emitted + ``total_ms`` -- total wall-clock time spent inside ``emit`` + ``avg_ms`` -- mean per-event duration + ``max_ms`` -- slowest single emit observed + """ + count = self._perf_emit_count + total_ms = self._perf_total_ns / 1_000_000.0 + return { + "emit_count": float(count), + "total_ms": total_ms, + "avg_ms": (total_ms / count) if count else 0.0, + "max_ms": self._perf_max_ns / 1_000_000.0, + } - if not self.enabled: - self._log.debug( - "App Insights not configured -- skipping event %s (%s)", - event_name, props, - ) - return - try: - self._sink(event_name, props) # type: ignore[misc] - except Exception as exc: # never break the caller - self._log.warning("track_event(%s) failed: %s", event_name, exc) + def reset_perf_stats(self) -> None: + """Zero the perf counters (useful for tests and load-tests).""" + self._perf_total_ns = 0 + self._perf_emit_count = 0 + self._perf_max_ns = 0 # -- typed convenience emitters -------------------------------------- def emit_agent( @@ -713,14 +770,12 @@ def emit_all( agents.update({k: v for k, v in additional_agents.items() if k}) models = {m for m in agents.values() if m} - self.emit_summary( - usage=usage, - agent_count=len(agents), - model_count=len(models) or 1, - primary_model=model_deployment_name, - additional_agents=additional_agents, - **dimensions, - ) + # Wall-clock timing of the whole 
emit_all path so callers (or tests) + # can verify the telemetry path stays cheap relative to the LLM call + # it instruments. + batch_start_ns = time.perf_counter_ns() + + # Defer summary until last so we can stamp the batch overhead on it. self.emit_agent( agent_name=agent_name, model_deployment_name=model_deployment_name, @@ -748,6 +803,17 @@ def emit_all( model_deployment_name=model_deployment_name, ) + batch_overhead_ms = (time.perf_counter_ns() - batch_start_ns) / 1_000_000.0 + self.emit_summary( + usage=usage, + agent_count=len(agents), + model_count=len(models) or 1, + primary_model=model_deployment_name, + additional_agents=additional_agents, + telemetry_overhead_ms=f"{batch_overhead_ms:.3f}", + **dimensions, + ) + self._log.info( "[TOKEN USAGE] agent=%s model=%s input=%d output=%d total=%d %s", agent_name, @@ -804,6 +870,12 @@ def __init__( self.emit_team_event = emit_team_event self.dimensions = dict(dimensions) self.usage = TokenUsage() + # Wall-clock nanoseconds spent inside extraction (``add*``) and the + # final ``__exit__`` emit, respectively. Surfaced for callers that + # want to verify the helper doesn't add measurable latency. Available + # as ``scope.extract_ms`` / ``scope.emit_ms`` after the scope closes. + self._extract_ns: int = 0 + self._emit_ns: int = 0 # -- accumulation ----------------------------------------------------- def add(self, source: Any) -> Optional[TokenUsage]: @@ -812,11 +884,14 @@ def add(self, source: Any) -> Optional[TokenUsage]: Never raises -- extraction failures return ``None`` and are logged at DEBUG. 
""" + start_ns = time.perf_counter_ns() try: found = extract_usage(source) or extract_usage_from_stream_chunk(source) except Exception as exc: # belt + braces; extractors are already safe logger.debug("TokenUsageScope.add failed: %s", exc, exc_info=True) return None + finally: + self._extract_ns += time.perf_counter_ns() - start_ns if found: self.usage = self.usage + found return found @@ -828,9 +903,26 @@ def add_chunks(self, chunks: Iterable[Any]) -> None: for c in chunks: self.add(c) + # -- timing properties ----------------------------------------------- + @property + def extract_ms(self) -> float: + """Total ms spent inside :meth:`add` / :meth:`add_chunks`.""" + return self._extract_ns / 1_000_000.0 + + @property + def emit_ms(self) -> float: + """Total ms spent in the on-exit emit batch.""" + return self._emit_ns / 1_000_000.0 + + @property + def total_overhead_ms(self) -> float: + """Total telemetry overhead added by this scope (extract + emit).""" + return self.extract_ms + self.emit_ms + # -- context manager -------------------------------------------------- def __exit__(self, exc_type, exc, tb) -> None: # Always emit (best-effort) regardless of exception status. + emit_start_ns = time.perf_counter_ns() try: self.emitter.emit_all( agent_name=self.agent_name, @@ -843,6 +935,16 @@ def __exit__(self, exc_type, exc, tb) -> None: ) except Exception as emit_exc: # pragma: no cover - belt + braces logger.warning("TokenUsageScope emit failed: %s", emit_exc) + finally: + self._emit_ns += time.perf_counter_ns() - emit_start_ns + logger.debug( + "TokenUsageScope overhead: agent=%s extract_ms=%.3f " + "emit_ms=%.3f total_ms=%.3f", + self.agent_name, + self.extract_ms, + self.emit_ms, + self.total_overhead_ms, + ) return None # do not suppress exceptions diff --git a/src/frontend/Dockerfile b/src/frontend/Dockerfile index c7c40439..0ad16303 100644 --- a/src/frontend/Dockerfile +++ b/src/frontend/Dockerfile @@ -13,7 +13,6 @@ RUN npm install COPY . . 
# Build the app -ENV NODE_OPTIONS="--max-old-space-size=4096" RUN npm run build # Runtime stage diff --git a/src/processor/Dockerfile b/src/processor/Dockerfile index 317c8e03..afe293a5 100644 --- a/src/processor/Dockerfile +++ b/src/processor/Dockerfile @@ -36,8 +36,7 @@ RUN curl -fsSLo /tmp/node.tar.gz "https://nodejs.org/dist/v${NODE_VERSION}/node- COPY pyproject.toml uv.lock ./ # Install dependencies using UV -# Re-lock to pick up any pyproject.toml changes (e.g. new deps), then install. -RUN uv lock --python 3.12 && uv sync --frozen --python 3.12 +RUN uv sync --frozen --python 3.12 # Copy the entire source code COPY src/ ./src/ diff --git a/src/processor/src/tests/test_llm_token_telemetry.py b/src/processor/src/tests/test_llm_token_telemetry.py deleted file mode 100644 index 6a24f5b1..00000000 --- a/src/processor/src/tests/test_llm_token_telemetry.py +++ /dev/null @@ -1,572 +0,0 @@ -"""Unit tests for app.utils.llm_token_telemetry. - -Covers: -- TokenUsage arithmetic and realtime sub-fields -- All extractors (dict / object / raw_representation / aggregated messages / - streaming chunks / realtime / Mock-input safety) -- detect_invoked_tools -- TokenUsageEmitter: enabled/disabled, sink-throws-doesn't-propagate, - static_dimensions merge, all typed emitters, emit_all distinct models -- TokenUsageScope: happy path, exception in body still emits, multi-add -""" -from __future__ import annotations - -import logging -from unittest.mock import Mock - -import pytest - -from app.utils.llm_token_telemetry import ( - EVENT_AGENT, - EVENT_MODEL, - EVENT_SPEECH, - EVENT_SUMMARY, - TokenUsage, - TokenUsageEmitter, - TokenUsageScope, - detect_invoked_tools, - extract_realtime_usage, - extract_usage, - extract_usage_from_dict, - extract_usage_from_stream_chunk, -) - - -# --------------------------------------------------------------------------- -# TokenUsage -# --------------------------------------------------------------------------- -class TestTokenUsage: - def 
test_has_any_false_when_zero(self): - assert TokenUsage().has_any is False - - def test_has_any_true_when_any_nonzero(self): - assert TokenUsage(input_tokens=1).has_any is True - assert TokenUsage(total_tokens=5).has_any is True - - def test_addition_basic(self): - a = TokenUsage(1, 2, 3) - b = TokenUsage(4, 5, 9) - assert a + b == TokenUsage(5, 7, 12) - - def test_addition_realtime_subfields(self): - a = TokenUsage(1, 2, 3, input_audio_tokens=10) - b = TokenUsage(4, 5, 9, input_audio_tokens=20, output_audio_tokens=7) - c = a + b - assert c.input_audio_tokens == 30 - assert c.output_audio_tokens == 7 # None + 7 -> 7 - - def test_addition_returns_notimplemented_for_other_types(self): - assert TokenUsage(1).__add__("nope") is NotImplemented - - def test_to_event_props_omits_none_subfields(self): - props = TokenUsage(1, 2, 3).to_event_props() - assert props == {"input_tokens": "1", "output_tokens": "2", "total_tokens": "3"} - - def test_to_event_props_includes_realtime_when_present(self): - props = TokenUsage(1, 2, 3, input_audio_tokens=4).to_event_props() - assert props["input_audio_tokens"] == "4" - - -# --------------------------------------------------------------------------- -# extract_usage_from_dict -# --------------------------------------------------------------------------- -class TestExtractFromDict: - @pytest.mark.parametrize("data,expected", [ - ({"prompt_tokens": 12, "completion_tokens": 8}, (12, 8, 20)), - ({"input_tokens": 5, "output_tokens": 7, "total_tokens": 12}, (5, 7, 12)), - ({"input_token_count": 3, "output_token_count": 4}, (3, 4, 7)), - ({"promptTokens": 1, "completionTokens": 2, "totalTokens": 3}, (1, 2, 3)), - ]) - def test_aliases(self, data, expected): - u = extract_usage_from_dict(data) - assert (u.input_tokens, u.output_tokens, u.total_tokens) == expected - - def test_none_returns_none(self): - assert extract_usage_from_dict(None) is None - - def test_empty_returns_none(self): - assert extract_usage_from_dict({}) is None - - def 
test_total_falls_back_to_sum(self): - u = extract_usage_from_dict({"input_tokens": 4, "output_tokens": 6}) - assert u.total_tokens == 10 - - def test_string_digits_coerced(self): - u = extract_usage_from_dict({"input_tokens": "10", "output_tokens": "20"}) - assert u.input_tokens == 10 - assert u.output_tokens == 20 - - -# --------------------------------------------------------------------------- -# extract_usage (object shapes) -# --------------------------------------------------------------------------- -class _Bag: - """Minimal attribute bag (acts like an SDK model object).""" - pass - - -class TestExtractUsage: - def test_usage_details_dict(self): - r = _Bag() - r.usage_details = {"input_token_count": 5, "output_token_count": 7} - u = extract_usage(r) - assert u.total_tokens == 12 - - def test_usage_details_object(self): - r = _Bag() - details = _Bag() - details.input_token_count = 5 - details.output_token_count = 7 - details.total_token_count = 12 - r.usage_details = details - u = extract_usage(r) - assert u.total_tokens == 12 - - def test_raw_representation_openai_shape(self): - r = _Bag() - raw = _Bag() - raw.usage = {"prompt_tokens": 3, "completion_tokens": 4, "total_tokens": 7} - r.raw_representation = raw - u = extract_usage(r) - assert (u.input_tokens, u.output_tokens, u.total_tokens) == (3, 4, 7) - - def test_aggregated_messages(self): - r = _Bag() - msg = _Bag() - c1 = _Bag() - c1.usage_details = {"input_tokens": 2, "output_tokens": 3} - c2 = _Bag() - c2.usage_details = {"input_tokens": 4, "output_tokens": 1} - msg.contents = [c1, c2] - r.messages = [msg] - u = extract_usage(r) - assert u.input_tokens == 6 - assert u.output_tokens == 4 - - def test_none_input_returns_none(self): - assert extract_usage(None) is None - - def test_no_usage_returns_none(self): - assert extract_usage(_Bag()) is None - - def test_mock_input_does_not_raise(self): - """Mock objects expose every attribute as another Mock -- previously - this caused TypeError on iteration of 
.messages.""" - m = Mock() - # Should silently return None, never raise. - assert extract_usage(m) is None - - -# --------------------------------------------------------------------------- -# extract_usage_from_stream_chunk -# --------------------------------------------------------------------------- -class TestStreamChunk: - def test_chunk_with_metadata_usage(self): - c = _Bag() - c.metadata = {"usage": {"input_tokens": 1, "output_tokens": 2}} - u = extract_usage_from_stream_chunk(c) - assert u.input_tokens == 1 - assert u.output_tokens == 2 - - def test_no_usage_returns_none(self): - assert extract_usage_from_stream_chunk(_Bag()) is None - - -# --------------------------------------------------------------------------- -# extract_realtime_usage -# --------------------------------------------------------------------------- -class TestRealtime: - def test_basic(self): - r = _Bag() - r.usage = { - "input_tokens": 3, "output_tokens": 4, "total_tokens": 7, - "input_token_details": {"audio_tokens": 2, "text_tokens": 1, "cached_tokens": 0}, - "output_token_details": {"audio_tokens": 4, "text_tokens": 0}, - } - u = extract_realtime_usage(r) - assert u.input_audio_tokens == 2 - assert u.output_audio_tokens == 4 - assert u.total_tokens == 7 - - def test_total_derived_when_missing(self): - r = _Bag() - r.usage = {"input_tokens": 3, "output_tokens": 4} - u = extract_realtime_usage(r) - assert u.total_tokens == 7 - - def test_no_usage_returns_none(self): - assert extract_realtime_usage(_Bag()) is None - - -# --------------------------------------------------------------------------- -# detect_invoked_tools -# --------------------------------------------------------------------------- -class TestDetectInvokedTools: - def test_finds_function_calls(self): - r = _Bag() - c1 = _Bag() - c1.type = "function_call" - c1.name = "product_agent" - c2 = _Bag() - c2.type = "text" - c2.name = "n/a" - c3 = _Bag() - c3.type = "function_call" - c3.name = "policy_agent" - msg = _Bag() - 
msg.contents = [c1, c2, c3] - r.messages = [msg] - assert detect_invoked_tools(r) == {"product_agent", "policy_agent"} - - def test_empty_when_no_messages(self): - assert detect_invoked_tools(_Bag()) == set() - - def test_mock_input_safe(self): - assert detect_invoked_tools(Mock()) == set() - - def test_skips_function_calls_without_name(self): - r = _Bag() - c = _Bag() - c.type = "function_call" - c.name = None - msg = _Bag() - msg.contents = [c] - r.messages = [msg] - assert detect_invoked_tools(r) == set() - - -# --------------------------------------------------------------------------- -# TokenUsageEmitter -# --------------------------------------------------------------------------- -class TestEmitter: - def _make(self, **kw): - captured: list[tuple[str, dict]] = [] - kw.setdefault("connection_string", "fake-conn") - kw.setdefault("event_sink", lambda n, p: captured.append((n, dict(p)))) - em = TokenUsageEmitter(**kw) - return em, captured - - def test_disabled_when_no_connection_string(self): - em = TokenUsageEmitter(connection_string="", event_sink=lambda *a: None) - assert em.enabled is False - - def test_disabled_when_no_sink(self): - em = TokenUsageEmitter(connection_string="x", event_sink=None) - # _default_event_sink may or may not be available; force-disable: - em._sink = None - assert em.enabled is False - - def test_static_dimensions_prestringified_and_merged(self): - em, captured = self._make(static_dimensions={"app": "x", "tenant": 42}) - em.emit("X", user_id="u1") - name, props = captured[0] - assert name == "X" - assert props["app"] == "x" - assert props["tenant"] == "42" # stringified - assert props["user_id"] == "u1" - - def test_call_dimension_overrides_static(self): - em, captured = self._make(static_dimensions={"app": "default"}) - em.emit("X", app="override") - assert captured[0][1]["app"] == "override" - - def test_none_dimension_dropped(self): - em, captured = self._make() - em.emit("X", user_id=None, session_id="s1") - assert "user_id" 
not in captured[0][1] - assert captured[0][1]["session_id"] == "s1" - - def test_sink_exception_does_not_propagate(self, caplog): - def boom(_n, _p): - raise RuntimeError("sink broken") - em = TokenUsageEmitter(connection_string="x", event_sink=boom) - with caplog.at_level(logging.WARNING): - em.emit("X") # must not raise - - def test_emit_agent_skips_zero_usage(self): - em, captured = self._make() - em.emit_agent(agent_name="a", model_deployment_name="m", usage=TokenUsage()) - assert captured == [] - - def test_emit_agent_populates_props(self): - em, captured = self._make() - em.emit_agent(agent_name="chat", model_deployment_name="gpt-4o", - usage=TokenUsage(10, 20, 30), user_id="u") - name, props = captured[0] - assert name == EVENT_AGENT - assert props["agent_name"] == "chat" - assert props["model_deployment_name"] == "gpt-4o" - assert props["total_tokens"] == "30" - assert props["user_id"] == "u" - - def test_emit_all_emits_summary_agent_and_per_distinct_model(self): - em, captured = self._make() - em.emit_all( - agent_name="orchestrator", - model_deployment_name="gpt-4o", - usage=TokenUsage(10, 20, 30), - additional_agents={"tool_a": "gpt-4o", "tool_b": "gpt-35"}, - user_id="u1", - ) - names = [n for n, _ in captured] - # exactly one summary + one agent + two model events (gpt-4o, gpt-35) - assert names.count(EVENT_SUMMARY) == 1 - assert names.count(EVENT_AGENT) == 1 - assert names.count(EVENT_MODEL) == 2 - # summary records agent + model counts - summary = next(p for n, p in captured if n == EVENT_SUMMARY) - assert summary["agent_count"] == "3" - assert summary["model_count"] == "2" - assert summary["total_input_tokens"] == "10" - - def test_emit_speech_includes_audio_subfields(self): - em, captured = self._make() - em.emit_speech( - model_deployment_name="gpt-4o-realtime", - source="voice_chat", - usage=TokenUsage(1, 2, 3, input_audio_tokens=5, output_audio_tokens=6), - ) - name, props = captured[0] - assert name == EVENT_SPEECH - assert props["source"] == 
"voice_chat" - assert props["input_audio_tokens"] == "5" - assert props["output_audio_tokens"] == "6" - - -# --------------------------------------------------------------------------- -# Pricing / cost computation -# --------------------------------------------------------------------------- -class TestPricing: - def _make(self, pricing): - captured: list[tuple[str, dict]] = [] - em = TokenUsageEmitter( - connection_string="x", - event_sink=lambda n, p: captured.append((n, dict(p))), - pricing=pricing, - ) - return em, captured - - def test_cost_attached_to_agent_event(self): - em, captured = self._make({"gpt-4o": (0.0025, 0.01)}) - em.emit_agent(agent_name="a", model_deployment_name="gpt-4o", - usage=TokenUsage(1000, 500, 1500)) - # 1000 * 0.0025/1k + 500 * 0.01/1k = 0.0025 + 0.005 = 0.0075 - assert captured[0][1]["estimated_cost_usd"] == "0.007500" - - def test_cost_case_insensitive_model_lookup(self): - em, captured = self._make({"GPT-4o": (0.001, 0.001)}) - em.emit_model(model_deployment_name="gpt-4o", - usage=TokenUsage(1000, 1000, 2000)) - assert "estimated_cost_usd" in captured[0][1] - - def test_no_cost_when_model_unknown(self): - em, captured = self._make({"gpt-4o": (0.001, 0.001)}) - em.emit_agent(agent_name="a", model_deployment_name="gpt-mystery", - usage=TokenUsage(10, 10, 20)) - assert "estimated_cost_usd" not in captured[0][1] - - def test_summary_picks_up_cost_via_emit_all(self): - em, captured = self._make({"gpt-4o": (0.0025, 0.01)}) - em.emit_all(agent_name="chat", model_deployment_name="gpt-4o", - usage=TokenUsage(1000, 500, 1500)) - summary = next(p for n, p in captured if n == EVENT_SUMMARY) - assert summary["estimated_cost_usd"] == "0.007500" - - def test_malformed_pricing_entry_ignored(self, caplog): - with caplog.at_level(logging.WARNING): - em = TokenUsageEmitter( - connection_string="x", - event_sink=lambda *a: None, - pricing={"bad-model": "not-a-tuple"}, # type: ignore[dict-item] - ) - # Emitter still constructs; bad entry skipped. 
- assert "bad-model" not in em._pricing - - -# --------------------------------------------------------------------------- -# user_id PII hashing -# --------------------------------------------------------------------------- -class TestUserIdHasher: - def _make(self, hasher): - captured: list[tuple[str, dict]] = [] - em = TokenUsageEmitter( - connection_string="x", - event_sink=lambda n, p: captured.append((n, dict(p))), - user_id_hasher=hasher, - ) - return em, captured - - def test_hasher_applied_to_call_kwargs(self): - em, captured = self._make(lambda v: f"H({v})") - em.emit("X", user_id="alice") - assert captured[0][1]["user_id"] == "H(alice)" - - def test_hasher_applied_to_static_dimensions_at_construction(self): - em = TokenUsageEmitter( - connection_string="x", - event_sink=lambda *a: None, - user_id_hasher=lambda v: f"H({v})", - static_dimensions={"user_id": "bob"}, - ) - assert em._static["user_id"] == "H(bob)" - - def test_hasher_exception_falls_back_to_raw(self, caplog): - def boom(_v): - raise RuntimeError("hasher broken") - em, captured = self._make(boom) - with caplog.at_level(logging.WARNING): - em.emit("X", user_id="alice") - # Falls back to original value -- never breaks telemetry. - assert captured[0][1]["user_id"] == "alice" - - def test_no_hasher_passes_through(self): - em, captured = self._make(None) - em.emit("X", user_id="alice") - assert captured[0][1]["user_id"] == "alice" - - def test_empty_user_id_not_hashed_or_emitted(self): - em, captured = self._make(lambda v: f"H({v})") - em.emit("X", user_id="") - # Empty user_id should be dropped, not hashed to "H()". 
- assert "user_id" not in captured[0][1] - - -# --------------------------------------------------------------------------- -# Sampling -# --------------------------------------------------------------------------- -class TestSampling: - def _make(self, rate): - captured: list[tuple[str, dict]] = [] - em = TokenUsageEmitter( - connection_string="x", - event_sink=lambda n, p: captured.append((n, dict(p))), - sample_rate=rate, - ) - return em, captured - - def test_rate_clamped_to_unit_interval(self): - assert TokenUsageEmitter(connection_string="x", sample_rate=-0.5, - event_sink=lambda *a: None).sample_rate == 0.0 - assert TokenUsageEmitter(connection_string="x", sample_rate=2.0, - event_sink=lambda *a: None).sample_rate == 1.0 - - def test_invalid_rate_defaults_to_one(self): - em = TokenUsageEmitter(connection_string="x", sample_rate="nope", # type: ignore[arg-type] - event_sink=lambda *a: None) - assert em.sample_rate == 1.0 - - def test_zero_rate_drops_agent_event(self): - em, captured = self._make(0.0) - em.emit_agent(agent_name="a", model_deployment_name="m", - usage=TokenUsage(1, 2, 3)) - assert captured == [] - - def test_zero_rate_still_emits_summary(self): - em, captured = self._make(0.0) - em.emit_summary(usage=TokenUsage(1, 2, 3)) - assert captured and captured[0][0] == EVENT_SUMMARY - - def test_summary_records_sample_rate(self): - em, captured = self._make(0.25) - em.emit_summary(usage=TokenUsage(1, 2, 3)) - assert captured[0][1]["sample_rate"] == "0.2500" - - def test_emit_all_with_zero_rate_only_emits_summary(self): - em, captured = self._make(0.0) - em.emit_all(agent_name="chat", model_deployment_name="gpt-4o", - usage=TokenUsage(10, 20, 30)) - assert [n for n, _ in captured] == [EVENT_SUMMARY] - - def test_full_rate_emits_everything(self): - em, captured = self._make(1.0) - em.emit_all(agent_name="chat", model_deployment_name="gpt-4o", - usage=TokenUsage(10, 20, 30), - additional_agents={"a2": "gpt-35"}) - names = [n for n, _ in captured] - assert 
EVENT_SUMMARY in names - assert EVENT_AGENT in names - assert names.count(EVENT_MODEL) == 2 - - -# --------------------------------------------------------------------------- -# TokenUsageScope (continued) -# --------------------------------------------------------------------------- -class TestScope: - def _emitter(self): - captured: list[tuple[str, dict]] = [] - em = TokenUsageEmitter( - connection_string="x", - event_sink=lambda n, p: captured.append((n, dict(p))), - ) - return em, captured - - def test_happy_path_emits_on_exit(self): - em, captured = self._emitter() - r = _Bag() - r.usage_details = {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3} - with TokenUsageScope(em, agent_name="a", model_deployment_name="m") as s: - s.add(r) - assert any(n == EVENT_SUMMARY for n, _ in captured) - assert any(n == EVENT_AGENT for n, _ in captured) - - def test_multi_add_accumulates(self): - em, captured = self._emitter() - r1 = _Bag() - r1.usage_details = {"input_tokens": 1, "output_tokens": 2} - r2 = _Bag() - r2.usage_details = {"input_tokens": 4, "output_tokens": 5} - with TokenUsageScope(em, agent_name="a", model_deployment_name="m") as s: - s.add(r1) - s.add(r2) - agent = next(p for n, p in captured if n == EVENT_AGENT) - assert agent["input_tokens"] == "5" - assert agent["output_tokens"] == "7" - assert agent["total_tokens"] == "12" - - def test_exception_in_body_still_emits(self): - em, captured = self._emitter() - r = _Bag() - r.usage_details = {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3} - with pytest.raises(ValueError): - with TokenUsageScope(em, agent_name="a", model_deployment_name="m") as s: - s.add(r) - raise ValueError("boom") - # Emission still happened - assert any(n == EVENT_AGENT for n, _ in captured) - - def test_add_with_mock_does_not_raise(self): - em, _ = self._emitter() - with TokenUsageScope(em, agent_name="a", model_deployment_name="m") as s: - assert s.add(Mock()) is None - - def test_zero_usage_does_not_emit(self): - em, 
captured = self._emitter() - with TokenUsageScope(em, agent_name="a", model_deployment_name="m"): - pass - assert captured == [] - - def test_dimensions_flow_to_events(self): - em, captured = self._emitter() - r = _Bag() - r.usage_details = {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3} - with TokenUsageScope(em, agent_name="a", model_deployment_name="m", - user_id="u1", session_id="s1") as s: - s.add(r) - for _, p in captured: - assert p["user_id"] == "u1" - assert p["session_id"] == "s1" - - def test_additional_agents_after_scope_open(self): - em, captured = self._emitter() - r = _Bag() - r.usage_details = {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3} - with TokenUsageScope(em, agent_name="orchestrator", - model_deployment_name="gpt-4o") as s: - s.add(r) - # Mutate additional_agents after the call -- mirrors the - # detect_invoked_tools usage pattern. - s.additional_agents["tool_a"] = "gpt-35" - model_events = [p for n, p in captured if n == EVENT_MODEL] - models = {p["model_deployment_name"] for p in model_events} - assert models == {"gpt-4o", "gpt-35"} - diff --git a/src/processor/src/utils/llm_token_telemetry.py b/src/processor/src/utils/llm_token_telemetry.py index b3035fc8..91f670c5 100644 --- a/src/processor/src/utils/llm_token_telemetry.py +++ b/src/processor/src/utils/llm_token_telemetry.py @@ -53,6 +53,7 @@ import logging import os import random +import time from contextlib import AbstractContextManager from dataclasses import dataclass, field from typing import Any, Callable, Iterable, Mapping, Optional @@ -472,6 +473,16 @@ def __init__( k: ("" if v is None else str(v)) for k, v in raw_static.items() } + # Performance counters. ``perf_*`` accumulate wall-clock nanoseconds + # spent inside ``emit()`` so callers can verify telemetry overhead is + # negligible. 
``perf_slow_emit_threshold_ms`` is the soft threshold + # above which a WARNING is logged for an individual emit (default + # 50 ms -- emits should normally take well under 1 ms). + self._perf_total_ns: int = 0 + self._perf_emit_count: int = 0 + self._perf_max_ns: int = 0 + self.perf_slow_emit_threshold_ms: float = 50.0 + # -- public surface --------------------------------------------------- @property def enabled(self) -> bool: @@ -539,28 +550,74 @@ def emit(self, event_name: str, **dimensions: Any) -> None: Non-string values are stringified. ``None`` values are dropped. Any ``user_id`` value is passed through the configured hasher. - Never raises. + Never raises. Wall-clock duration is recorded for performance audit + (see :meth:`perf_stats`). """ - props = dict(self._static) # cheap shallow copy of pre-stringified dims - for k, v in dimensions.items(): - if v is None: - continue - if k == "user_id": - v = self._apply_user_id_hash(v) - if v is None or v == "": + start_ns = time.perf_counter_ns() + try: + props = dict(self._static) # cheap shallow copy of pre-stringified dims + for k, v in dimensions.items(): + if v is None: continue - props[k] = v if isinstance(v, str) else str(v) + if k == "user_id": + v = self._apply_user_id_hash(v) + if v is None or v == "": + continue + props[k] = v if isinstance(v, str) else str(v) + + if not self.enabled: + self._log.debug( + "App Insights not configured -- skipping event %s (%s)", + event_name, props, + ) + return + try: + self._sink(event_name, props) # type: ignore[misc] + except Exception as exc: # never break the caller + self._log.warning("track_event(%s) failed: %s", event_name, exc) + finally: + elapsed_ns = time.perf_counter_ns() - start_ns + self._perf_total_ns += elapsed_ns + self._perf_emit_count += 1 + if elapsed_ns > self._perf_max_ns: + self._perf_max_ns = elapsed_ns + elapsed_ms = elapsed_ns / 1_000_000.0 + if elapsed_ms > self.perf_slow_emit_threshold_ms: + self._log.warning( + "Token telemetry emit slow: 
event=%s duration_ms=%.3f", + event_name, elapsed_ms, + ) + else: + self._log.debug( + "Token telemetry emit: event=%s duration_ms=%.3f", + event_name, elapsed_ms, + ) + + # -- performance audit ------------------------------------------------ + def perf_stats(self) -> dict[str, float]: + """Return cumulative telemetry-overhead stats since process start + (or since :meth:`reset_perf_stats`). + + Keys: + ``emit_count`` -- number of events emitted + ``total_ms`` -- total wall-clock time spent inside ``emit`` + ``avg_ms`` -- mean per-event duration + ``max_ms`` -- slowest single emit observed + """ + count = self._perf_emit_count + total_ms = self._perf_total_ns / 1_000_000.0 + return { + "emit_count": float(count), + "total_ms": total_ms, + "avg_ms": (total_ms / count) if count else 0.0, + "max_ms": self._perf_max_ns / 1_000_000.0, + } - if not self.enabled: - self._log.debug( - "App Insights not configured -- skipping event %s (%s)", - event_name, props, - ) - return - try: - self._sink(event_name, props) # type: ignore[misc] - except Exception as exc: # never break the caller - self._log.warning("track_event(%s) failed: %s", event_name, exc) + def reset_perf_stats(self) -> None: + """Zero the perf counters (useful for tests and load-tests).""" + self._perf_total_ns = 0 + self._perf_emit_count = 0 + self._perf_max_ns = 0 # -- typed convenience emitters -------------------------------------- def emit_agent( @@ -713,14 +770,12 @@ def emit_all( agents.update({k: v for k, v in additional_agents.items() if k}) models = {m for m in agents.values() if m} - self.emit_summary( - usage=usage, - agent_count=len(agents), - model_count=len(models) or 1, - primary_model=model_deployment_name, - additional_agents=additional_agents, - **dimensions, - ) + # Wall-clock timing of the whole emit_all path so callers (or tests) + # can verify the telemetry path stays cheap relative to the LLM call + # it instruments. 
+ batch_start_ns = time.perf_counter_ns() + + # Defer summary until last so we can stamp the batch overhead on it. self.emit_agent( agent_name=agent_name, model_deployment_name=model_deployment_name, @@ -748,6 +803,17 @@ def emit_all( model_deployment_name=model_deployment_name, ) + batch_overhead_ms = (time.perf_counter_ns() - batch_start_ns) / 1_000_000.0 + self.emit_summary( + usage=usage, + agent_count=len(agents), + model_count=len(models) or 1, + primary_model=model_deployment_name, + additional_agents=additional_agents, + telemetry_overhead_ms=f"{batch_overhead_ms:.3f}", + **dimensions, + ) + self._log.info( "[TOKEN USAGE] agent=%s model=%s input=%d output=%d total=%d %s", agent_name, @@ -804,6 +870,12 @@ def __init__( self.emit_team_event = emit_team_event self.dimensions = dict(dimensions) self.usage = TokenUsage() + # Wall-clock nanoseconds spent inside extraction (``add*``) and the + # final ``__exit__`` emit, respectively. Surfaced for callers that + # want to verify the helper doesn't add measurable latency. Available + # as ``scope.extract_ms`` / ``scope.emit_ms`` after the scope closes. + self._extract_ns: int = 0 + self._emit_ns: int = 0 # -- accumulation ----------------------------------------------------- def add(self, source: Any) -> Optional[TokenUsage]: @@ -812,11 +884,14 @@ def add(self, source: Any) -> Optional[TokenUsage]: Never raises -- extraction failures return ``None`` and are logged at DEBUG. 
""" + start_ns = time.perf_counter_ns() try: found = extract_usage(source) or extract_usage_from_stream_chunk(source) except Exception as exc: # belt + braces; extractors are already safe logger.debug("TokenUsageScope.add failed: %s", exc, exc_info=True) return None + finally: + self._extract_ns += time.perf_counter_ns() - start_ns if found: self.usage = self.usage + found return found @@ -828,9 +903,26 @@ def add_chunks(self, chunks: Iterable[Any]) -> None: for c in chunks: self.add(c) + # -- timing properties ----------------------------------------------- + @property + def extract_ms(self) -> float: + """Total ms spent inside :meth:`add` / :meth:`add_chunks`.""" + return self._extract_ns / 1_000_000.0 + + @property + def emit_ms(self) -> float: + """Total ms spent in the on-exit emit batch.""" + return self._emit_ns / 1_000_000.0 + + @property + def total_overhead_ms(self) -> float: + """Total telemetry overhead added by this scope (extract + emit).""" + return self.extract_ms + self.emit_ms + # -- context manager -------------------------------------------------- def __exit__(self, exc_type, exc, tb) -> None: # Always emit (best-effort) regardless of exception status. 
+ emit_start_ns = time.perf_counter_ns() try: self.emitter.emit_all( agent_name=self.agent_name, @@ -843,6 +935,16 @@ def __exit__(self, exc_type, exc, tb) -> None: ) except Exception as emit_exc: # pragma: no cover - belt + braces logger.warning("TokenUsageScope emit failed: %s", emit_exc) + finally: + self._emit_ns += time.perf_counter_ns() - emit_start_ns + logger.debug( + "TokenUsageScope overhead: agent=%s extract_ms=%.3f " + "emit_ms=%.3f total_ms=%.3f", + self.agent_name, + self.extract_ms, + self.emit_ms, + self.total_overhead_ms, + ) return None # do not suppress exceptions From 4393540fa73d310b9b071407adeeed404a902be8 Mon Sep 17 00:00:00 2001 From: Priyanka-Microsoft Date: Mon, 1 Jun 2026 14:10:27 +0530 Subject: [PATCH 5/5] fix: update uv.lock to include azure-monitor-events-extension The lockfile was missing the azure-monitor-events-extension and azure-monitor-opentelemetry packages, causing uv sync --frozen in Docker to skip them. This made the emitter silently disabled. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/processor/uv.lock | 474 +++++++++++++++++++++++++++++++++++------- 1 file changed, 398 insertions(+), 76 deletions(-) diff --git a/src/processor/uv.lock b/src/processor/uv.lock index dec50656..f95de739 100644 --- a/src/processor/uv.lock +++ b/src/processor/uv.lock @@ -424,9 +424,9 @@ name = "aiologic" version = "0.16.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "sniffio" }, + { name = "sniffio", marker = "python_full_version < '3.13'" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, - { name = "wrapt" }, + { name = "wrapt", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a8/13/50b91a3ea6b030d280d2654be97c48b6ed81753a50286ee43c646ba36d3c/aiologic-0.16.0.tar.gz", hash = "sha256:c267ccbd3ff417ec93e78d28d4d577ccca115d5797cdbd16785a551d9658858f", size = 225952, upload-time = "2025-11-27T23:48:41.195Z" 
} wheels = [ @@ -505,6 +505,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/29/57b06fdb3abdf52c621d3ca3caea735e2db4c8d48288ebd26af448e8e247/art-6.5-py3-none-any.whl", hash = "sha256:70706408144c45c666caab690627d5c74aea7b6c7ce8cc968408ddeef8d84afd", size = 610382, upload-time = "2025-04-12T17:02:21.97Z" }, ] +[[package]] +name = "asgiref" +version = "3.11.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/63/40/f03da1264ae8f7cfdbf9146542e5e7e8100a4c66ab48e791df9a03d3f6c0/asgiref-3.11.1.tar.gz", hash = "sha256:5f184dc43b7e763efe848065441eac62229c9f7b0475f41f80e207a114eda4ce", size = 38550, upload-time = "2026-02-03T13:30:14.33Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/0a/a72d10ed65068e115044937873362e6e32fab1b7dce0046aeb224682c989/asgiref-3.11.1-py3-none-any.whl", hash = "sha256:e8667a091e69529631969fd45dc268fa79b99c92c5fcdda727757e52146ec133", size = 24345, upload-time = "2026-02-03T13:30:13.039Z" }, +] + [[package]] name = "asyncio" version = "4.0.0" @@ -627,6 +636,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/d8/b8fcba9464f02b121f39de2db2bf57f0b216fe11d014513d666e8634380d/azure_core-1.38.0-py3-none-any.whl", hash = "sha256:ab0c9b2cd71fecb1842d52c965c95285d3cfb38902f6766e4a471f1cd8905335", size = 217825, upload-time = "2026-01-12T17:03:07.291Z" }, ] +[[package]] +name = "azure-core-tracing-opentelemetry" +version = "1.0.0b13" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core" }, + { name = "opentelemetry-api" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ce/ab/a937e4af8afec9d437d55252f2a3a4419fc3fc7d5e5d54022622bd11b2b6/azure_core_tracing_opentelemetry-1.0.0b13.tar.gz", hash = "sha256:6cb2f8dfd5dee6c11843db0205fc92e2434e1a272c169c953afe92483aafc7eb", size = 25832, upload-time = "2026-05-01T00:59:57.941Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/43/01/8898c2506cae6a57c1b76d930d2af94764a65354bc863feb2684235851ce/azure_core_tracing_opentelemetry-1.0.0b13-py3-none-any.whl", hash = "sha256:4dacd3a9f117f11f98e89305e161c951b8df85b984f3b56130614de9cd9887f9", size = 12112, upload-time = "2026-05-01T00:59:59.149Z" }, +] + [[package]] name = "azure-cosmos" version = "4.15.0" @@ -706,6 +728,60 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/28/af9ef022f21e3b51b3718d4348f771b490678c1116563895547c0a771362/azure_identity-1.26.0b1-py3-none-any.whl", hash = "sha256:dc608b59ae628a38611208ee761adeb1a2b9390258b58d6edcda2d24c50a4348", size = 197227, upload-time = "2025-11-07T03:04:16.923Z" }, ] +[[package]] +name = "azure-monitor-events-extension" +version = "0.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/51/976c8cd4a76d41bcd4d3f6400aeed8fdd70d516d271badf9c4a5893a558d/azure-monitor-events-extension-0.1.0.tar.gz", hash = "sha256:094773685171a50aa5cc548279c9141c8a26682f6acef397815c528b53b838b5", size = 4165, upload-time = "2023-09-19T20:01:17.887Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/09/44/cbb68c55505a604de61caa44375be7371368e71aa8386b1576be5b789e11/azure_monitor_events_extension-0.1.0-py2.py3-none-any.whl", hash = "sha256:5d92abb5e6a32ab23b12c726def9f9607c6fa1d84900d493b906ff9ec489af4a", size = 4514, upload-time = "2023-09-19T20:01:16.162Z" }, +] + +[[package]] +name = "azure-monitor-opentelemetry" +version = "1.8.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core" }, + { name = "azure-core-tracing-opentelemetry" }, + { name = "azure-monitor-opentelemetry-exporter" }, + { name = "opentelemetry-instrumentation-django" }, + { name = "opentelemetry-instrumentation-fastapi" }, + { name = "opentelemetry-instrumentation-flask" }, + { name = 
"opentelemetry-instrumentation-logging" }, + { name = "opentelemetry-instrumentation-psycopg2" }, + { name = "opentelemetry-instrumentation-requests" }, + { name = "opentelemetry-instrumentation-urllib" }, + { name = "opentelemetry-instrumentation-urllib3" }, + { name = "opentelemetry-resource-detector-azure" }, + { name = "opentelemetry-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/89/42/ea67bebb400a7561b1ad1dd59d06b67e880daf8081ec0d41d3b0ce8fcc26/azure_monitor_opentelemetry-1.8.7.tar.gz", hash = "sha256:d0a430c69451f8fa09362769d2d65471713989fb78e4ad0f50832b597921efbb", size = 76970, upload-time = "2026-03-19T21:43:57.056Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/22/245a4f75a834430759a6fab9c5ab10e18719786ae684cf234c7bb6a693d1/azure_monitor_opentelemetry-1.8.7-py3-none-any.whl", hash = "sha256:0d3a228a183d76cf22698a3eed6e836d1cf57608b8ee879c634609b26f384eb2", size = 41268, upload-time = "2026-03-19T21:43:58.188Z" }, +] + +[[package]] +name = "azure-monitor-opentelemetry-exporter" +version = "1.0.0b52" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core" }, + { name = "azure-identity" }, + { name = "msrest" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-sdk" }, + { name = "psutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7c/7e/bfc03436b88c48f5adc21a3ebbf4392b6b7fbbfe33ef3b1e88d07ba9f380/azure_monitor_opentelemetry_exporter-1.0.0b52.tar.gz", hash = "sha256:7eac679fca32dee9e426df65f2a538161db4514fc322fc66107f7826567d86e1", size = 326179, upload-time = "2026-05-11T22:47:02.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/e8/d13e6a74c98ecc3011bce9ab09fc2e75aec48ab46288f72be57c2fa21460/azure_monitor_opentelemetry_exporter-1.0.0b52-py2.py3-none-any.whl", hash = "sha256:a38c503e5e2cc0ec8a4bf336b23cce23488719f5361a45cdd01a514080f0e7fc", size = 244751, upload-time = "2026-05-11T22:47:04.304Z" }, +] + [[package]] name = 
"azure-search-documents" version = "11.7.0b2" @@ -1015,7 +1091,7 @@ name = "clr-loader" version = "0.2.10" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cffi" }, + { name = "cffi", marker = "python_full_version < '3.14'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/18/24/c12faf3f61614b3131b5c98d3bf0d376b49c7feaa73edca559aeb2aee080/clr_loader-0.2.10.tar.gz", hash = "sha256:81f114afbc5005bafc5efe5af1341d400e22137e275b042a8979f3feb9fc9446", size = 83605, upload-time = "2026-01-03T23:13:06.984Z" } wheels = [ @@ -1097,7 +1173,7 @@ name = "culsans" version = "0.11.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "aiologic" }, + { name = "aiologic", marker = "python_full_version < '3.13'" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/d9/e3/49afa1bc180e0d28008ec6bcdf82a4072d1c7a41032b5b759b60814ca4b0/culsans-0.11.0.tar.gz", hash = "sha256:0b43d0d05dce6106293d114c86e3fb4bfc63088cfe8ff08ed3fe36891447fe33", size = 107546, upload-time = "2025-12-31T23:15:38.196Z" } @@ -2124,6 +2200,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583, upload-time = "2025-03-14T23:51:03.016Z" }, ] +[[package]] +name = "msrest" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core" }, + { name = "certifi" }, + { name = "isodate" }, + { name = "requests" }, + { name = "requests-oauthlib" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/68/77/8397c8fb8fc257d8ea0fa66f8068e073278c65f05acb17dcb22a02bfdc42/msrest-0.7.1.zip", hash = "sha256:6e7661f46f3afd88b75667b7187a92829924446c7ea1d169be8c4bb7eeb788b9", size = 175332, upload-time = 
"2022-06-13T22:41:25.111Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/cf/f2966a2638144491f8696c27320d5219f48a072715075d168b31d3237720/msrest-0.7.1-py3-none-any.whl", hash = "sha256:21120a810e1233e5e6cc7fe40b474eeb4ec6f757a15d7cf86702c369f9567c32", size = 85384, upload-time = "2022-06-13T22:41:22.42Z" }, +] + [[package]] name = "multidict" version = "6.7.1" @@ -2336,6 +2428,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/78/548fb8e07b1a341746bfbecb32f2c268470f45fa028aacdbd10d9bc73aab/numpy-2.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:ba203255017337d39f89bdd58417f03c4426f12beed0440cfd933cb15f8669c7", size = 10566643, upload-time = "2026-03-29T13:21:34.339Z" }, ] +[[package]] +name = "oauthlib" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/5f/19930f824ffeb0ad4372da4812c50edbd1434f678c90c2733e1188edfc63/oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9", size = 185918, upload-time = "2025-06-19T22:48:08.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, +] + [[package]] name = "ollama" version = "0.6.2" @@ -2416,42 +2517,237 @@ wheels = [ [[package]] name = "opentelemetry-api" -version = "1.41.1" +version = "1.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "importlib-metadata" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fa/fc/b7564cbef36601aef0d6c9bc01f7badb64be8e862c2e1c3c5c3b43b53e4f/opentelemetry_api-1.41.1.tar.gz", hash = "sha256:0ad1814d73b875f84494387dae86ce0b12c68556331ce6ce8fe789197c949621", size = 71416, upload-time = 
"2026-04-24T13:15:38.262Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/1d/4049a9e8698361cc1a1aa03a6c59e4fa4c71e0c0f94a30f988a6876a2ae6/opentelemetry_api-1.40.0.tar.gz", hash = "sha256:159be641c0b04d11e9ecd576906462773eb97ae1b657730f0ecf64d32071569f", size = 70851, upload-time = "2026-03-04T14:17:21.555Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/bf/93795954016c522008da367da292adceed71cca6ee1717e1d64c83089099/opentelemetry_api-1.40.0-py3-none-any.whl", hash = "sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9", size = 68676, upload-time = "2026-03-04T14:17:01.24Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "packaging" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/37/6bf8e66bfcee5d3c6515b79cb2ee9ad05fe573c20f7ceb288d0e7eeec28c/opentelemetry_instrumentation-0.61b0.tar.gz", hash = "sha256:cb21b48db738c9de196eba6b805b4ff9de3b7f187e4bbf9a466fa170514f1fc7", size = 32606, upload-time = "2026-03-04T14:20:16.825Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/3e/f6f10f178b6316de67f0dfdbbb699a24fbe8917cf1743c1595fb9dcdd461/opentelemetry_instrumentation-0.61b0-py3-none-any.whl", hash = "sha256:92a93a280e69788e8f88391247cc530fd81f16f2b011979d4d6398f805cfbc63", size = 33448, upload-time = "2026-03-04T14:19:02.447Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-asgi" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asgiref" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/00/3e/143cf5c034e58037307e6a24f06e0dd64b2c49ae60a965fc580027581931/opentelemetry_instrumentation_asgi-0.61b0.tar.gz", hash = "sha256:9d08e127244361dc33976d39dd4ca8f128b5aa5a7ae425208400a80a095019b5", size = 26691, upload-time = "2026-03-04T14:20:21.038Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/19/78/154470cf9d741a7487fbb5067357b87386475bbb77948a6707cae982e158/opentelemetry_instrumentation_asgi-0.61b0-py3-none-any.whl", hash = "sha256:e4b3ce6b66074e525e717efff20745434e5efd5d9df6557710856fba356da7a4", size = 16980, upload-time = "2026-03-04T14:19:10.894Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-dbapi" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d6/ed/ba91c9e4a3ec65781e9c59982109f0a36de9fa574f622596b33d1985dab5/opentelemetry_instrumentation_dbapi-0.61b0.tar.gz", hash = "sha256:02fa800682c1de87dcad0e59f2092b3b6fb8b8ea0636518f989e1166b418dcb9", size = 16761, upload-time = "2026-03-04T14:20:29.782Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/a5/d26c68f3fd33eb7410985cef7700bb426e2c4a26de9207902cbbffb19a3f/opentelemetry_instrumentation_dbapi-0.61b0-py3-none-any.whl", hash = "sha256:8f762c39c8edd20c6aef3282550a2cfbfec76c3f431bf5c36327dcf9ece2e5a0", size = 14134, upload-time = "2026-03-04T14:19:24.718Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-django" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-instrumentation-wsgi" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/74/ef/6bc1a6560630f26b1c010af86b28f42bfbe6a601bd1647d1436e0d3436aa/opentelemetry_instrumentation_django-0.61b0.tar.gz", hash = "sha256:9885154dc128578de0e6b5ce49e965c786f8ab071175bec005dcd454510be951", size = 25996, upload-time = "2026-03-04T14:20:30.453Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/3b/74dad6d98fdee1d137f1c2748548d4159578508f21e3aef581c110e64041/opentelemetry_instrumentation_django-0.61b0-py3-none-any.whl", hash = "sha256:26c1b0b325a9783d4a2f4df660ba05cf929c3eda2ae9b07916b649bb44e1c5b6", size = 20773, upload-time = "2026-03-04T14:19:25.675Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-fastapi" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-instrumentation-asgi" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/37/35/aa727bb6e6ef930dcdc96a617b83748fece57b43c47d83ba8d83fbeca657/opentelemetry_instrumentation_fastapi-0.61b0.tar.gz", hash = "sha256:3a24f35b07c557ae1bbc483bf8412221f25d79a405f8b047de8b670722e2fa9f", size = 24800, upload-time = "2026-03-04T14:20:32.759Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/05/acfeb2cccd434242a0a7d0ea29afaf077e04b42b35b485d89aee4e0d9340/opentelemetry_instrumentation_fastapi-0.61b0-py3-none-any.whl", hash = "sha256:a1a844d846540d687d377516b2ff698b51d87c781b59f47c214359c4a241047c", size = 13485, upload-time = "2026-03-04T14:19:30.351Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-flask" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-instrumentation-wsgi" }, + { name = "opentelemetry-semantic-conventions" }, 
+ { name = "opentelemetry-util-http" }, + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d9/33/d6852d8f2c3eef86f2f8c858d6f5315983c7063e07e595519e96d4c31c06/opentelemetry_instrumentation_flask-0.61b0.tar.gz", hash = "sha256:e9faf58dfd9860a1868442d180142645abdafc1a652dd73d469a5efd106a7d49", size = 24071, upload-time = "2026-03-04T14:20:33.437Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3e/41/619f3530324a58491f2d20f216a10dd7393629b29db4610dda642a27f4ed/opentelemetry_instrumentation_flask-0.61b0-py3-none-any.whl", hash = "sha256:e8ce474d7ce543bfbbb3e93f8a6f8263348af9d7b45502f387420cf3afa71253", size = 15996, upload-time = "2026-03-04T14:19:31.304Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-logging" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/e0/69473f925acfe2d4edf5c23bcced36906ac3627aa7c5722a8e3f60825f3b/opentelemetry_instrumentation_logging-0.61b0.tar.gz", hash = "sha256:feaa30b700acd2a37cc81db5f562ab0c3a5b6cc2453595e98b72c01dcf649584", size = 17906, upload-time = "2026-03-04T14:20:37.398Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/29/59/3e7118ed140f76b0982ba4321bdaed1997a0473f9720de2d10788a577033/opentelemetry_api-1.41.1-py3-none-any.whl", hash = "sha256:a22df900e75c76dc08440710e51f52f1aa6b451b429298896023e60db5b3139f", size = 69007, upload-time = "2026-04-24T13:15:15.662Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0e/2137db5239cc5e564495549a4d11488a7af9b48fc76520a0eea20e69ddae/opentelemetry_instrumentation_logging-0.61b0-py3-none-any.whl", hash = "sha256:6d87e5ded6a0128d775d41511f8380910a1b610671081d16efb05ac3711c0074", size = 17076, upload-time = "2026-03-04T14:19:36.765Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-psycopg2" +version = "0.61b0" 
+source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-instrumentation-dbapi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/28/f28d52b1088e7a09761566f8700507b54d3d83a6f9c93c0ce02f53619e83/opentelemetry_instrumentation_psycopg2-0.61b0.tar.gz", hash = "sha256:863ccf9687b71e73dd489c7bb117278768bdf26aa0dafe7dc974a2425e05b5d7", size = 11676, upload-time = "2026-03-04T14:20:41.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/f1/4341d0584c288765c73e28c30ba58e7aedb50c01108f17f947b872657f79/opentelemetry_instrumentation_psycopg2-0.61b0-py3-none-any.whl", hash = "sha256:36b96983beda05c927179bb66b6c72f07a8d9a591f76ce9da88b1dd1587cb083", size = 11491, upload-time = "2026-03-04T14:19:42.018Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-requests" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5a/c7/7a47cb85c7aa93a9c820552e414889185bcf91245271d12e5d443e5f834d/opentelemetry_instrumentation_requests-0.61b0.tar.gz", hash = "sha256:15f879ce8fb206bd7e6fdc61663ea63481040a845218c0cf42902ce70bd7e9d9", size = 18379, upload-time = "2026-03-04T14:20:46.959Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/a1/a7a133b273d1f53950f16a370fc94367eff472c9c2576e8e9e28c62dcc9f/opentelemetry_instrumentation_requests-0.61b0-py3-none-any.whl", hash = "sha256:cce19b379949fe637eb73ba39b02c57d2d0805447ca6d86534aa33fcb141f683", size = 14207, upload-time = "2026-03-04T14:19:51.765Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-urllib" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { 
name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/37/77cd326b083390e74280c08bbd585153809619dad068e2d1b253fec1164d/opentelemetry_instrumentation_urllib-0.61b0.tar.gz", hash = "sha256:6a15ff862fc1603e0ea5ea75558f76f36436b02e0ae48daecedcb5e574cce160", size = 16894, upload-time = "2026-03-04T14:20:52.726Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/fc/a88fbfd8b9eb16ba1c21f0514c12696441be7fc42c7e319f3ee793bf9e96/opentelemetry_instrumentation_urllib-0.61b0-py3-none-any.whl", hash = "sha256:d7e409876580fb41102e3522ce81a756e53a74073c036a267a1c280cc0fa09b0", size = 13970, upload-time = "2026-03-04T14:20:01.24Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-urllib3" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fa/80/7ad8da30f479c6117768e72d6f2f3f0bd3495338707d6f61de042149578a/opentelemetry_instrumentation_urllib3-0.61b0.tar.gz", hash = "sha256:f00037bc8ff813153c4b79306f55a14618c40469a69c6c03a3add29dc7e8b928", size = 19325, upload-time = "2026-03-04T14:20:53.386Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/0c/01359e55b9f2fb2b1d4d9e85e77773a96697207895118533f3be718a3326/opentelemetry_instrumentation_urllib3-0.61b0-py3-none-any.whl", hash = "sha256:9644f8c07870266e52f129e6226859ff3a35192555abe46fa0ef9bbbf5b6b46d", size = 14339, upload-time = "2026-03-04T14:20:02.681Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-wsgi" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/89/e5/189f2845362cfe78e356ba127eab21456309def411c6874aa4800c3de816/opentelemetry_instrumentation_wsgi-0.61b0.tar.gz", hash = "sha256:380f2ae61714e5303275a80b2e14c58571573cd1fddf496d8c39fb9551c5e532", size = 19898, upload-time = "2026-03-04T14:20:54.068Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/75/d6b42ba26f3c921be6d01b16561b7bb863f843bad7ac3a5011f62617bcab/opentelemetry_instrumentation_wsgi-0.61b0-py3-none-any.whl", hash = "sha256:bd33b0824166f24134a3400648805e8d2e6a7951f070241294e8b8866611d7fa", size = 14628, upload-time = "2026-03-04T14:20:03.934Z" }, +] + +[[package]] +name = "opentelemetry-resource-detector-azure" +version = "0.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/67/e4/0d359d48d03d447225b30c3dd889d5d454e3b413763ff721f9b0e4ac2e59/opentelemetry_resource_detector_azure-0.1.5.tar.gz", hash = "sha256:e0ba658a87c69eebc806e75398cd0e9f68a8898ea62de99bc1b7083136403710", size = 11503, upload-time = "2024-05-16T21:54:58.994Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/ae/c26d8da88ba2e438e9653a408b0c2ad6f17267801250a8f3cc6405a93a72/opentelemetry_resource_detector_azure-0.1.5-py3-none-any.whl", hash = "sha256:4dcc5d54ab5c3b11226af39509bc98979a8b9e0f8a24c1b888783755d3bf00eb", size = 14252, upload-time = "2024-05-16T21:54:57.208Z" }, ] [[package]] name = "opentelemetry-sdk" -version = "1.41.1" +version = "1.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "opentelemetry-semantic-conventions" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/58/d0/54ee30dab82fb0acda23d144502771ff76ef8728459c83c3e89ef9fb1825/opentelemetry_sdk-1.41.1.tar.gz", hash = "sha256:724b615e1215b5aeacda0abb8a6a8922c9a1853068948bd0bd225a56d0c792e6", size = 230180, upload-time = "2026-04-24T13:15:50.991Z" } +sdist = { url = "https://files.pythonhosted.org/packages/58/fd/3c3125b20ba18ce2155ba9ea74acb0ae5d25f8cd39cfd37455601b7955cc/opentelemetry_sdk-1.40.0.tar.gz", hash = "sha256:18e9f5ec20d859d268c7cb3c5198c8d105d073714db3de50b593b8c1345a48f2", size = 184252, upload-time = "2026-03-04T14:17:31.87Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/e7/a1420b698aad018e1cf60fdbaaccbe49021fb415e2a0d81c242f4c518f54/opentelemetry_sdk-1.41.1-py3-none-any.whl", hash = "sha256:edee379c126c1bce952b0c812b48fe8ff35b30df0eecf17e98afa4d598b7d85d", size = 180213, upload-time = "2026-04-24T13:15:33.767Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c5/6a852903d8bfac758c6dc6e9a68b015d3c33f2f1be5e9591e0f4b69c7e0a/opentelemetry_sdk-1.40.0-py3-none-any.whl", hash = "sha256:787d2154a71f4b3d81f20524a8ce061b7db667d24e46753f32a7bc48f1c1f3f1", size = 141951, upload-time = "2026-03-04T14:17:17.961Z" }, ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.62b1" +version = "0.61b0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9e/de/911ac9e309052aca1b20b2d5549d3db45d1011e1a610e552c6ccdd1b64f8/opentelemetry_semantic_conventions-0.62b1.tar.gz", hash = "sha256:c5cc6e04a7f8c7cdd30be2ed81499fa4e75bfbd52c9cb70d40af1f9cd3619802", size = 145750, upload-time = "2026-04-24T13:15:52.236Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/c0/4ae7973f3c2cfd2b6e321f1675626f0dab0a97027cc7a297474c9c8f3d04/opentelemetry_semantic_conventions-0.61b0.tar.gz", hash = 
"sha256:072f65473c5d7c6dc0355b27d6c9d1a679d63b6d4b4b16a9773062cb7e31192a", size = 145755, upload-time = "2026-03-04T14:17:32.664Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/a6/83dc2ab6fa397ee66fba04fe2e74bdf7be3b3870005359ceb7689103c058/opentelemetry_semantic_conventions-0.62b1-py3-none-any.whl", hash = "sha256:cf506938103d331fbb78eded0d9788095f7fd59016f2bda813c3324e5a74a93c", size = 231620, upload-time = "2026-04-24T13:15:35.454Z" }, + { url = "https://files.pythonhosted.org/packages/b2/37/cc6a55e448deaa9b27377d087da8615a3416d8ad523d5960b78dbeadd02a/opentelemetry_semantic_conventions-0.61b0-py3-none-any.whl", hash = "sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2", size = 231621, upload-time = "2026-03-04T14:17:19.33Z" }, ] [[package]] @@ -2467,6 +2763,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/55/22/41fb05f1dc5fda2c468e05a41814c20859016c85117b66c8a257cae814f6/opentelemetry_semantic_conventions_ai-0.5.1-py3-none-any.whl", hash = "sha256:25aeb22bd261543b4898a73824026d96770e5351209c7d07a0b1314762b1f6e4", size = 11250, upload-time = "2026-03-26T14:20:37.108Z" }, ] +[[package]] +name = "opentelemetry-util-http" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/3c/f0196223efc5c4ca19f8fad3d5462b171ac6333013335ce540c01af419e9/opentelemetry_util_http-0.61b0.tar.gz", hash = "sha256:1039cb891334ad2731affdf034d8fb8b48c239af9b6dd295e5fabd07f1c95572", size = 11361, upload-time = "2026-03-04T14:20:57.01Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/e5/c08aaaf2f64288d2b6ef65741d2de5454e64af3e050f34285fb1907492fe/opentelemetry_util_http-0.61b0-py3-none-any.whl", hash = "sha256:8e715e848233e9527ea47e275659ea60a57a75edf5206a3b937e236a6da5fc33", size = 9281, upload-time = "2026-03-04T14:20:08.364Z" }, +] + [[package]] name = "orderedmultidict" version = "1.0.2" @@ -2574,8 +2879,8 @@ name = "powerfx" version 
= "0.0.34" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cffi" }, - { name = "pythonnet" }, + { name = "cffi", marker = "python_full_version < '3.14'" }, + { name = "pythonnet", marker = "python_full_version < '3.14'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/9f/fb/6c4bf87e0c74ca1c563921ce89ca1c5785b7576bca932f7255cdf81082a7/powerfx-0.0.34.tar.gz", hash = "sha256:956992e7afd272657ed16d80f4cad24ec95d9e4a79fb9dfa4a068a09e136af32", size = 3237555, upload-time = "2025-12-22T15:50:59.682Z" } wheels = [ @@ -2613,6 +2918,8 @@ dependencies = [ { name = "azure-core" }, { name = "azure-cosmos" }, { name = "azure-identity" }, + { name = "azure-monitor-events-extension" }, + { name = "azure-monitor-opentelemetry" }, { name = "azure-storage-blob" }, { name = "azure-storage-file-datalake" }, { name = "azure-storage-queue" }, @@ -2646,6 +2953,8 @@ requires-dist = [ { name = "azure-core", specifier = "==1.38.0" }, { name = "azure-cosmos", specifier = "==4.15.0" }, { name = "azure-identity", specifier = "==1.26.0b1" }, + { name = "azure-monitor-events-extension", specifier = "==0.1.0" }, + { name = "azure-monitor-opentelemetry", specifier = "==1.8.7" }, { name = "azure-storage-blob", specifier = "==12.28.0" }, { name = "azure-storage-file-datalake", specifier = "==12.23.0" }, { name = "azure-storage-queue", specifier = "==12.15.0" }, @@ -3184,7 +3493,7 @@ name = "pythonnet" version = "3.0.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "clr-loader" }, + { name = "clr-loader", marker = "python_full_version < '3.14'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/9a/d6/1afd75edd932306ae9bd2c2d961d603dc2b52fcec51b04afea464f1f6646/pythonnet-3.0.5.tar.gz", hash = "sha256:48e43ca463941b3608b32b4e236db92d8d40db4c58a75ace902985f76dac21cf", size = 239212, upload-time = "2024-12-13T08:30:44.393Z" } wheels = [ @@ -3389,6 +3698,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/0c/53/ddb8b8fa96367976cf52bb0610ffd529bd7d2795b2e4c1724724d071718c/requests-2.34.0.dev1-py3-none-any.whl", hash = "sha256:c8749aeb3c4b204f80fd288f7507378c9afe66a3f189fb43fd77ea33e74d7564", size = 73077, upload-time = "2026-05-03T20:21:40.509Z" }, ] +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "oauthlib" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650, upload-time = "2024-03-22T20:32:29.939Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179, upload-time = "2024-03-22T20:32:28.055Z" }, +] + [[package]] name = "requests-toolbelt" version = "1.0.0" @@ -3985,66 +4307,66 @@ wheels = [ [[package]] name = "wrapt" -version = "2.2.0rc11" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/78/d0/9c3b43631321c0fe61b9e2873b0542165a8f90393f49006f115d1e06eefc/wrapt-2.2.0rc11.tar.gz", hash = "sha256:fee2cf69591f32f16e5242ae4909bc9f43c66688c1f73f837c9c81313771ceba", size = 125088, upload-time = "2026-04-24T10:15:19.951Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cd/15/7b058ae7e7fe5bc042b3b0904a06a4038143113aca92684eed3e02f6a663/wrapt-2.2.0rc11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7b9f264afa18c5fc12983698b4dcf66bdd521ff268f40d34db575b651c891d1e", size = 80950, upload-time = "2026-04-24T10:16:57.967Z" }, - { url = 
"https://files.pythonhosted.org/packages/b3/5e/605d3425b7533ee881ad4a3130699a7c48aba6e7134975438530ae7610a3/wrapt-2.2.0rc11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b5eddef9db8eb34e277b2dcf9ab4bd7898fcd8246380516cc34180496655e335", size = 81604, upload-time = "2026-04-24T10:17:04.913Z" }, - { url = "https://files.pythonhosted.org/packages/9b/e2/91be7dcc9519fc35ce46b3b7955219ff99e219cd62eb43de89fa4d6653b4/wrapt-2.2.0rc11-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:469c67a6326a6a269f2cf391035bec7b4ab1aeac6acf56645e3b6c721a3153cc", size = 168642, upload-time = "2026-04-24T10:17:20.819Z" }, - { url = "https://files.pythonhosted.org/packages/f7/11/65135058543b659be3ae772c7510f56792968a752dfc617ac2577b0a5a5d/wrapt-2.2.0rc11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7b4cd66a906667355bb72fbb7e2a7a1fe688671d6d68dad7efdbfa22ae165366", size = 170942, upload-time = "2026-04-24T10:16:25.138Z" }, - { url = "https://files.pythonhosted.org/packages/d3/f8/83a59d35982ef5f26a2f0301d2f642b2063be8d20ec276b718a3f951b52c/wrapt-2.2.0rc11-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5df0ac11591b554fb028c93586cfb6991e3cefd5d8d5d0b0f6881dd1b3c1814c", size = 159962, upload-time = "2026-04-24T10:15:58.563Z" }, - { url = "https://files.pythonhosted.org/packages/0a/1a/8f905774a951cc976d4a772c15329a3f542105265c24d6ce1e718d65dbaf/wrapt-2.2.0rc11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d74d14da54197a9b35eb6992212f7cdd6106e3bc75e69e2a9dc031e6ec806d9f", size = 168785, upload-time = "2026-04-24T10:17:35.07Z" }, - { url = "https://files.pythonhosted.org/packages/c8/a3/61f54ea74c6d797271aa8e312bb5e0c98ef0cf39110aac863f579f2b5a83/wrapt-2.2.0rc11-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:b3c93ee7a1721234c00020662ecddab4fdd248fa8a2a12c9f0268c4bad085855", size = 158119, upload-time = "2026-04-24T10:16:54.622Z" }, - { url = 
"https://files.pythonhosted.org/packages/44/fc/4c36aaf560f273d53467a38fd91fda8ecc5aa5b4c96e495a9b33022d6daa/wrapt-2.2.0rc11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ee95ef8d643ddc7fbb76d8a1bffde5bb8c0f8a707d821fa9e95fde76e870d5d", size = 167068, upload-time = "2026-04-24T10:16:40.351Z" }, - { url = "https://files.pythonhosted.org/packages/5a/6c/5aeae8600e23ddd5e31f096351912caf5548bbff3799f846799d6b86f9d3/wrapt-2.2.0rc11-cp312-cp312-win32.whl", hash = "sha256:36a2f254c1e183d4404d8b816d453f639c1422d9a374cffb6e9e90e5ccb2a40c", size = 77804, upload-time = "2026-04-24T10:16:21.994Z" }, - { url = "https://files.pythonhosted.org/packages/99/70/c39167f608e1c8b03bf607e8902cbd663961a007d9eb7e2847bd96ce696c/wrapt-2.2.0rc11-cp312-cp312-win_amd64.whl", hash = "sha256:c1f27bb9866a53445fe28dabb5e0770c8a625f00072537f5981afd08f5188e64", size = 80769, upload-time = "2026-04-24T10:16:56.208Z" }, - { url = "https://files.pythonhosted.org/packages/29/91/ecfff0b6dbdd1598b347baacbe6c57f9b4ffc67bf737618baef8cc3be36a/wrapt-2.2.0rc11-cp312-cp312-win_arm64.whl", hash = "sha256:bf0e904769d96b1d68971ae4015771e2ceecf1cbba2dff606468cc312444a258", size = 79038, upload-time = "2026-04-24T10:15:25.801Z" }, - { url = "https://files.pythonhosted.org/packages/53/b8/de8018f7bafa5b550654f3eb564645c7c5e7bf853e0c3f90546a7ec49e54/wrapt-2.2.0rc11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8ca1d5666fc5a26452ff369085b605a8f791532eed00c62af272ea330d636c16", size = 80773, upload-time = "2026-04-24T10:16:06.936Z" }, - { url = "https://files.pythonhosted.org/packages/49/56/2afbe0bdb0f31a6a29292eadd037ebe4d2ad5a9dc51cd375b5e82589d332/wrapt-2.2.0rc11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f759fff25b56481ae16aadb0c91dfcd77ff1e66be6da3bd5664766ed4adde59f", size = 81303, upload-time = "2026-04-24T10:16:23.695Z" }, - { url = 
"https://files.pythonhosted.org/packages/15/c5/e30fe36a5f5f4a4dbc3bceba9d0fa5271ec8d7639bc59f096bac47dfd198/wrapt-2.2.0rc11-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0d4445e090535c73af539aad56f681d8a0e40bc02d4717d5ea1c39fcbf367bc1", size = 166728, upload-time = "2026-04-24T10:15:27.724Z" }, - { url = "https://files.pythonhosted.org/packages/f5/b0/cae82622e6e834eaa40b21a16918d7cc8bec32550d2cc9aa0d386b8697f5/wrapt-2.2.0rc11-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb8690e53d5801bf8a79446dd3898a957f34d2384593daf78dd181fda716319e", size = 166782, upload-time = "2026-04-24T10:17:16.491Z" }, - { url = "https://files.pythonhosted.org/packages/4c/41/20556af3b9f9c605c0eac3432c724135064b6ed4a21b475dd8666c636276/wrapt-2.2.0rc11-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:25ef8e95c76c82821342336b1979e11583ff0bca1d4402e9a7f9a9b689d81f65", size = 157843, upload-time = "2026-04-24T10:15:31.307Z" }, - { url = "https://files.pythonhosted.org/packages/15/d2/8711d5c936d8328dbaf650291f59a979ce14ff49b52d8b6aabfa8cfb8acb/wrapt-2.2.0rc11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9d4d055acfe55633ff260d8c268a230ddf6cd408ac5a3bacfaeaa59ca13431cb", size = 165748, upload-time = "2026-04-24T10:17:03.385Z" }, - { url = "https://files.pythonhosted.org/packages/86/fa/f4dc4f9b1c8c8d61b7efbf4d53e1aa6193073199749a796115f2d8f1b0e6/wrapt-2.2.0rc11-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:4d97d727dcd414391c476dfdfb77ebe45082d9499d8b1cbc1149a96991d5e2b2", size = 156532, upload-time = "2026-04-24T10:16:20.511Z" }, - { url = "https://files.pythonhosted.org/packages/75/4d/4d876bf16b89278b269550aece6520406d58669f0651e6852b3c0293336e/wrapt-2.2.0rc11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9af734e2a6d48ef84e5c6f48e5b18d6e5904447aa9298d1e3457ba9d8865689d", size = 165901, upload-time = "2026-04-24T10:15:53.283Z" }, - { url = 
"https://files.pythonhosted.org/packages/4e/6a/54c60b18148ca3d017137de3f902d308bdb4ec31f5a1f9465f03afc98724/wrapt-2.2.0rc11-cp313-cp313-win32.whl", hash = "sha256:ce1dbe00d7aad0205d469f85b69f5dc1532e8e85f4989cc1258376c6b98d8246", size = 77763, upload-time = "2026-04-24T10:17:31.486Z" }, - { url = "https://files.pythonhosted.org/packages/a1/8d/9c7e7d01704e8a2b6d49c4ccc641a4dca8f37700f22a888fd3ba5937bc41/wrapt-2.2.0rc11-cp313-cp313-win_amd64.whl", hash = "sha256:abfb2f5f455783a03390a5b020efa7590a0c9a9059cc5b6a6badb5f15dad38c9", size = 80678, upload-time = "2026-04-24T10:17:23.695Z" }, - { url = "https://files.pythonhosted.org/packages/a4/1a/645faa10a61f8d75806fa2031f509fd0720142a1f24cedb7e79bcc97c03a/wrapt-2.2.0rc11-cp313-cp313-win_arm64.whl", hash = "sha256:3b3efc5b5325e7f63982a52246f58b2c3eb0afc360017dad22485cfc7ecc6b40", size = 79036, upload-time = "2026-04-24T10:16:26.444Z" }, - { url = "https://files.pythonhosted.org/packages/1c/fa/806052da337fc153603eb461c6ffd67af4ec4e52adaa60a882af2a0d786c/wrapt-2.2.0rc11-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:923490056004b62079d810a8214457329c4bcf9eecf6e3a3508fd387c0d0ed4c", size = 82706, upload-time = "2026-04-24T10:17:22.198Z" }, - { url = "https://files.pythonhosted.org/packages/16/60/ee692cec34f3b91e904e358dc29b4554d00ef47aef71cad1166a89f3f1d4/wrapt-2.2.0rc11-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:785f93eb4a8a417ab732338708dcf0d2cca3ac5d1df0b749ba9d79a7d9d8c3fa", size = 83277, upload-time = "2026-04-24T10:15:29.471Z" }, - { url = "https://files.pythonhosted.org/packages/51/01/4ea8f9e9098277dce4953f2ac2cfac7e9dff3b48de312685c5b8cb4ab237/wrapt-2.2.0rc11-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f49955a6ad62c2011d2049d4cb80d903aa7fa0c75a4a092a0e12d26f6234d005", size = 203709, upload-time = "2026-04-24T10:17:24.938Z" }, - { url = 
"https://files.pythonhosted.org/packages/7e/ea/a03697f3b18b5bb07c304ba16027707dcf9beded31c8f0898db0b969c9ba/wrapt-2.2.0rc11-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:00fc51db6261d47b97b12d2b377347c15e3ffccc3a18bc1ac2bc296e420088aa", size = 209622, upload-time = "2026-04-24T10:17:06.603Z" }, - { url = "https://files.pythonhosted.org/packages/4b/06/e41ef42c16adb7ad783d03854d6ca90353780d468f21ff9b52a1ffa772d8/wrapt-2.2.0rc11-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8d4cc5fceffc47390dc1ce0acd2163e7d6d1f145ee57d489e7f099d1876c6e2e", size = 194637, upload-time = "2026-04-24T10:15:09.927Z" }, - { url = "https://files.pythonhosted.org/packages/23/8c/099a117f155423dd0be7d90c46b63757fb6e6d62f71950c36fcb966040b4/wrapt-2.2.0rc11-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60b7ac5a746713f034d3b0e6c65f215d90f872b170b82efa118df8af31838799", size = 205321, upload-time = "2026-04-24T10:16:31.031Z" }, - { url = "https://files.pythonhosted.org/packages/33/8f/0b652dd807fd25769cc8b19fe82bc39bcf65b225c69be038a92c3d5e7518/wrapt-2.2.0rc11-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:cb85f8211d1bda71cf8b6f9a425e0824573a799c293a6b79e906c33d7fb296ba", size = 192095, upload-time = "2026-04-24T10:16:32.605Z" }, - { url = "https://files.pythonhosted.org/packages/62/61/fb913f197e647fbbc49399c3fbb9addc1f6cee953185da1014bbad6c5c2e/wrapt-2.2.0rc11-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:eb3dd184ab1ad3f4b0692a599dc91ebdf2e4a689e8d4a73014b5c7e333c8ee8d", size = 199180, upload-time = "2026-04-24T10:16:08.82Z" }, - { url = "https://files.pythonhosted.org/packages/47/82/7cb3d889191203250d3ab9335fac20c0eb70ea25146d68789d0ff74a7eb1/wrapt-2.2.0rc11-cp313-cp313t-win32.whl", hash = "sha256:08dc26d3d7a6efb21fdd644b91235a509ad9cdaa158a25fb5c3eff0d64e31450", size = 79380, upload-time = "2026-04-24T10:15:11.961Z" }, - { url = 
"https://files.pythonhosted.org/packages/33/ee/63cbd676d011a78077ad03062c3b32deeac07a52bf61078b4a2964fc006b/wrapt-2.2.0rc11-cp313-cp313t-win_amd64.whl", hash = "sha256:3883b31768f3381c96b8f59c75cf0f3070b457d44e2ed9ed41896f0725df4a35", size = 82963, upload-time = "2026-04-24T10:15:43.899Z" }, - { url = "https://files.pythonhosted.org/packages/5a/7c/dfd8135e5988740eff4e41a87155508db28bc5877c0fdc27942354304d31/wrapt-2.2.0rc11-cp313-cp313t-win_arm64.whl", hash = "sha256:9dd990000f133f2961a5bfee6a4aad07ad075792122df8e797b0854d31373a58", size = 80224, upload-time = "2026-04-24T10:17:14.992Z" }, - { url = "https://files.pythonhosted.org/packages/6d/61/fbf6a0f4193b9beef222a14638d176d346532971bc7df499d120538e71ce/wrapt-2.2.0rc11-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:6decf7275b26ed3397b4a3beefe2436ebd75e2348c15f75e3a5223e65231a1d7", size = 80817, upload-time = "2026-04-24T10:17:17.818Z" }, - { url = "https://files.pythonhosted.org/packages/af/5c/02ee0ddd25f2e8d7f1b61646858ea48748c08603d38b45192b32c2bc4765/wrapt-2.2.0rc11-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:21686c1d2625346c90a6a8abb019ae2e985f77b51d4b28be9290dcbde0036f81", size = 81398, upload-time = "2026-04-24T10:16:41.631Z" }, - { url = "https://files.pythonhosted.org/packages/0c/a6/41ff243e781d127e429f79f2e8ecd907efeb0bb990412b7bb05c945ef57d/wrapt-2.2.0rc11-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5481f1406125cc9cdffd8c054e1ba45213f58a28d62cb5854654bc37dbc1ffb9", size = 166614, upload-time = "2026-04-24T10:16:37.217Z" }, - { url = "https://files.pythonhosted.org/packages/68/28/47ae8e1bfe412762f08b97a824ee7d2e4bb9284951a1e280921fe112c414/wrapt-2.2.0rc11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbc9681f2adaf789cf04688430169969c206c9b67904feba092cea53377f0919", size = 166215, upload-time = "2026-04-24T10:15:05.466Z" }, - { url = 
"https://files.pythonhosted.org/packages/cf/c0/67b6f568ae1858983c1702f303be4bb009bc551b3a48c2e52161bd60056e/wrapt-2.2.0rc11-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fb24cc8134bd03be435e0272c692fbe7450658939291501c3496c65f155c1b7b", size = 157651, upload-time = "2026-04-24T10:15:33.278Z" }, - { url = "https://files.pythonhosted.org/packages/f6/48/88982438be70262037eaca70dd128f03abd9600694d114c8671e8cde4c78/wrapt-2.2.0rc11-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:294f8ed73cc4f498150903553f50f582772cc194c72fc7c60382c7de30410ecf", size = 165992, upload-time = "2026-04-24T10:16:18.995Z" }, - { url = "https://files.pythonhosted.org/packages/80/32/fa7f70286cdc235af0239535d8ec5da4c2049c83e0ec2b2d6c44d89231eb/wrapt-2.2.0rc11-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:8bd9c2b5d8f799aca53a0a1a8f81355447c42b00826f93fc7a1ca20325c2139e", size = 156394, upload-time = "2026-04-24T10:15:35.033Z" }, - { url = "https://files.pythonhosted.org/packages/9b/f7/b58a85a4fd651ad540eda37eedcbe3a4abdc70c1981ea2674eee8b0f005d/wrapt-2.2.0rc11-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7c4076d31907715869df3a97366d114e02f909d3e41ce0b1c3b6b00df82a6226", size = 165448, upload-time = "2026-04-24T10:17:37.199Z" }, - { url = "https://files.pythonhosted.org/packages/f1/87/904307947657b2b1cce7304968c69e72fa6195e87435288e970942e8a385/wrapt-2.2.0rc11-cp314-cp314-win32.whl", hash = "sha256:d0fe901e422671d45c09bd1a8a5f36130eeea1711ec10a0c5e017c7af4a4d044", size = 78284, upload-time = "2026-04-24T10:17:19.081Z" }, - { url = "https://files.pythonhosted.org/packages/7f/06/d0de22123f64259518baa385b2e7fc8c5913547cca37072174f4bc2f6f23/wrapt-2.2.0rc11-cp314-cp314-win_amd64.whl", hash = "sha256:8109f72963b6b6e15fa8511be18bbb3a369f5033b444b5b97c853deb813b0553", size = 81086, upload-time = "2026-04-24T10:16:38.819Z" }, - { url = 
"https://files.pythonhosted.org/packages/b4/b2/44f0e04cadb1f57890235ed2aa57e2519518ccbb1d1bb88bcaf80cc18693/wrapt-2.2.0rc11-cp314-cp314-win_arm64.whl", hash = "sha256:51c87d3285669347383705118347b7f446cdc23cb13cc4b0baed5b04032df106", size = 79516, upload-time = "2026-04-24T10:16:14.585Z" }, - { url = "https://files.pythonhosted.org/packages/7e/b8/015cd6157537d9c80f60783fc6df2240af3b12b382732ab7eeecb46febff/wrapt-2.2.0rc11-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:703b2f8c21d1be1027742ba4f34536f5b5717e34077bb04e09b205eb6c493a3a", size = 82801, upload-time = "2026-04-24T10:15:54.77Z" }, - { url = "https://files.pythonhosted.org/packages/2e/ba/cb228a7c98be16d4920b5230693cadceb3feadbd6e658466dc79f0de0049/wrapt-2.2.0rc11-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1bfe526ca947c4d830bb0a18caabc5d1aee52a7714cfe898981434a2e03f1002", size = 83276, upload-time = "2026-04-24T10:16:12.756Z" }, - { url = "https://files.pythonhosted.org/packages/0e/b7/15976c633431310c955c2a935211b734e236136d9f4475e2b5212536dadc/wrapt-2.2.0rc11-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:700978189597d950cf7714fb50923afa5c98f931da804bafbc5b41d83dcbb0a8", size = 203698, upload-time = "2026-04-24T10:15:56.75Z" }, - { url = "https://files.pythonhosted.org/packages/6a/71/45592fa1517ddabb5ddef0331f4938077e3c672e59de5a352341579e4349/wrapt-2.2.0rc11-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78a7447b83cfb007b2b09e7f32131b43a9a662072701fed68cec42a835025214", size = 209628, upload-time = "2026-04-24T10:16:43.389Z" }, - { url = "https://files.pythonhosted.org/packages/95/b5/86f46e4a1c7cfbe456984be10593b5a871aa69e853b3ef5640021e3d4f0d/wrapt-2.2.0rc11-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4853b4ed7c806985bc366a5b3600b83a7c7c4609f8ea5599df45ddc94a32db94", size = 194677, upload-time = "2026-04-24T10:17:09.417Z" }, - { url = 
"https://files.pythonhosted.org/packages/1b/61/28184784b6ea7b17e6bd5b3253055665c907feb1fbacc7633908b9e82738/wrapt-2.2.0rc11-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:77cc036f79eaf72861329bab07f180b9ca192e3b17d17f3466b88b4f04372b33", size = 205291, upload-time = "2026-04-24T10:15:39.848Z" }, - { url = "https://files.pythonhosted.org/packages/af/c7/8afd82fc060d1e958a958c0be505cf983da0f7949b05a55c9cc8c1847490/wrapt-2.2.0rc11-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:bd6dc7339f6eb2b3e5556125d202bb2172ea8c9ebe68f0abbca67e6e1661a3c8", size = 192127, upload-time = "2026-04-24T10:16:05.053Z" }, - { url = "https://files.pythonhosted.org/packages/c6/80/18ae952432ffec22ae9e1f37cec4570fb3f321c83d05527813dae31fcc26/wrapt-2.2.0rc11-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b5623b1f2495cae98baadb2f4e4f37323128050c43b7e994047cf3618a5227af", size = 199157, upload-time = "2026-04-24T10:15:16.586Z" }, - { url = "https://files.pythonhosted.org/packages/f8/94/291693ae8e6706a08ed5e9368d883f14da8aab408bfa88117f4945c0db7c/wrapt-2.2.0rc11-cp314-cp314t-win32.whl", hash = "sha256:e6f4e23aadd29401414ae9c8ee12189cf93ceac63814bb7c2e54e38d42b1da79", size = 80146, upload-time = "2026-04-24T10:16:10.093Z" }, - { url = "https://files.pythonhosted.org/packages/40/08/cee79e056b80f510bf30a86b2f44649a2aa07e0331e77afa226df18ab9d6/wrapt-2.2.0rc11-cp314-cp314t-win_amd64.whl", hash = "sha256:4c03de92788b3b9f7d862212d93c8b8f19328a97f1371e9c8560ce6178b21d48", size = 83770, upload-time = "2026-04-24T10:17:32.965Z" }, - { url = "https://files.pythonhosted.org/packages/3e/53/8f4348643e9b3fef1efede571b0f3aa282846e73b1e2bd16289d9cbba180/wrapt-2.2.0rc11-cp314-cp314t-win_arm64.whl", hash = "sha256:be23d203b7cbbf35147efae0db17feffee59d540138989cd3838c233505db8a3", size = 80650, upload-time = "2026-04-24T10:16:11.574Z" }, - { url = "https://files.pythonhosted.org/packages/42/d9/bee80519aaf88101996d653050e6d78aa3a63d87d6f735fd63955414f7c9/wrapt-2.2.0rc11-py3-none-any.whl", hash 
= "sha256:48a0ea119e937ec94452b4b6a4301bb6a435f18262298e141cc49b7e495df782", size = 60936, upload-time = "2026-04-24T10:16:48.108Z" }, +version = "1.17.4rc1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/61/e0/c6c3e66c6ca371728de87b44102b61f3fdacc03c8b0b1e4ac5f30d71c5ce/wrapt-1.17.4rc1.tar.gz", hash = "sha256:19c0363cb46f42cf5536c7b9d9c921cc1ae24e55fe4d45c3a19315e9f2aa8964", size = 55653, upload-time = "2026-03-06T05:27:09.446Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/09/f4b0c4c5098ee0a4e89542d259f2ce2a15124efb43cbd0aae442d284d4f8/wrapt-1.17.4rc1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3ed597307c29facdfe47eb5521123b121d189a2bbae3e34dfaf10a1f8ebb9bc1", size = 39033, upload-time = "2026-03-06T05:28:00.852Z" }, + { url = "https://files.pythonhosted.org/packages/c6/61/779692b7228a9e2f430edc2137737821f7e249f73be30d589ddc3c92532c/wrapt-1.17.4rc1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:97a6d6b40c2347fc6ea5017c715a4ac0a29716ae17b70060f24c1ca22757289a", size = 39296, upload-time = "2026-03-06T05:27:42.668Z" }, + { url = "https://files.pythonhosted.org/packages/9d/20/6cf5d4cae58fd19a0b89f977aaac957795930123c917d44536d6a04c0745/wrapt-1.17.4rc1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c5e9e219bd65d89356da8af2168fb23e3480736949ffad617d6d73a16039a5dc", size = 88141, upload-time = "2026-03-06T05:27:44.037Z" }, + { url = "https://files.pythonhosted.org/packages/77/fb/eba8b87158819858fcd2a6d1c80276a22085292866554bb82faa731e042f/wrapt-1.17.4rc1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:647fae8af1ac1789023ba267fd84522096db737a522597b53fbf3fe2b45482db", size = 88256, upload-time = "2026-03-06T05:27:40.31Z" }, + { url = 
"https://files.pythonhosted.org/packages/a4/b5/d547471fd5eb77280157f70698ae5e91913d6fecc1bc2eb9a90ccb7e27f1/wrapt-1.17.4rc1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4d8ed3e2538fbacd8b62462c58676aabba38ca8e9e8ad6c11ed94ec0db926e29", size = 84248, upload-time = "2026-03-06T05:28:15.363Z" }, + { url = "https://files.pythonhosted.org/packages/46/ce/cc8e75f1bcc230031037940cf33e0361fca3229296ebf706459598ef00da/wrapt-1.17.4rc1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ae3dabc85555f5a8330b324d466d577f3cc60669150fe8e381719b5b680113b0", size = 87208, upload-time = "2026-03-06T05:28:23.392Z" }, + { url = "https://files.pythonhosted.org/packages/4a/62/f879b4f4c320049708ba5e02fda0756dd93c78e8831ef881323074594404/wrapt-1.17.4rc1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:e27719a9b75517191cbe23e5f54dd410f39076d6e8369c259a1b990c6ac924f4", size = 83645, upload-time = "2026-03-06T05:21:23.051Z" }, + { url = "https://files.pythonhosted.org/packages/35/c8/ff8bf340cf45aaf300ac864772085a1fac27a265b6565081da91294b0176/wrapt-1.17.4rc1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c53fcd7cf09a223eaab9f425dda6e38929f4534112df0def102ffa5ef9da6086", size = 87828, upload-time = "2026-03-06T05:21:20.84Z" }, + { url = "https://files.pythonhosted.org/packages/2e/ed/e3c750db3c19eae7bcd2506291d85595127637b37e180be72cf65951f779/wrapt-1.17.4rc1-cp312-cp312-win32.whl", hash = "sha256:7062f45cc386554e94521da25cf1b89b65e72ff5e1b62c2c6735a5c4dfe61b19", size = 36803, upload-time = "2026-03-06T05:21:07.939Z" }, + { url = "https://files.pythonhosted.org/packages/58/07/a58ebce46e2258989c4147b26bcf4926aeebc9aea2a21f581a3a6a4ba3ac/wrapt-1.17.4rc1-cp312-cp312-win_amd64.whl", hash = "sha256:aaf599f8535cbc8c7c016763e72486cfeae933382f23b2c1b632952bee4f11ae", size = 38968, upload-time = "2026-03-06T05:21:12.013Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/52/f3a464bc629690b8d0551be8187fdbce57e26337eabc11acda67c2bf18bd/wrapt-1.17.4rc1-cp312-cp312-win_arm64.whl", hash = "sha256:488c903c475c54ef062f6a2c0c49dffd608d501bc8d05e061ff19eb794f31fb2", size = 36940, upload-time = "2026-03-06T05:26:57.759Z" }, + { url = "https://files.pythonhosted.org/packages/94/c9/bfb0840b9d1a3e9478c9d6bd1b5e2fb82fdca7c046bc10e8c44f9273cd46/wrapt-1.17.4rc1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5d1a4c658bea05c1b22ae374f74c25b400535f3dccbf795b121153d5628216f0", size = 39037, upload-time = "2026-03-06T05:28:18.986Z" }, + { url = "https://files.pythonhosted.org/packages/41/82/1e234ad6b64cd705557a0a682dbdce499db082a1932f9c95f200ed0843da/wrapt-1.17.4rc1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:491d11b84ac47568ee88777304c42d047d33307ec82162235d7e8261ee983eaa", size = 39295, upload-time = "2026-03-06T05:28:26.945Z" }, + { url = "https://files.pythonhosted.org/packages/f2/d9/2143f5825ef49046b376a2d9136621a7aa66a9e93ccc82b162d9e79ab678/wrapt-1.17.4rc1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:16426870299de6370b93760a50ae5bc813548f4666e6e515dcce3ec7601b9c59", size = 88175, upload-time = "2026-03-06T05:27:25.018Z" }, + { url = "https://files.pythonhosted.org/packages/f3/dd/8add4d24770a2e960f2bb8cb062a83f880a6aa91664b01d6de1e62917e45/wrapt-1.17.4rc1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a35158b0bf2c2d2033eba3c56832e803d73658dc4e92f14f1ea4c92ab0dfaafe", size = 88320, upload-time = "2026-03-06T05:27:37.675Z" }, + { url = "https://files.pythonhosted.org/packages/9c/34/c47fd4837b07b9f8ae8cfe749ea0d6fe5ea506c2d324850f3067f5f66ca2/wrapt-1.17.4rc1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a33bff65de96bc32f7f1df1492c2808068070ed0f42f1fcef2b47846f6a6a03a", size = 84302, upload-time = "2026-03-06T05:21:09.738Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/b0/20542954e5929383f55da30d4b9a47764866a2d253d8bea0a5d366ea1e7c/wrapt-1.17.4rc1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:931dae558932c8ba8e4de77ce92ed505fe5a8fd9dab66a2cbbc9d5d3a3a32bb4", size = 87210, upload-time = "2026-03-06T05:27:39.024Z" }, + { url = "https://files.pythonhosted.org/packages/d6/ff/37a5295d7f01b270186191035f67142f3052882210f673b9a62f82fcfc9f/wrapt-1.17.4rc1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:42ea3a5f62f5980031aaf6e28074cb17cea8df06cb828bcd2882d525f7ccc2f9", size = 83709, upload-time = "2026-03-06T05:27:17.469Z" }, + { url = "https://files.pythonhosted.org/packages/05/ff/11f668fba8ab6436c3a0167d0dc2aacb1c9fca675c1268a83d9106457b0b/wrapt-1.17.4rc1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b302dc5e126057f74b82223c3b19a41dfeead10292667be1538985ef75034f3b", size = 87866, upload-time = "2026-03-06T05:27:21.872Z" }, + { url = "https://files.pythonhosted.org/packages/fe/41/b7e49896146dd95fc8e9ecda84ede824c5d34105dd84aa5f4e108a0c137a/wrapt-1.17.4rc1-cp313-cp313-win32.whl", hash = "sha256:27bf0d37ebcd4a43e8369eaf60dd9ea45f30933a921453f61bd6476ffe39bbfb", size = 36810, upload-time = "2026-03-06T05:28:25.862Z" }, + { url = "https://files.pythonhosted.org/packages/86/8d/ba014ec122b07b6441eb9ed341514045a4c79677186623733be460c379b3/wrapt-1.17.4rc1-cp313-cp313-win_amd64.whl", hash = "sha256:22e85eab852e7182c41acef5f9d95d5d63a1b115910951fb38feccf67b514818", size = 38977, upload-time = "2026-03-06T05:26:55.473Z" }, + { url = "https://files.pythonhosted.org/packages/f4/ef/6561940fba308d086f5967827c63bce7dbf8c54717bc33c7f523f0018400/wrapt-1.17.4rc1-cp313-cp313-win_arm64.whl", hash = "sha256:5be27331b6eae2317350c4adee1cf92edc0866cd7db726f574f10c8db227c134", size = 36944, upload-time = "2026-03-06T05:27:00.79Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/3e/1ab40e5f926d0650fd5b7e23cebcdd4eab6bed961ac6e7ed5307638ddd27/wrapt-1.17.4rc1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:474a45ee2dfa6bb8c1a2a63fbc91c53da010caece85464a334fdb9aabafb6ecb", size = 40438, upload-time = "2026-03-06T05:27:58.196Z" }, + { url = "https://files.pythonhosted.org/packages/af/26/8d288da55259592a9aff160af4192db56799a74d3389ce032f54c8c8b74c/wrapt-1.17.4rc1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3dcf7b65ca203123c8613ae609441812b53ae047495e72b0dc423e5d31510128", size = 40586, upload-time = "2026-03-06T05:27:03.329Z" }, + { url = "https://files.pythonhosted.org/packages/bf/d4/cd7b78cb59ea4d348a77906dfac3d30ed1c598732d9ee3cd8edcf7762bca/wrapt-1.17.4rc1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1713dac1faf01465058481dd07f7632847ca8867e77347527788aff0bdb32d8a", size = 108627, upload-time = "2026-03-06T05:27:28.884Z" }, + { url = "https://files.pythonhosted.org/packages/0b/26/6ae3790d46b56010f01dd74a207af7aebb7357b95487e222d1a6ad912f84/wrapt-1.17.4rc1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:094bef74a0ef4c04416775a4f1965b2a29847d6aafa935229c1bf9d18f1d8c58", size = 113179, upload-time = "2026-03-06T05:27:48.59Z" }, + { url = "https://files.pythonhosted.org/packages/9f/0e/c0b0b05de9ebf705ca8daa1e86c20a244ce0862f08faf1b23784d3abf766/wrapt-1.17.4rc1-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9e1f828a32b4e71b6349a00a0a3bcc9e41413e0005160fb70601b83cb171ce6e", size = 103238, upload-time = "2026-03-06T05:21:24.306Z" }, + { url = "https://files.pythonhosted.org/packages/ec/33/b2cd9f6b86bf322cb1711c6070b9efa6b28a8e8c063f56d165b30c8d1668/wrapt-1.17.4rc1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e24a05dae0ba49ce5f490bffc4e369a5770663c789c0bc862de8ac235b18394d", size = 110742, upload-time = "2026-03-06T05:27:46.881Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/f4/b9709eea1e0087c8ccb1c7a38076a76ec3eb6f0555f74c3a65eaadf5c987/wrapt-1.17.4rc1-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:7d00e2453975a6519cbdde4812234ab0183860011aae2316acbad46f3b8e84e9", size = 102364, upload-time = "2026-03-06T05:27:51.288Z" }, + { url = "https://files.pythonhosted.org/packages/87/c5/bd4a00aef4d4b1a7eff25456b2f9c15de8ec9a3f4ccf98f0acdb2c48c879/wrapt-1.17.4rc1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:89b5fe920975e4e63099aeb194b51ad0ad84b45995dada353aa1e5a551462fd0", size = 107013, upload-time = "2026-03-06T05:28:03.308Z" }, + { url = "https://files.pythonhosted.org/packages/86/73/aedee294890bde90b262b21156c20eb36450ee812a20384ea5df9ba49bbd/wrapt-1.17.4rc1-cp313-cp313t-win32.whl", hash = "sha256:c94efd8ca87b9333590b6ee0384a0863ad92b54646232396c3c8043b0d115d49", size = 38129, upload-time = "2026-03-06T05:27:12.074Z" }, + { url = "https://files.pythonhosted.org/packages/96/17/dbf146893d31705872d2e515cd2ef70e01e305aa441a1736cdeee856deb9/wrapt-1.17.4rc1-cp313-cp313t-win_amd64.whl", hash = "sha256:db3ea738ffd95b88a5874ed6c7d26ffad1b482a5b8036e7b4b667926d3d5d728", size = 40751, upload-time = "2026-03-06T05:27:18.843Z" }, + { url = "https://files.pythonhosted.org/packages/70/a1/2bafa54d3621ca0c8a0b7cd78150d6239e83553f8f2bf8e6fc17286bac34/wrapt-1.17.4rc1-cp313-cp313t-win_arm64.whl", hash = "sha256:d8f67707f553821691228bf3596bf60cf83e112c230ca4ebfae759feed20cf57", size = 38262, upload-time = "2026-03-06T05:26:54.324Z" }, + { url = "https://files.pythonhosted.org/packages/84/e2/203c4a94a4f2cb5bd1b2180261f213b6ecf386839d9c4a7b03b187e1d973/wrapt-1.17.4rc1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:4384529d0f82bcdebec1d01f7b714b31ea34ee1b43a8399df5ed0db443bf6551", size = 39210, upload-time = "2026-03-06T05:21:13.2Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/de/0f3940df4cf001cc79cfd321c7e7856e6cdeac4c53b8292b4d318884a9be/wrapt-1.17.4rc1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d665e1f4bdeb551c55a56fe076f3da2aa4acea9b5108723adf4347b9af17bb70", size = 39339, upload-time = "2026-03-06T05:28:28.027Z" }, + { url = "https://files.pythonhosted.org/packages/28/87/1b13a950ad90919078951cadc8c8418241f55f6355bc1b64420072453d2f/wrapt-1.17.4rc1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95be0b13dcde68f73921026c66b4bb464a299683365a7243b5db49f220e5463f", size = 87262, upload-time = "2026-03-06T05:27:30.624Z" }, + { url = "https://files.pythonhosted.org/packages/00/a9/c3015e3929b715ae2737eb332dc5e056bb0a3a450d26dca962dc93da8a32/wrapt-1.17.4rc1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b7e86063ed1d5b46e2c6ac7c3c8c9bb1b47e47d3ceb804a93f566d1294810505", size = 88061, upload-time = "2026-03-06T05:27:33.243Z" }, + { url = "https://files.pythonhosted.org/packages/15/8f/83d676e926c2c6390e6019aacb3f598c929426d67d1d97d3ed26536a0ac9/wrapt-1.17.4rc1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c710707166eed80e37242d754a204f4c07b8f3ab8024b07d583f48024d260a05", size = 84543, upload-time = "2026-03-06T05:28:12.622Z" }, + { url = "https://files.pythonhosted.org/packages/87/8d/f48862187bcee1d7d0a6c2c8cf4830ecd9e06bf0d770e6efbd2a78b70dad/wrapt-1.17.4rc1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c85cf9d6017e5188697a5947dd76f29ba1c56707ea612173b1b1ee1bc27b9601", size = 87050, upload-time = "2026-03-06T05:27:31.958Z" }, + { url = "https://files.pythonhosted.org/packages/b3/34/1e3c265902f02b3c1644568be86ddc3cf0d76552723ae71b7ca11e10bdc3/wrapt-1.17.4rc1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:44edeaf45e144c2de1102427530790c32eeb0084451f7816a58d744d077e0b3c", size = 83965, upload-time = "2026-03-06T05:27:08.164Z" }, + { url = 
"https://files.pythonhosted.org/packages/ca/4c/24a7c0fa058212cb53a7f582c9631b1b9ce9d5a81400095c745a1cb7a4be/wrapt-1.17.4rc1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:201acefeff4fc6d497f411595c46f79eb91e562fa4883847db8148474a1e3d80", size = 86958, upload-time = "2026-03-06T05:28:24.737Z" }, + { url = "https://files.pythonhosted.org/packages/99/85/445569dc31ee7a23c199afae532a41cc2f446d434d288e7544b1a38fbd19/wrapt-1.17.4rc1-cp314-cp314-win32.whl", hash = "sha256:73016054d0e32a65fa5da708e839be3036c786416adca00a0444aec5837b1b83", size = 37276, upload-time = "2026-03-06T05:28:21.776Z" }, + { url = "https://files.pythonhosted.org/packages/fb/a8/1636a670886dec6c59fa60a8112fc3fd56c194b23b07106dbee465af73c2/wrapt-1.17.4rc1-cp314-cp314-win_amd64.whl", hash = "sha256:66b0485668cff7bfac0eaccccb3a991dba3f0d5205d6bc5a9c69aa120b2b6ccf", size = 39405, upload-time = "2026-03-06T05:26:51.717Z" }, + { url = "https://files.pythonhosted.org/packages/b9/5e/9f820a1d60ea579b048a8486c319918fdf06b83cc37f67f8dd4c53b80df6/wrapt-1.17.4rc1-cp314-cp314-win_arm64.whl", hash = "sha256:2712e6caad2a5032d6496612eeca5cdb65fadd6da55c5f931d556ac656e3ebdd", size = 37367, upload-time = "2026-03-06T05:27:23.446Z" }, + { url = "https://files.pythonhosted.org/packages/14/92/617f98da4517f2bf2a63b1a929f5bec029292d6bd31c7fd79ee25d54635e/wrapt-1.17.4rc1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:3102bbdc650a7e8fd8672e51c6d204688fc75257e2d3c6a12172a8e05c2ab0cd", size = 40565, upload-time = "2026-03-06T05:26:50.47Z" }, + { url = "https://files.pythonhosted.org/packages/6b/80/8c4444c471d90f9cfe1b453e5bf605fccadb2d3399d2ed60ed3240c188b3/wrapt-1.17.4rc1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a3ef8f9aad3593f3b00527da3815e15941caf169c51da5da18e64d1949da3f29", size = 40585, upload-time = "2026-03-06T05:21:14.419Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/fb/c3938d7fef6ce445d32e5a757268adc4e5c298d1985dff95c535e1ceca38/wrapt-1.17.4rc1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:033b67f5cc44d992221617ea6be6f12d8857b90a5d0901738f4f6c92498d3298", size = 108671, upload-time = "2026-03-06T05:28:16.715Z" }, + { url = "https://files.pythonhosted.org/packages/ad/54/d5ae3c39c871ff63c973848558c1657fa09cf84c19e5242e25f57e8b251a/wrapt-1.17.4rc1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b6b3c400c7c7b6346e9d3d22f036443ff033fa924d472715d127f169e8f9e137", size = 113193, upload-time = "2026-03-06T05:27:16.153Z" }, + { url = "https://files.pythonhosted.org/packages/18/c0/37f69e1231e8cfd3e642ff24f002cd71cbe477fca2abe6ec43978426f09a/wrapt-1.17.4rc1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8b3a9ed0f966b6a199e251800f5ee895bb41694ad1bb92f19446cbb90e68cdec", size = 103256, upload-time = "2026-03-06T05:27:52.645Z" }, + { url = "https://files.pythonhosted.org/packages/e0/5b/71f5f63bb3c4bfa909ae320ebcf290250cd86207d54cdffc3b12c1a57b8a/wrapt-1.17.4rc1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:076702de22f5df07bfaeb67ac750aabe2167fd703ed60ac8e2edb42a082119e8", size = 110756, upload-time = "2026-03-06T05:26:59.375Z" }, + { url = "https://files.pythonhosted.org/packages/fe/52/6ef9887520e0038cacb97bfd4375a83e3cf947d82a11e4017af2a98647cb/wrapt-1.17.4rc1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:1374e2051eff90875b3331dc5930209807db9e03ba863c2a9009ab7ba77daa7c", size = 102369, upload-time = "2026-03-06T05:26:52.912Z" }, + { url = "https://files.pythonhosted.org/packages/8b/95/670237dcee12fb293cb4674f93db112806783a33cc8cc18fa64214c12614/wrapt-1.17.4rc1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6a02b14dfc3ded8f1be82d824628ccda63ac37d1833c8328adf7a6b019f6a230", size = 107045, upload-time = "2026-03-06T05:27:06.879Z" }, + { 
url = "https://files.pythonhosted.org/packages/1a/15/2ecc4112171d195ff1c4f0baf7d345ca5f0ec464381bc7024857b3db47d5/wrapt-1.17.4rc1-cp314-cp314t-win32.whl", hash = "sha256:2bdf836e6c8e8f26c85716c08a0063309a2d9362e090b499f32fc4de8f2c651d", size = 38809, upload-time = "2026-03-06T05:21:15.397Z" }, + { url = "https://files.pythonhosted.org/packages/d7/45/81fec744e8c88f6255a5ccc317997a01b1a08fa925b211e2078fa8bfbddf/wrapt-1.17.4rc1-cp314-cp314t-win_amd64.whl", hash = "sha256:f75df0a7f1dab354cd092ee9c466efb3556f87ecf103683cecc0f7488e9dbf77", size = 41427, upload-time = "2026-03-06T05:28:17.885Z" }, + { url = "https://files.pythonhosted.org/packages/3d/72/d6ecf86cb5f3574a55fd2ba58c6eca447bee90a8757f1f32fba4b14ff9d5/wrapt-1.17.4rc1-cp314-cp314t-win_arm64.whl", hash = "sha256:3e2f5e602d656b53118bfdc9d5d94b840069f1753923e48726f0bc02dd65deb8", size = 38531, upload-time = "2026-03-06T05:27:57.157Z" }, + { url = "https://files.pythonhosted.org/packages/29/b2/367cc462b6ad84bfb7a93b00f5c4b01c7bc880a0e7ce36c1a3900eee153a/wrapt-1.17.4rc1-py3-none-any.whl", hash = "sha256:9cc3fb27bc5f564895c967b9b06dd2b799ee107b33a7f8ad8b8346b5d6b35b60", size = 23719, upload-time = "2026-03-06T05:27:55.715Z" }, ] [[package]]