Skip to content

Commit 4b2f1f2

Browse files
feat(pii): add redaction timing metrics across sidecar and persist path
- Log per-request duration in the Presidio sidecar (/analyze, /anonymize)
- Add durationMs to the mask-batch endpoint log line
- Emit per-execution PII redaction timing (stringCount, totalBytes, durationMs, scrubbed)
1 parent f5116f4 commit 4b2f1f2

3 files changed

Lines changed: 36 additions & 1 deletion

File tree

apps/pii/server.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
endpoints so a single PRESIDIO_URL serves both.
66
"""
77

8+
import logging
9+
import time
810
from typing import Any
911

1012
from fastapi import FastAPI
@@ -133,6 +135,9 @@ def build_analyzer() -> AnalyzerEngine:
133135
analyzer = build_analyzer()
134136
anonymizer = AnonymizerEngine()
135137

138+
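# Records propagate to the root logger; they reach the container log stream only if the root logger has a handler and an INFO-or-lower level (uvicorn's default log config sets up only its own uvicorn.* loggers) — TODO confirm the deployed log config.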
139+
logger = logging.getLogger("sim.pii")
140+
136141
app = FastAPI(title="Sim Presidio", docs_url=None, redoc_url=None)
137142

138143

@@ -163,18 +168,27 @@ def supported_entities(language: str = "en") -> list[str]:
163168

164169
@app.post("/analyze")
165170
def analyze(req: AnalyzeRequest) -> list[dict[str, Any]]:
171+
started = time.perf_counter()
166172
results = analyzer.analyze(
167173
text=req.text,
168174
language=req.language,
169175
entities=req.entities or None,
170176
score_threshold=req.score_threshold,
171177
return_decision_process=req.return_decision_process,
172178
)
179+
logger.info(
180+
"analyze lang=%s chars=%d entities=%d duration_ms=%.1f",
181+
req.language,
182+
len(req.text),
183+
len(results),
184+
(time.perf_counter() - started) * 1000,
185+
)
173186
return [r.to_dict() for r in results]
174187

175188

176189
@app.post("/anonymize")
177190
def anonymize(req: AnonymizeRequest) -> dict[str, Any]:
191+
started = time.perf_counter()
178192
analyzer_results = [
179193
RecognizerResult(
180194
entity_type=r["entity_type"],
@@ -197,6 +211,12 @@ def anonymize(req: AnonymizeRequest) -> dict[str, Any]:
197211
analyzer_results=analyzer_results,
198212
operators=operators,
199213
)
214+
logger.info(
215+
"anonymize chars=%d spans=%d duration_ms=%.1f",
216+
len(req.text),
217+
len(analyzer_results),
218+
(time.perf_counter() - started) * 1000,
219+
)
200220
return {
201221
"text": result.text,
202222
"items": [

apps/sim/app/api/guardrails/mask-batch/route.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
2727
const { texts, entityTypes, language } = parsed.data.body
2828

2929
try {
30+
const startedAt = performance.now()
3031
const masked = await maskPIIBatch(texts, entityTypes, language)
31-
logger.info('Masked PII batch', { count: texts.length })
32+
logger.info('Masked PII batch', {
33+
count: texts.length,
34+
durationMs: Math.round(performance.now() - startedAt),
35+
})
3236
return NextResponse.json({ masked })
3337
} catch (error) {
3438
// An unreachable/misconfigured Presidio sidecar makes maskPIIBatch throw; fail

apps/sim/lib/logs/execution/pii-redaction.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ export async function redactPIIFromExecution(
132132
): Promise<RedactablePayload> {
133133
const { entityTypes } = options
134134
const language = options.language ?? 'en'
135+
const startedAt = performance.now()
135136

136137
const units = REDACTABLE_KEYS.filter((key) => payload[key] !== undefined).map((key) => ({
137138
key,
@@ -151,12 +152,14 @@ export async function redactPIIFromExecution(
151152
if (collected.length === 0) return payload
152153

153154
let masked: string[]
155+
let scrubbed = false
154156
if (totalBytes > PII_MAX_TOTAL_BYTES) {
155157
logger.warn('Execution exceeds PII redaction ceiling; scrubbing text', {
156158
totalBytes,
157159
ceiling: PII_MAX_TOTAL_BYTES,
158160
})
159161
masked = collected.map(() => REDACTION_FAILED_MARKER)
162+
scrubbed = true
160163
} else {
161164
try {
162165
// Presidio runs only in the app container; the persist path also runs in
@@ -168,6 +171,7 @@ export async function redactPIIFromExecution(
168171
stringCount: collected.length,
169172
})
170173
masked = collected.map(() => REDACTION_FAILED_MARKER)
174+
scrubbed = true
171175
}
172176
}
173177

@@ -176,5 +180,12 @@ export async function redactPIIFromExecution(
176180
for (const unit of units) {
177181
result[unit.key] = transformUnit(unit.key, unit.value, () => masked[index++])
178182
}
183+
184+
logger.info('PII redaction completed', {
185+
stringCount: collected.length,
186+
totalBytes,
187+
durationMs: Math.round(performance.now() - startedAt),
188+
scrubbed,
189+
})
179190
return result
180191
}

0 commit comments

Comments
 (0)