diff --git a/manifests/nodejs.yml b/manifests/nodejs.yml
index 43b9643e991..fa453e1447c 100644
--- a/manifests/nodejs.yml
+++ b/manifests/nodejs.yml
@@ -1920,6 +1920,7 @@ manifest:
   tests/otel/test_context_propagation.py::Test_Otel_Context_Propagation_Default_Propagator_Api::test_propagation_extract: incomplete_test_app (Node.js extract endpoint doesn't seem to be working.)
   tests/otel/test_context_propagation.py::Test_Otel_Context_Propagation_Default_Propagator_Api::test_propagation_inject: incomplete_test_app (Node.js inject endpoint doesn't seem to be working.)
   tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP: *ref_5_99_0
+  tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP::test_128bit_trace_id_consistent_across_spans: missing_feature (128-bit trace ID not propagated to all spans in a trace)
   tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP::test_unsampled_trace:
     - weblog_declaration:
         nextjs: missing_feature (AppSec/IAST force-samples traces, overriding unsampled traceparent)
diff --git a/manifests/python.yml b/manifests/python.yml
index 3efde1b87d2..27a76853e56 100644
--- a/manifests/python.yml
+++ b/manifests/python.yml
@@ -1687,6 +1687,7 @@ manifest:
         uds-flask: v4.3.1 # Modified by easy win activation script
         uwsgi-poc: v4.3.1 # Modified by easy win activation script
   tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP: v4.8.0
+  tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP::test_128bit_trace_id_consistent_across_spans: missing_feature (128-bit trace ID not propagated to all spans in a trace)
   tests/otel_tracing_e2e/test_e2e.py::Test_OTelLogE2E: irrelevant
   tests/otel_tracing_e2e/test_e2e.py::Test_OTelMetricE2E: irrelevant
   tests/otel_tracing_e2e/test_e2e.py::Test_OTelTracingE2E: irrelevant
diff --git a/tests/otel/test_tracing_otlp.py b/tests/otel/test_tracing_otlp.py
index e2082fd2da8..88995268123 100644
--- a/tests/otel/test_tracing_otlp.py
+++ b/tests/otel/test_tracing_otlp.py
@@ -2,6 +2,8 @@
 # This product includes software developed at Datadog (https://www.datadoghq.com/).
 # Copyright 2024 Datadog, Inc.
 
+import base64
+import binascii
 import re
 
 from utils import context, features, interfaces, scenarios, weblog
@@ -9,6 +11,26 @@
 from utils.dd_constants import SpanKind, StatusCode
 
 
+def _trace_id_to_hex(tid: str | None) -> str:
+    """Normalize an OTLP traceId field to a 32-char lowercase hex string.
+
+    JSON Protobuf encoding emits the field as a hex string. Binary Protobuf encoding emits
+    16 raw bytes, which the proxy renders as a standard-base64 string. Returns "" if the
+    input is empty or doesn't decode to a 16-byte ID.
+    """
+    if not tid:
+        return ""
+    if re.fullmatch(r"[0-9a-fA-F]{32}", tid):
+        return tid.lower()
+    try:
+        decoded = base64.b64decode(tid, validate=True)
+    except (ValueError, binascii.Error):
+        return ""
+    if len(decoded) != 16:
+        return ""
+    return decoded.hex()
+
+
 # @scenarios.apm_tracing_e2e_otel
 @features.otel_api
 @scenarios.apm_tracing_otlp
@@ -107,3 +129,65 @@ def test_unsampled_trace(self):
 
         # Assert that the span from this test case was not exported
         assert len(data) == 0, f"Expected no weblog spans in the OTLP trace payload, got {data}"
+
+    def setup_128bit_trace_id_consistent_across_spans(self):
+        self.req = weblog.get("/make_distant_call", params={"url": "http://weblog:7777/"})
+
+    def test_128bit_trace_id_consistent_across_spans(self):
+        """Validates that every span in a trace carries the same full 128-bit OTLP traceId.
+
+        DD tracers emit 128-bit trace IDs by default but the v04/v05 msgpack wire format only
+        carries the lower 64 bits per span; the upper 64 bits live in the `_dd.p.tid` meta tag,
+        which RFC #85 sets on the chunk root only. The OTLP exporter must apply that value to
+        every span in the chunk, otherwise child spans are exported with the upper 64 bits zeroed,
+        resulting in two distinct trace IDs in the OTLP backend. The /make_distant_call endpoint
+        produces a multi-span trace (server + client + nested server) so we can verify this
+        propagation.
+        """
+        data = list(interfaces.open_telemetry.get_otel_spans(self.req))
+
+        # `get_otel_spans` yields the server span, identified by the user-agent header
+        assert len(data) >= 1, f"Expected at least one matching OTLP span, got {data}"
+        _, content, server_span = data[0]
+
+        root_span_tid = server_span.get("traceId")
+        root_span_hex_id = _trace_id_to_hex(root_span_tid)
+        assert root_span_hex_id, (
+            f"server span has unrecognized traceId encoding (expected hex or base64-bytes): {root_span_tid!r}"
+        )
+
+        # The upper 64 bits must be non-zero — if they're zero the tracer is either emitting
+        # 64-bit-only IDs (misconfiguration for this scenario) or, more importantly, the OTLP
+        # exporter dropped the high bits on the root span itself.
+        upper_hex = root_span_hex_id[:16]
+        assert upper_hex != "0" * 16, (
+            f"server traceId upper 64 bits are zero (expected a 128-bit ID): {root_span_tid!r}"
+        )
+
+        # Collect every span in this OTLP payload whose traceId shares the server span's lower 64 bits.
+        # Spans that share the lower 64 bits are expected to share the full 128-bit traceId as well.
+        anchor_lower_hex = root_span_hex_id[16:]
+        single_trace_spans = []
+        for resource_span in content.get("resourceSpans", []):
+            for scope_span in resource_span.get("scopeSpans", []):
+                for s in scope_span.get("spans", []):
+                    hex_tid = _trace_id_to_hex(s.get("traceId"))
+                    if hex_tid and hex_tid[16:] == anchor_lower_hex:
+                        single_trace_spans.append((s, hex_tid))
+
+        # The /make_distant_call trace produces a server entry span plus at least one child span
+        # (the outbound HTTP client span).
+        assert len(single_trace_spans) >= 2, (
+            f"Expected at least two spans in the same trace for the OTLP payload, found "
+            f"{len(single_trace_spans)}. The /make_distant_call endpoint must produce a multi-span trace "
+            f"for this test to exercise _dd.p.tid propagation."
+        )
+
+        mismatched = [(s.get("spanId"), hex_tid) for s, hex_tid in single_trace_spans if hex_tid != root_span_hex_id]
+        assert not mismatched, (
+            f"Found {len(mismatched)} span(s) in the same logical trace with a different "
+            f"128-bit traceId than the server span (server traceId={root_span_hex_id}). "
+            f"Mismatched (span_id, trace_id_hex): {mismatched}. This indicates the OTLP "
+            f"exporter is not propagating _dd.p.tid (high 64 bits) from the chunk root to "
+            f"the remaining chunk spans."
+        )
diff --git a/utils/interfaces/_open_telemetry.py b/utils/interfaces/_open_telemetry.py
index aa893b9a510..14cbbee6bdb 100644
--- a/utils/interfaces/_open_telemetry.py
+++ b/utils/interfaces/_open_telemetry.py
@@ -50,6 +50,8 @@ def get_otel_spans(self, request: HttpResponse):
         if rid:
             logger.debug(f"Try to find traces related to request {rid}")
 
+        parent_spans = set()
+
         for data in self.get_data(path_filters=paths):
             content = data.get("request").get("content")
             logger.debug(f"[get_otel_spans] content: {content}")
@@ -58,9 +60,19 @@ def get_otel_spans(self, request: HttpResponse):
                 scope_spans = resource_span.get("scopeSpans")
                 for scope_span in scope_spans:
                     for span in scope_span.get("spans"):
+                        # NOTE(review): OTLP/JSON names the parent link "parentSpanId"; confirm the proxy
+                        # really emits "parentId", otherwise this is always None and never matches below.
+                        parent_span_id = span.get("parentId")
                         attributes = span.get("attributes", {})
                         request_headers_user_agent_value = attributes.get("http.request.headers.user-agent", "")
                         user_agent_value = attributes.get("http.useragent", "")
-                        if rid in request_headers_user_agent_value or rid in user_agent_value:
+                        if (
+                            rid in request_headers_user_agent_value
+                            or rid in user_agent_value
+                            or parent_span_id in parent_spans
+                        ):
+                            span_id = span.get("spanId")
+                            if span_id:
+                                parent_spans.add(span_id)
                             yield data.get("request"), content, span
                             break  # Skip to next span