DataDog · leoromanovsky · May 21, 2026 · May 22, 2026 · May 22, 2026 · May 22, 2026
@@ -1026,6 +1026,20 @@ The endpoint must accept a query string parameter `code`, which should be an int
 This endpoint is used for client-stats tests to provide a separate "resource" via the endpoint path `stats-unique` to disambiguate those tests from other
 stats generating tests.
 
+### POST /ffe
+
+This endpoint is used by the Feature Flags & Experimentation scenario. It must
+accept a JSON body with these fields:
+
+- `flag`: the feature flag key to evaluate.
+- `variationType`: the expected variation type.
+- `defaultValue`: the value to return when evaluation cannot resolve the flag.
+- `targetingKey`: the evaluation subject key.
+- `attributes`: a flat JSON object of scalar targeting attributes (no nested objects or arrays).
+
+The response must be JSON and include at least `value` and `reason`. Error
+responses should also include `errorCode` and `errorMessage`.
+
 ### GET /healthcheck
 
 Returns a JSON dict, with those values :

@@ -605,9 +605,9 @@ manifest:
       component_version: <1.12.0
   tests/docker_ssi/test_docker_ssi_appsec.py::TestDockerSSIAppsecFeatures::test_telemetry_source_ssi: v1.8.3
   tests/docker_ssi/test_docker_ssi_crash.py::TestDockerSSICrash::test_crash: missing_feature (No implemented the endpoint /crashme)
-  tests/ffe/test_dynamic_evaluation.py: missing_feature
-  tests/ffe/test_exposures.py: missing_feature
-  tests/ffe/test_flag_eval_metrics.py: missing_feature
+  tests/ffe/test_dynamic_evaluation.py: missing_feature (PHP M1 dynamic evaluation is locally validated through the PHP 8.2 parametric path; weblog activation remains deferred)
+  tests/ffe/test_exposures.py: missing_feature (PHP exposure buffering and native flush lifecycle are not wired yet)
+  tests/ffe/test_flag_eval_metrics.py: missing_feature (PHP feature_flag.evaluations metric emission is not wired yet)
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_error: bug (MLOB-1234)
   tests/integrations/crossed_integrations/test_kafka.py::Test_Kafka: missing_feature
   tests/integrations/crossed_integrations/test_kinesis.py::Test_Kinesis_PROPAGATION_VIA_MESSAGE_ATTRIBUTES: missing_feature
@@ -730,7 +730,9 @@ manifest:
   tests/parametric/test_dynamic_configuration.py::TestDynamicConfigV1_ServiceTargets::test_not_match_service_target: missing_feature
   tests/parametric/test_dynamic_configuration.py::TestDynamicConfigV2: '>=1.16.0'
   tests/parametric/test_dynamic_configuration.py::TestDynamicConfigV2::test_tracing_client_tracing_tags: missing_feature
-  tests/parametric/test_ffe/test_dynamic_evaluation.py::Test_Feature_Flag_Dynamic_Evaluation: missing_feature
+  tests/parametric/test_ffe/test_dynamic_evaluation.py::Test_Feature_Flag_Dynamic_Evaluation: v1.20.0-dev
+  tests/parametric/test_ffe/test_dynamic_evaluation.py::Test_Feature_Flag_Parametric_Evaluation_Metrics: v1.20.0-dev
+  tests/parametric/test_ffe/test_dynamic_evaluation.py::Test_Feature_Flag_Parametric_Exposures: v1.20.0-dev
   tests/parametric/test_headers_b3.py::Test_Headers_B3::test_headers_b3_migrated_extract_invalid:
     - declaration: missing_feature (Need to remove b3=b3multi alias)
       component_version: <1.16.0
@@ -865,7 +867,7 @@ manifest:
   tests/parametric/test_parametric_endpoints.py::Test_Parametric_DDSpan_Start: v1.13.0+4663b2fa7c20c6920f347d059b57dc2a419cb7f7
   tests/parametric/test_parametric_endpoints.py::Test_Parametric_DDTrace_Baggage: missing_feature (baggage is not supported)
   tests/parametric/test_parametric_endpoints.py::Test_Parametric_DDTrace_Current_Span: bug (APMAPI-778)  # current span endpoint should return span and trace id of zero if no span is "active"
-  tests/parametric/test_parametric_endpoints.py::Test_Parametric_FFE_Start: missing_feature
+  tests/parametric/test_parametric_endpoints.py::Test_Parametric_FFE_Start: v1.20.0-dev
   tests/parametric/test_parametric_endpoints.py::Test_Parametric_Otel_Baggage: missing_feature (otel baggage is not supported)
   tests/parametric/test_parametric_endpoints.py::Test_Parametric_Otel_Current_Span: bug (APMAPI-778)  # otel current span endpoint should return a span and trace id of zero if no span is "active"
   tests/parametric/test_parametric_endpoints.py::Test_Parametric_Write_Log: missing_feature

@@ -16,6 +16,57 @@ This directory contains system tests for the Feature Flags & Experimentation (FF
 ./run.sh FEATURE_FLAGGING_AND_EXPERIMENTATION --library <language>
 ```
 
+## PHP Local Runbook
+
+PHP Milestone 1 is evaluation-only. Dynamic evaluation is enabled in
+`manifests/php.yml` for the locally validated PHP 8.2 parametric path. Weblog
+FFE activation remains deferred: the weblog tests under `tests/ffe/` (dynamic
+evaluation, exposures, and flag-evaluation metrics) remain `missing_feature`.
+
+The PHP parametric container consumes a custom tracer package from `binaries/`.
+Build the package from `dd-trace-php` with Bob's debug-artifact helper:
+
+```bash
+cd /path/to/dd-trace-php
+./tooling/bin/build-debug-artifact gnu-aarch64-8.2-nts /path/to/system-tests/binaries
+```
+
+On macOS with Colima, run the same command with the Docker socket exported:
+
+```bash
+DOCKER_HOST=unix://$HOME/.colima/default/docker.sock \
+  ./tooling/bin/build-debug-artifact gnu-aarch64-8.2-nts /path/to/system-tests/binaries
+```
+
+See `tests/parametric/test_ffe/AGENTS.md` for the focused PHP M1 validation
+commands and FFE-specific invariants.
+
+Run the validated dynamic evaluation tests for PHP:
+
+```bash
+TEST_LIBRARY=php \
+  ./run.sh PARAMETRIC tests/parametric/test_ffe/test_dynamic_evaluation.py::Test_Feature_Flag_Dynamic_Evaluation \
+  -F tests/parametric/test_ffe/test_dynamic_evaluation.py::Test_Feature_Flag_Dynamic_Evaluation -vv
+```
+
+Run the FFE parametric endpoint smoke test:
+
+```bash
+TEST_LIBRARY=php \
+  ./run.sh PARAMETRIC tests/parametric/test_parametric_endpoints.py::Test_Parametric_FFE_Start \
+  -F tests/parametric/test_parametric_endpoints.py::Test_Parametric_FFE_Start -vv
+```
+
+The PHP parametric server exposes `POST /ffe/start` and `POST /ffe/evaluate`
+from `utils/build/docker/php/parametric/server.php`. The evaluation endpoint
+accepts the canonical evaluation case fields used by the fixture corpus:
+`flag`, `variationType`, `defaultValue`, `targetingKey`, and `attributes`.
+
+The endpoint calls `DDTrace\FeatureFlags\Client`, which reads only from the
+tracer Remote Config lifecycle and the native `libdatadog` evaluator. M1
+transitional warnings are suppressed inside the endpoint so responses stay valid
+JSON for system-tests.
+
 ---
 
 # Eval Metrics Implementation Guide

@@ -0,0 +1,60 @@
+# FFE Parametric Test Notes
+
+Follow the repository root `AGENTS.md` first. These notes are specific to
+Feature Flags and Experimentation parametric tests.
+
+## PHP macOS Validation
+
+PHP M1 validation uses the parametric app, not the weblog. Build a local
+`dd-trace-php` artifact into this repo before running PHP tests:
+
+```bash
+DD_TRACE_PHP=/path/to/dd-trace-php
+SYSTEM_TESTS=/path/to/system-tests
+
+cd "$DD_TRACE_PHP"
+DOCKER_HOST=unix://$HOME/.colima/default/docker.sock \
+./tooling/bin/build-debug-artifact \
+  gnu-aarch64-8.2-nts \
+  "$SYSTEM_TESTS/binaries"
+```
+
+Use `gnu-aarch64-8.2-nts` for the PHP 8.2 parametric path on Apple Silicon.
+Docker Desktop users can usually omit `DOCKER_HOST`; Colima users normally need
+the socket path above.
+
+Run the focused PHP validator:
+
+```bash
+cd "$SYSTEM_TESTS"
+TEST_LIBRARY=php \
+  ./run.sh PARAMETRIC \
+  tests/parametric/test_ffe/test_dynamic_evaluation.py::Test_Feature_Flag_Dynamic_Evaluation \
+  -F tests/parametric/test_ffe/test_dynamic_evaluation.py::Test_Feature_Flag_Dynamic_Evaluation \
+  -vv
+```
+
+Run the endpoint smoke test:
+
+```bash
+TEST_LIBRARY=php \
+  ./run.sh PARAMETRIC \
+  tests/parametric/test_parametric_endpoints.py::Test_Parametric_FFE_Start \
+  -F tests/parametric/test_parametric_endpoints.py::Test_Parametric_FFE_Start \
+  -vv
+```
+
+## Test Invariants
+
+- PHP evaluation tests must use the live Remote Config flow through
+  `_set_and_wait_ffe_rc`; do not bypass RC with a PHP fixture loader.
+- The PHP endpoint must call `DDTrace\FeatureFlags\Client`, which is backed by
+  the tracer Remote Config lifecycle and the native `libdatadog` evaluator.
+- Preserve empty targeting keys. `targetingKey: ""` is valid and must not be
+  collapsed to missing or `null`.
+- Preserve empty JSON object responses as `{}`, not `[]`.
+- Reason assertions are intentionally narrow for M1. Keep value assertions
+  canonical and avoid broadening reason checks until libdatadog/system-test
+  reason semantics are settled.
+- Do not enable PHP weblog FFE, exposures, or evaluation metrics in this M1
+  parametric change. Those are later milestones.
@@ -1,5 +1,6 @@
 """Test FFE (Feature Flags & Experimentation) functionality via parametric tests."""
 
+import base64
 import json
 import pytest
 from pathlib import Path
@@ -51,6 +52,11 @@ def _get_test_case_files() -> list[str]:
     "DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS": "0.2",
 }
 
+# Environment for the evaluation-metric test: the default variables plus
+# DD_METRICS_OTEL_ENABLED=true.
+OTLP_METRICS_ENVVARS = DEFAULT_ENVVARS | {"DD_METRICS_OTEL_ENABLED": "true"}
+
 
 def _set_and_wait_ffe_rc(
     test_agent: TestAgentAPI, ufc_data: dict[str, Any], config_id: str | None = None
@@ -79,6 +85,80 @@ def _set_and_wait_ffe_rc(
     return test_agent.wait_for_rc_apply_state(RC_PRODUCT, state=RemoteConfigApplyState.ACKNOWLEDGED, clear=True)
 
 
+def _make_logging_ufc_fixture(flag_key: str, *, do_log: bool = True) -> dict[str, Any]:
+    """Build a UFC payload holding one enabled STRING flag named ``flag_key``.
+
+    The flag has two variations (``on`` and ``off``) and a single rule-less
+    allocation, ``default-allocation``, whose only split points at ``on``.
+    ``do_log`` is copied into that allocation's ``doLog`` field.
+    """
+    variations = {
+        "on": {"key": "on", "value": "on-value"},
+        "off": {"key": "off", "value": "off-value"},
+    }
+    allocation = {
+        "key": "default-allocation",
+        "rules": [],
+        "splits": [{"variationKey": "on", "shards": []}],
+        "doLog": do_log,
+    }
+    flag = {
+        "key": flag_key,
+        "enabled": True,
+        "variationType": "STRING",
+        "variations": variations,
+        "allocations": [allocation],
+    }
+    return {
+        "createdAt": "2024-04-17T19:40:53.716Z",
+        "format": "SERVER",
+        "environment": {"name": "Test"},
+        "flags": {flag_key: flag},
+    }
+
+
+def _decode_agent_json_body(encoded_body: str) -> dict[str, Any]:
+    """Base64-decode ``encoded_body``, read the bytes as UTF-8, and parse them as JSON."""
+    return json.loads(base64.b64decode(encoded_body).decode("utf-8"))
+
+
+def _find_exposure(test_agent: TestAgentAPI, flag_key: str, targeting_key: str) -> dict[str, Any] | None:
+    """Return the first recorded exposure for ``flag_key`` and ``targeting_key``.
+
+    Only agent requests whose URL ends with the exposures EVP proxy path are
+    decoded. An exposure matches when its ``flag.key`` equals ``flag_key`` and
+    its ``subject.id`` equals ``targeting_key``. Returns ``None`` if no
+    recorded request contains such an exposure.
+    """
+    exposures_path = "/evp_proxy/v2/api/v2/exposures"
+
+    for recorded in test_agent.requests():
+        if recorded["url"].endswith(exposures_path):
+            body = _decode_agent_json_body(recorded["body"])
+            for candidate in body.get("exposures", []):
+                flag = candidate.get("flag", {})
+                subject = candidate.get("subject", {})
+                if flag.get("key") == flag_key and subject.get("id") == targeting_key:
+                    return candidate
+
+    return None
+
+
+def _find_metric_data_point(metrics: list[dict[str, Any]], attributes: dict[str, str]) -> dict[str, Any] | None:
+    """Return the first ``feature_flag.evaluations`` sum data point whose attributes equal ``attributes``.
+
+    ``metrics`` is walked as ``resource_metrics`` -> ``scope_metrics`` ->
+    ``metrics`` -> ``sum.data_points``; a missing level is treated as empty.
+    The match is exact: a data point with extra attributes, or with an
+    attribute whose value is not a ``string_value``, does not match.
+    Returns ``None`` when no data point matches.
+    """
+    for export in metrics:
+        for resource_metric in export.get("resource_metrics", []):
+            for scope_metric in resource_metric.get("scope_metrics", []):
+                for metric in scope_metric.get("metrics", []):
+                    if metric.get("name") != "feature_flag.evaluations":
+                        continue
+
+                    for data_point in metric.get("sum", {}).get("data_points", []):
+                        # A non-string attribute value maps to None instead of raising
+                        # KeyError, so an unrelated data point cannot abort the search.
+                        data_point_attributes = {
+                            item.get("key"): item.get("value", {}).get("string_value")
+                            for item in data_point.get("attributes", [])
+                        }
+                        if attributes == data_point_attributes:
+                            return data_point
+
+    return None
+
+
+def _data_point_value(data_point: dict[str, Any]) -> int | float | None:
+    """Return the numeric value of ``data_point``.
+
+    ``as_int`` takes precedence and is converted with ``int()``; otherwise
+    ``as_double`` is returned unchanged. Returns ``None`` when neither key
+    is present.
+    """
+    if "as_int" not in data_point:
+        return data_point.get("as_double")
+    return int(data_point["as_int"])
+
+
 @scenarios.parametric
 @features.feature_flags_dynamic_evaluation
 class Test_Feature_Flag_Dynamic_Evaluation:
@@ -154,7 +234,7 @@ def test_ffe_flag_evaluation(self, test_case_file: str, test_agent: TestAgentAPI
             assert actual_value == expected_result, (
                 f"Test case {i} in {test_case_file} failed: "
                 f"flag='{flag}', targetingKey='{targeting_key}', "
-                f"expected={expected_result}, actual={actual_value}"
+                f"expected={expected_result}, actual={actual_value}, result={result}"
             )
 
     @parametrize("library_env", [{**DEFAULT_ENVVARS}])
@@ -186,3 +266,86 @@ def test_ffe_of7_empty_targeting_key(self, test_agent: TestAgentAPI, test_librar
         assert result.get("value") == "on-value", (
             f"OF.7 failed: empty targeting key should return 'on-value', got '{result.get('value')}'"
         )
+
+
+@scenarios.parametric
+@features.feature_flags_exposures
+class Test_Feature_Flag_Parametric_Exposures:
+    """Test PHP FFE exposure emission through the parametric app."""
+
+    @parametrize("library_env", [{**DEFAULT_ENVVARS}])
+    def test_php_ffe_exposure_event(self, test_agent: TestAgentAPI, test_library: APMLibrary) -> None:
+        """Evaluate one logged flag, flush, and check the exposure recorded by the test agent."""
+        # Other libraries are skipped rather than failed.
+        if context.library.name != "php":
+            pytest.skip("Parametric FFE exposure validation is currently PHP-specific")
+
+        flag_key = "php-parametric-exposure-flag"
+        targeting_key = "php-parametric-user"
+
+        # Deliver the flag configuration through Remote Config before starting
+        # the provider; the fixture's allocation has doLog=True by default.
+        _set_and_wait_ffe_rc(test_agent, _make_logging_ufc_fixture(flag_key), "php-parametric-exposure")
+
+        success = test_library.ffe_start()
+        assert success, "Failed to start FFE provider"
+
+        result = test_library.ffe_evaluate(
+            flag=flag_key,
+            variation_type="STRING",
+            default_value="default",
+            targeting_key=targeting_key,
+            attributes={"plan": "pro"},
+        )
+
+        # The fixture's single allocation always selects the "on" variation.
+        assert result.get("value") == "on-value"
+        assert result.get("variant") == "on"
+
+        # Flush explicitly before searching the agent's recorded requests.
+        flush_result = test_library.ffe_flush()
+        assert flush_result.get("flushed"), f"Expected FFE exposure flush, got {flush_result}"
+
+        # The exposure must carry the allocation, the variant, and the subject
+        # attributes that were passed to the evaluation.
+        exposure = _find_exposure(test_agent, flag_key, targeting_key)
+        assert exposure is not None, f"Expected exposure event for flag '{flag_key}' and subject '{targeting_key}'"
+        assert exposure.get("allocation", {}).get("key") == "default-allocation"
+        assert exposure.get("variant", {}).get("key") == "on"
+        assert exposure.get("subject", {}).get("attributes", {}).get("plan") == "pro"
+
+
+@scenarios.parametric
+@features.feature_flags_eval_metrics
+class Test_Feature_Flag_Parametric_Evaluation_Metrics:
+    """Test PHP FFE evaluation metric emission through the parametric app."""
+
+    @parametrize("library_env", [{**OTLP_METRICS_ENVVARS}])
+    def test_php_ffe_evaluation_metric(self, test_agent: TestAgentAPI, test_library: APMLibrary) -> None:
+        """Evaluate one flag three times, flush, and check the ``feature_flag.evaluations`` data point."""
+        # Other libraries are skipped rather than failed.
+        if context.library.name != "php":
+            pytest.skip("Parametric FFE evaluation metric validation is currently PHP-specific")
+
+        flag_key = "php-parametric-metric-flag"
+
+        # Deliver the flag configuration through Remote Config before starting the provider.
+        _set_and_wait_ffe_rc(test_agent, _make_logging_ufc_fixture(flag_key), "php-parametric-metric")
+
+        success = test_library.ffe_start()
+        assert success, "Failed to start FFE provider"
+
+        # Three identical evaluations: the final assertion expects the matching
+        # data point to report a value of 3.
+        for _ in range(3):
+            result = test_library.ffe_evaluate(
+                flag=flag_key,
+                variation_type="STRING",
+                default_value="default",
+                targeting_key="php-parametric-user",
+                attributes={},
+            )
+            assert result.get("value") == "on-value"
+            assert result.get("variant") == "on"
+
+        # Flush explicitly before waiting for OTLP metrics on the agent.
+        flush_result = test_library.ffe_flush()
+        assert flush_result.get("flushed"), f"Expected FFE metric flush, got {flush_result}"
+
+        # The attribute set must match the data point exactly (see _find_metric_data_point).
+        data_point = _find_metric_data_point(
+            test_agent.wait_for_num_otlp_metrics(num=1),
+            {
+                "feature_flag.key": flag_key,
+                "feature_flag.result.variant": "on",
+                "feature_flag.result.reason": "static",
+                "feature_flag.result.allocation_key": "default-allocation",
+            },
+        )
+        assert data_point is not None, f"Expected feature_flag.evaluations metric for flag '{flag_key}'"
+        assert _data_point_value(data_point) == 3