diff --git a/crates/trusted-server-adapter-fastly/src/main.rs b/crates/trusted-server-adapter-fastly/src/main.rs index 24c447d3..5be29494 100644 --- a/crates/trusted-server-adapter-fastly/src/main.rs +++ b/crates/trusted-server-adapter-fastly/src/main.rs @@ -42,6 +42,39 @@ use crate::platform::{build_runtime_services, open_kv_store, UnavailableKvStore} const CREATIVE_OPPORTUNITIES_TOML: &str = include_str!("../../../creative-opportunities.toml"); +/// Parses the embedded `creative-opportunities.toml` at most once per Wasm +/// instance. +/// +/// On parse failure, logs an error and falls back to an empty +/// [`CreativeOpportunitiesFile`] — i.e. the documented "feature disabled" +/// state — instead of panicking the request hot path. The build-time +/// validator in `crates/trusted-server-core/build.rs` catches every realistic +/// authoring mistake; this fallback exists so a CI-bypassed binary patch or a +/// future schema change can't take the entire fleet down with a per-request +/// panic. +static SLOTS_FILE: std::sync::LazyLock< + trusted_server_core::creative_opportunities::CreativeOpportunitiesFile, +> = std::sync::LazyLock::new(|| { + let mut file = match toml::from_str::< + trusted_server_core::creative_opportunities::CreativeOpportunitiesFile, + >(CREATIVE_OPPORTUNITIES_TOML) + { + Ok(file) => file, + Err(err) => { + log::error!( + "creative-opportunities.toml failed to parse at startup; \ + falling back to an empty slots file (server-side ad-slot \ + templates disabled): {err}" + ); + trusted_server_core::creative_opportunities::CreativeOpportunitiesFile::default() + } + }; + // Pre-compile glob patterns once so per-request `matches_path` doesn't + // re-invoke `Pattern::new` on every page hit. + file.compile(); + file +}); + /// Entry point for the Fastly Compute program. 
/// /// Uses an undecorated `main()` with `Request::from_client()` instead of @@ -94,9 +127,7 @@ fn main() { } }; - let slots_file: trusted_server_core::creative_opportunities::CreativeOpportunitiesFile = - toml::from_str(CREATIVE_OPPORTUNITIES_TOML) - .expect("should parse creative-opportunities.toml"); + let slots_file = &*SLOTS_FILE; let integration_registry = match IntegrationRegistry::new(&settings) { Ok(r) => r, @@ -121,7 +152,7 @@ fn main() { &orchestrator, &integration_registry, &runtime_services, - &slots_file, + slots_file, req, )) { response.send_to_client(); diff --git a/crates/trusted-server-core/src/auction/endpoints.rs b/crates/trusted-server-core/src/auction/endpoints.rs index 5d5bb292..5b4e7b25 100644 --- a/crates/trusted-server-core/src/auction/endpoints.rs +++ b/crates/trusted-server-core/src/auction/endpoints.rs @@ -6,7 +6,7 @@ use fastly::{Request, Response}; use crate::auction::formats::AdRequest; use crate::compat; use crate::consent; -use crate::cookies::handle_request_cookies; +use crate::cookies::{handle_request_cookies, parse_ts_eids_cookie}; use crate::edge_cookie::get_or_generate_ec_id_from_http_request; use crate::error::TrustedServerError; use crate::platform::RuntimeServices; @@ -125,8 +125,8 @@ pub async fn handle_auction( .map(|_| services.kv_store()), }); - // Convert tsjs request format to auction request - let auction_request = convert_tsjs_to_auction_request( + // Convert tsjs request format to auction request. + let mut auction_request = convert_tsjs_to_auction_request( &body, settings, services, @@ -135,6 +135,10 @@ pub async fn handle_auction( &ec_id, geo, )?; + // Forward Extended User IDs from the `ts-eids` cookie so programmatic + // callers (slim-Prebid, native apps) get parity with the publisher / + // page-bids paths, both of which already do this. 
+ auction_request.user.eids = parse_ts_eids_cookie(cookie_jar.as_ref()); // Create auction context let context = AuctionContext { diff --git a/crates/trusted-server-core/src/auction/orchestrator.rs b/crates/trusted-server-core/src/auction/orchestrator.rs index bf45c47e..e5dafaa7 100644 --- a/crates/trusted-server-core/src/auction/orchestrator.rs +++ b/crates/trusted-server-core/src/auction/orchestrator.rs @@ -540,32 +540,29 @@ impl AuctionOrchestrator { } let starting_count = winning_bids.len(); - winning_bids.retain(|slot_id, bid| match floor_prices.get(slot_id) { - Some(floor) => { - // price=None means the SSP returned an encoded price (e.g. APS amznbid). - // In the parallel-only path this bid cannot yet be floor-checked; it passes - // through and will be decoded (and re-checked) by the mediation layer. - // In the mediation path, mediation decodes prices before calling this - // function, so any bid still carrying price=None is dropped upstream. - match bid.price { - Some(price) if price >= *floor => true, - Some(_) => { - log::info!( - "Dropping winning bid below floor price for slot '{}'", - slot_id - ); - false - } - None => { - log::debug!( - "Passing encoded-price bid for slot '{}' - price not yet decoded", - slot_id - ); - true - } - } + winning_bids.retain(|slot_id, bid| match (floor_prices.get(slot_id), bid.price) { + (Some(floor), Some(price)) if price >= *floor => true, + (Some(_), Some(_)) => { + log::info!( + "Dropping winning bid below floor price for slot '{}'", + slot_id + ); + false } - None => true, + (_, None) => { + // Any caller that needs to keep an undecoded (encoded-price) + // bid must decode it *before* invoking this function — both + // `select_winning_bids` and the mediator path already do. + // Letting `None`-price bids through here would cause + // `winning_bids.len()` to overcount what `build_bid_map` + // downstream is willing to emit, so they get dropped instead. 
+ log::debug!( + "Dropping bid for slot '{}' - no decoded price (caller must decode before apply_floor_prices)", + slot_id + ); + false + } + (None, Some(_)) => true, }); if winning_bids.len() != starting_count { @@ -872,7 +869,14 @@ impl AuctionOrchestrator { remaining, mediator.timeout_ms(), ); - let placeholder = fastly::Request::get("https://placeholder.invalid/"); + // The mediator runs on the collect path. See the doc-comment on + // `AuctionContext::request`: the real client request was already + // consumed by `send_async` during dispatch, so we substitute a + // canonical placeholder URL. Any future mediator that needs real + // client headers must snapshot them at dispatch time onto + // `DispatchedAuction` rather than reading `context.request` here. + let placeholder = + fastly::Request::get(crate::auction::types::MEDIATOR_PLACEHOLDER_URL); let mediator_context = AuctionContext { settings: context.settings, request: &placeholder, @@ -1256,9 +1260,14 @@ mod tests { } #[test] - fn test_apply_floor_prices_allows_none_prices_for_encoded_bids() { - // Test that bids with None prices (APS-style) pass through floor pricing - // This is correct behavior for parallel-only strategy where mediation happens later + fn test_apply_floor_prices_drops_bids_with_undecoded_price() { + // Bids that reach apply_floor_prices with `price=None` cannot have a + // floor compared against them — and they would not survive downstream + // (build_bid_map filters them) — so apply_floor_prices drops them so + // the count it reports matches what eventually ships to the client. + // Both production paths (select_winning_bids and the mediator filter) + // already decode/skip None prices before calling this function; this + // test pins the contract. 
let orchestrator = AuctionOrchestrator::new(AuctionConfig::default()); let mut floor_prices = HashMap::new(); floor_prices.insert("slot-1".to_string(), 1.00); @@ -1268,7 +1277,7 @@ mod tests { "slot-1".to_string(), Bid { slot_id: "slot-1".to_string(), - price: None, // APS bid with encoded price + price: None, currency: "USD".to_string(), creative: Some("
Ad
".to_string()), adomain: None, @@ -1289,25 +1298,15 @@ mod tests { }, ); - // Apply floor pricing - should pass through with None price let filtered = orchestrator.apply_floor_prices(winning_bids, &floor_prices); - assert_eq!( - filtered.len(), - 1, - "APS bid with None price should pass through floor check" - ); assert!( - filtered.contains_key("slot-1"), - "Slot-1 should still be present" + filtered.is_empty(), + "bid with None price should be dropped by apply_floor_prices" ); assert!( - filtered - .get("slot-1") - .expect("slot-1 should be present") - .price - .is_none(), - "Price should still be None (not decoded yet)" + !filtered.contains_key("slot-1"), + "slot-1 should not survive when its bid has no decoded price" ); } diff --git a/crates/trusted-server-core/src/auction/types.rs b/crates/trusted-server-core/src/auction/types.rs index 6ee986b9..79616b13 100644 --- a/crates/trusted-server-core/src/auction/types.rs +++ b/crates/trusted-server-core/src/auction/types.rs @@ -115,6 +115,29 @@ pub struct SiteInfo { } /// Context passed to auction providers. +/// +/// # The `request` field is path-dependent +/// +/// `request` carries the **real downstream client request** in the dispatch +/// path ([`AuctionOrchestrator::run_auction`][run] and +/// [`dispatch_auction`][dispatch]). Providers there can read client headers +/// (DNT, User-Agent, cookies, X-* customs) directly off it. +/// +/// In the **collect path** ([`collect_dispatched_auction`][collect]) the +/// mediator is invoked with a synthetic placeholder request +/// (`https://placeholder.invalid/`), because the real client request has +/// already been consumed by `send_async` during dispatch and the host pipeline +/// can't lend it across the `.await`. **Mediators must not depend on reading +/// client state from `context.request`** — the placeholder has none of the +/// real headers. 
If a future mediator needs that data, snapshot it into a new +/// field on this struct at dispatch time and stash it on the +/// [`DispatchedAuction`] token so collect can attach it to the mediator's +/// context. See +/// (P2-1) for the open follow-up. +/// +/// [run]: crate::auction::AuctionOrchestrator::run_auction +/// [dispatch]: crate::auction::AuctionOrchestrator::dispatch_auction +/// [collect]: crate::auction::AuctionOrchestrator::collect_dispatched_auction pub struct AuctionContext<'a> { pub settings: &'a Settings, pub request: &'a Request, @@ -127,6 +150,12 @@ pub struct AuctionContext<'a> { pub services: &'a RuntimeServices, } +/// URL used by the orchestrator when invoking a mediator from the collect +/// path. Providers can `debug_assert` against this value to catch a mediator +/// that has accidentally started depending on `context.request` carrying real +/// client headers. +pub const MEDIATOR_PLACEHOLDER_URL: &str = "https://placeholder.invalid/"; + /// Response from a single auction provider. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AuctionResponse { diff --git a/crates/trusted-server-core/src/creative_opportunities.rs b/crates/trusted-server-core/src/creative_opportunities.rs index 25add829..cbf79b11 100644 --- a/crates/trusted-server-core/src/creative_opportunities.rs +++ b/crates/trusted-server-core/src/creative_opportunities.rs @@ -65,6 +65,18 @@ pub struct CreativeOpportunitySlot { /// Provider-specific slot identifiers. #[serde(default)] pub providers: SlotProviders, + /// Pre-compiled [`page_patterns`](Self::page_patterns) for hot-path matching. + /// + /// Populated by [`compile_patterns`](Self::compile_patterns) once at file + /// load time (see [`CreativeOpportunitiesFile::compile`]). When this is + /// empty, [`matches_path`](Self::matches_path) falls back to compiling on + /// every call so callers that build slots by hand (tests, legacy code) + /// still work. 
+ /// + /// `pub(crate)` rather than private so cross-module test helpers in this + /// crate can construct slots via struct-literal syntax with an empty cache. + #[serde(skip, default)] + pub(crate) compiled_patterns: Vec, } impl CreativeOpportunitySlot { @@ -79,6 +91,16 @@ impl CreativeOpportunitySlot { /// Patterns that cannot be compiled even after normalisation are silently skipped. #[must_use] pub fn matches_path(&self, path: &str) -> bool { + // Fast path: use the pre-compiled patterns when available so we don't + // re-run `Pattern::new` on every request. The vec is non-empty iff + // [`compile_patterns`](Self::compile_patterns) succeeded at load time + // and the slot has at least one pattern. + if !self.compiled_patterns.is_empty() { + return self.compiled_patterns.iter().any(|p| p.matches(path)); + } + + // Fallback for slots constructed by hand (tests, legacy callers that + // skip `compile_patterns`). Re-compiles on every call. self.page_patterns .iter() .any(|pattern| match Pattern::new(pattern) { @@ -92,6 +114,28 @@ impl CreativeOpportunitySlot { }) } + /// Compile [`page_patterns`](Self::page_patterns) into the + /// [`compiled_patterns`](Self::compiled_patterns) cache. + /// + /// Patterns that fail to compile (either directly or after the `**`→`*` + /// normalisation that [`matches_path`](Self::matches_path) does) are + /// silently skipped — the slot just becomes un-matchable, matching the + /// fallback behaviour. + /// + /// Idempotent: calling twice replaces the cache, so a slot list reloaded + /// at runtime won't accumulate stale patterns. + pub fn compile_patterns(&mut self) { + self.compiled_patterns = self + .page_patterns + .iter() + .filter_map(|pattern| { + Pattern::new(pattern) + .or_else(|_| Pattern::new(&pattern.replace("**", "*"))) + .ok() + }) + .collect(); + } + /// Returns the GAM ad unit path for this slot. 
/// /// Uses the explicit [`gam_unit_path`](Self::gam_unit_path) override when set, @@ -116,8 +160,7 @@ impl CreativeOpportunitySlot { /// Provider-specific params (e.g., APS `slotID`, PBS bidder params) are wired /// into the `bidders` map keyed by provider/bidder name. #[must_use] - pub fn to_ad_slot(&self, gam_network_id: &str) -> AdSlot { - let _ = gam_network_id; + pub fn to_ad_slot(&self) -> AdSlot { let mut bidders: HashMap = HashMap::new(); if let Some(ref aps) = self.providers.aps { bidders.insert( @@ -187,6 +230,18 @@ pub struct CreativeOpportunitiesFile { pub slots: Vec, } +impl CreativeOpportunitiesFile { + /// Pre-compile every slot's + /// [`page_patterns`](CreativeOpportunitySlot::page_patterns) so + /// [`matches_path`](CreativeOpportunitySlot::matches_path) runs without + /// re-invoking `Pattern::new` on every request. Call once after loading. + pub fn compile(&mut self) { + for slot in &mut self.slots { + slot.compile_patterns(); + } + } +} + /// Validates that a slot ID contains only safe characters. /// /// Allowed characters: ASCII alphanumerics, underscores (`_`), and hyphens (`-`). 
@@ -237,6 +292,49 @@ mod tests { floor_price: Some(0.50), targeting: Default::default(), providers: Default::default(), + compiled_patterns: Vec::new(), + } + } + + #[test] + fn compile_patterns_populates_cache_and_match_uses_it() { + let mut slot = make_slot("atf", vec!["/20**", "/about"]); + assert!( + slot.compiled_patterns.is_empty(), + "freshly-built slot should have no compiled patterns" + ); + slot.compile_patterns(); + assert_eq!( + slot.compiled_patterns.len(), + 2, + "compile_patterns should populate one entry per page pattern" + ); + assert!( + slot.matches_path("/2024/01/my-article/"), + "matches_path should hit the compiled-pattern fast path" + ); + assert!( + slot.matches_path("/about"), + "matches_path should hit /about via the compiled cache" + ); + assert!( + !slot.matches_path("/contact"), + "matches_path should reject paths that match nothing in the cache" + ); + } + + #[test] + fn file_compile_populates_every_slot() { + let mut file = CreativeOpportunitiesFile { + slots: vec![make_slot("a", vec!["/a/*"]), make_slot("b", vec!["/b/*"])], + }; + file.compile(); + for slot in &file.slots { + assert_eq!( + slot.compiled_patterns.len(), + 1, + "every slot's patterns should be pre-compiled after file.compile()" + ); } } @@ -300,7 +398,7 @@ mod tests { slot.providers.aps = Some(ApsSlotParams { slot_id: "aps-slot-atf".to_string(), }); - let ad_slot = slot.to_ad_slot("21765378893"); + let ad_slot = slot.to_ad_slot(); let aps_params = ad_slot.bidders.get("aps").expect("should have aps bidder"); assert_eq!( aps_params.get("slotID").and_then(|v| v.as_str()), @@ -311,7 +409,7 @@ mod tests { #[test] fn to_ad_slot_sets_floor_price_and_formats() { let slot = make_slot("atf", vec!["/"]); - let ad_slot = slot.to_ad_slot("21765378893"); + let ad_slot = slot.to_ad_slot(); assert_eq!(ad_slot.id, "atf"); assert_eq!(ad_slot.floor_price, Some(0.50)); assert_eq!(ad_slot.formats.len(), 1); diff --git a/crates/trusted-server-core/src/html_processor.rs 
b/crates/trusted-server-core/src/html_processor.rs index 26978cef..b60f6f93 100644 --- a/crates/trusted-server-core/src/html_processor.rs +++ b/crates/trusted-server-core/src/html_processor.rs @@ -167,6 +167,24 @@ impl HtmlProcessorConfig { ad_bids_state: std::sync::Arc::new(std::sync::RwLock::new(None)), } } + + /// Attach the streaming-auction `" .to_string(), - concat!( - "" - ).to_string(), + format!("", GPT_BOOTSTRAP_JS), ] } } +/// Inline `window.__tsAdInit` bootstrap injected at `` so the bids +/// script at `` can call it before the TSJS bundle has loaded. +/// +/// The bundle's idempotent implementation in +/// `crates/js/lib/src/integrations/gpt/index.ts` later overwrites this stub. +/// Both implementations guard the one-time-per-page setup with +/// `window.__tsServicesEnabled` so neither double-enables services if the +/// publisher's own init code also calls `googletag.enableServices()`. +const GPT_BOOTSTRAP_JS: &str = include_str!("gpt_bootstrap.js"); + // Default value functions fn default_enabled() -> bool { @@ -1120,6 +1097,32 @@ mod tests { ); } + #[test] + fn head_inserts_bootstrap_guards_enable_services_with_idempotency_flag() { + let config = test_config(); + let integration = GptIntegration::new(config); + let doc_state = IntegrationDocumentState::default(); + let ctx = IntegrationHtmlContext { + request_host: "edge.example.com", + request_scheme: "https", + origin_host: "example.com", + document_state: &doc_state, + }; + let combined = integration.head_inserts(&ctx).join(""); + assert!( + combined.contains("__tsServicesEnabled"), + "should guard enableServices/enableSingleRequest with the __tsServicesEnabled flag" + ); + assert!( + combined.contains("window.__tsAdInit"), + "should install __tsAdInit on window" + ); + assert!( + !combined.contains("googletag.pubads().refresh()"), + "should never call unbounded refresh() — only refresh(newSlots)" + ); + } + #[test] fn head_injector_integration_id() { let integration = 
GptIntegration::new(test_config()); diff --git a/crates/trusted-server-core/src/integrations/gpt_bootstrap.js b/crates/trusted-server-core/src/integrations/gpt_bootstrap.js new file mode 100644 index 00000000..a3d28a28 --- /dev/null +++ b/crates/trusted-server-core/src/integrations/gpt_bootstrap.js @@ -0,0 +1,78 @@ +// Edge-injected GPT auction bootstrap. +// +// This is the minimal `window.__tsAdInit` that runs on first page load +// before the TSJS bundle has had a chance to install its richer +// idempotent implementation. The bundle in +// crates/js/lib/src/integrations/gpt/index.ts overwrites `__tsAdInit` +// once it loads. +// +// Contract with the bundle: +// - Both implementations must set `window.__tsServicesEnabled = true` +// after calling `enableSingleRequest()`/`enableServices()` so a +// subsequent call from any source (the bundle's `__tsAdInit`, the +// publisher's own GPT init code) becomes a no-op. +// - `refresh()` is called only for the slots defined in this pass, +// never the global slot list, so we never accidentally refresh +// publisher-managed slots that we don't own. +// +// Only installed if `window.__tsAdInit` isn't already defined — that +// way the bundle (or anything else) can preempt this fallback by +// installing first. 
+(function () { + if (typeof window === "undefined" || window.__tsAdInit) { + return; + } + window.__tsAdInit = function () { + var slots = window.__ts_ad_slots || []; + var bids = window.__ts_bids || {}; + var divToSlotId = {}; + googletag.cmd.push(function () { + var newSlots = []; + slots.forEach(function (slot) { + var s = googletag.defineSlot( + slot.gam_unit_path, + slot.formats, + slot.div_id, + ); + if (!s) return; + s.addService(googletag.pubads()); + Object.entries(slot.targeting || {}).forEach(function (e) { + s.setTargeting(e[0], e[1]); + }); + var b = bids[slot.id] || {}; + ["hb_pb", "hb_bidder", "hb_adid"].forEach(function (k) { + if (b[k]) s.setTargeting(k, b[k]); + }); + s.setTargeting("ts_initial", "1"); + divToSlotId[slot.div_id] = slot.id; + newSlots.push(s); + }); + // Guard the one-time-per-page setup so a follow-up call (e.g. + // publisher's own init code or the bundle's `__tsAdInit` after + // it overwrites this stub) doesn't double-enable services. + if (!window.__tsServicesEnabled) { + googletag.pubads().enableSingleRequest(); + googletag.enableServices(); + window.__tsServicesEnabled = true; + googletag + .pubads() + .addEventListener("slotRenderEnded", function (ev) { + var divId = ev.slot.getSlotElementId(); + var slotId = divToSlotId[divId] || divId; + var b = (window.__ts_bids || {})[slotId] || {}; + var ourBidWon = + !ev.isEmpty && + b.hb_adid && + ev.slot.getTargeting("hb_adid")[0] === b.hb_adid; + if (ourBidWon) { + if (b.nurl) navigator.sendBeacon(b.nurl); + if (b.burl) navigator.sendBeacon(b.burl); + } + }); + } + if (newSlots.length > 0) { + googletag.pubads().refresh(newSlots); + } + }); + }; +})(); diff --git a/crates/trusted-server-core/src/integrations/prebid.rs b/crates/trusted-server-core/src/integrations/prebid.rs index d7711d61..b74b234c 100644 --- a/crates/trusted-server-core/src/integrations/prebid.rs +++ b/crates/trusted-server-core/src/integrations/prebid.rs @@ -164,10 +164,6 @@ pub struct PrebidIntegrationConfig { 
/// - `both` — consent in both cookies and body (default) #[serde(default)] pub consent_forwarding: ConsentForwardingMode, - /// When true, suppresses client-side nurl firing. - /// Use for PBS deployments that fire nurl internally. - #[serde(default)] - pub suppress_nurl: bool, } impl IntegrationConfig for PrebidIntegrationConfig { @@ -1661,16 +1657,9 @@ mod tests { bid_param_overrides: HashMap::default(), bid_param_override_rules: Vec::new(), consent_forwarding: ConsentForwardingMode::Both, - suppress_nurl: false, } } - #[test] - fn prebid_config_suppress_nurl_defaults_to_false() { - let config = base_config(); - assert!(!config.suppress_nurl, "should not suppress nurl by default"); - } - fn create_test_auction_request() -> AuctionRequest { AuctionRequest { id: "auction-123".to_string(), diff --git a/crates/trusted-server-core/src/price_bucket.rs b/crates/trusted-server-core/src/price_bucket.rs index b683020b..cfdca9eb 100644 --- a/crates/trusted-server-core/src/price_bucket.rs +++ b/crates/trusted-server-core/src/price_bucket.rs @@ -20,7 +20,10 @@ impl PriceGranularity { #[must_use] pub fn price_bucket(cpm: f64, granularity: PriceGranularity) -> String { - if cpm <= 0.0 { + // Reject NaN / Inf early so the `(x * 100.0).floor() as u64` cast below + // never sees a non-finite value. Rust defines that cast as saturating: NaN + // becomes 0, but +Inf becomes `u64::MAX` rather than 0.
+ if !cpm.is_finite() || cpm <= 0.0 { return "0.00".to_string(); } match granularity { @@ -125,4 +128,31 @@ mod tests { price_bucket(2.53, PriceGranularity::Dense) ); } + + #[test] + fn non_finite_cpm_returns_zero_bucket() { + for granularity in [ + PriceGranularity::Dense, + PriceGranularity::Low, + PriceGranularity::Medium, + PriceGranularity::High, + PriceGranularity::Auto, + ] { + assert_eq!( + price_bucket(f64::NAN, granularity), + "0.00", + "NaN cpm should bucket to 0.00 for granularity {granularity:?}" + ); + assert_eq!( + price_bucket(f64::INFINITY, granularity), + "0.00", + "+Inf cpm should bucket to 0.00 for granularity {granularity:?}" + ); + assert_eq!( + price_bucket(f64::NEG_INFINITY, granularity), + "0.00", + "-Inf cpm should bucket to 0.00 for granularity {granularity:?}" + ); + } + } } diff --git a/crates/trusted-server-core/src/publisher.rs b/crates/trusted-server-core/src/publisher.rs index 2974eccd..9019e3c6 100644 --- a/crates/trusted-server-core/src/publisher.rs +++ b/crates/trusted-server-core/src/publisher.rs @@ -262,26 +262,32 @@ fn process_response_streaming( Ok(()) } -/// Create a unified HTML stream processor +/// Create a unified HTML stream processor. +/// +/// Builds the config via [`HtmlProcessorConfig::from_settings`] and then +/// layers the auction-hold streaming fields on top via +/// [`HtmlProcessorConfig::with_ad_state`], so the canonical builder stays the +/// single source of truth: a future field added to `from_settings` is +/// inherited here automatically. 
fn create_html_stream_processor( origin_host: &str, request_host: &str, request_scheme: &str, - _settings: &Settings, + settings: &Settings, integration_registry: &IntegrationRegistry, ad_slots_script: Option, ad_bids_state: Arc>>, ) -> Result> { use crate::html_processor::{create_html_processor, HtmlProcessorConfig}; - let config = HtmlProcessorConfig { - origin_host: origin_host.to_string(), - request_host: request_host.to_string(), - request_scheme: request_scheme.to_string(), - integrations: integration_registry.clone(), - ad_slots_script, - ad_bids_state, - }; + let config = HtmlProcessorConfig::from_settings( + settings, + integration_registry, + origin_host, + request_host, + request_scheme, + ) + .with_ad_state(ad_slots_script, ad_bids_state); Ok(create_html_processor(config)) } @@ -493,7 +499,7 @@ pub async fn stream_publisher_body_async( if !is_html { // Non-HTML: collect auction first, then stream. There is no // to hold, so delaying the entire body until collection is acceptable. - let placeholder = Request::get("https://placeholder.invalid/"); + let placeholder = Request::get(crate::auction::types::MEDIATOR_PLACEHOLDER_URL); let result = orchestrator .collect_dispatched_auction( dispatched, @@ -540,16 +546,25 @@ pub async fn stream_publisher_body_async( .await } -/// Build a minimal [`AuctionContext`] for the mediator call in collection. +/// Build a minimal [`AuctionContext`] for the collect phase. /// -/// The `request` field is a short-lived placeholder (providers use it only for -/// header extraction; the placeholder is functionally equivalent to the original -/// since `req` was already consumed by `send_async` before dispatch). +/// See [`AuctionContext::request`]: the orchestrator's collect path runs +/// after `send_async` has already consumed the real client request, so this +/// context carries a synthetic placeholder. 
The orchestrator itself +/// instantiates a fresh placeholder when it actually invokes a mediator — +/// this argument is plumbing for the (presently unused) case where the +/// orchestrator needs the caller's request shape. fn make_collect_context<'a>( settings: &'a Settings, services: &'a RuntimeServices, placeholder: &'a Request, ) -> AuctionContext<'a> { + debug_assert_eq!( + placeholder.get_url_str(), + crate::auction::types::MEDIATOR_PLACEHOLDER_URL, + "make_collect_context must be given the canonical placeholder; \ + callers must not forward a real client request through the collect path" + ); AuctionContext { settings, request: placeholder, @@ -560,13 +575,39 @@ fn make_collect_context<'a>( } } +/// Well-known crawler User-Agent fragments. Best-effort: an attacker can +/// trivially spoof their UA, so this is for opt-out signalling to honest +/// crawlers (preventing SSP auctions burning partner quota on their behalf), +/// not security. +pub(crate) const BOT_USER_AGENT_FRAGMENTS: &[&str] = + &["Googlebot", "Bingbot", "AhrefsBot", "SemrushBot", "DotBot"]; + +/// Returns true when the request's User-Agent contains, ignoring ASCII case, +/// any crawler fragment in [`BOT_USER_AGENT_FRAGMENTS`] (Bing sends `bingbot`). +pub(crate) fn is_bot_user_agent(req: &Request) -> bool { + let ua = req.get_header_str("user-agent").unwrap_or("").to_ascii_lowercase(); + BOT_USER_AGENT_FRAGMENTS + .iter() + .any(|frag| ua.contains(&frag.to_ascii_lowercase())) +} + +/// Returns true when the request advertises itself as a prefetch via either +/// the standard `Sec-Purpose` or the legacy `Purpose` header. +pub(crate) fn is_prefetch_request(req: &Request) -> bool { + req.get_header_str("sec-purpose") + .is_some_and(|v| v.contains("prefetch")) + || req + .get_header_str("purpose") + .is_some_and(|v| v.contains("prefetch")) +} + /// Write winning bids from an auction result into the shared `ad_bids_state` lock.
pub(crate) fn write_bids_to_state( winning_bids: &std::collections::HashMap, price_granularity: PriceGranularity, ad_bids_state: &Arc>>, ) { - log::info!( + log::debug!( "write_bids_to_state: {} winning bid(s): [{}]", winning_bids.len(), winning_bids.keys().cloned().collect::>().join(", ") @@ -576,6 +617,40 @@ pub(crate) fn write_bids_to_state( *ad_bids_state.write().expect("should write bid state") = Some(bids_script); } +/// Prepend an HTML comment summarising the auction result onto the shared +/// `ad_bids_state` so it lands directly before the injected bids `` sequences inside the string. pub(crate) fn build_bids_script(bid_map: &serde_json::Map) -> String { - let json = serde_json::to_string(bid_map).unwrap_or_else(|_| "{}".to_string()); + let json = serde_json::to_string(bid_map) + .expect("serde_json::to_string of Map should be infallible"); let escaped = html_escape_for_script(&json); format!( "", @@ -1328,7 +1363,8 @@ pub(crate) fn build_ad_slots_script( }) }) .collect(); - let json = serde_json::to_string(&slots).unwrap_or_else(|_| "[]".to_string()); + let json = serde_json::to_string(&slots) + .expect("serde_json::to_string of Vec should be infallible"); let escaped = html_escape_for_script(&json); format!( "", @@ -1463,48 +1499,64 @@ pub async fn handle_page_bids( .as_ref() .is_some_and(|tcf| tcf.has_purpose_consent(1)); + // Same bot / prefetch guards the publisher path uses — without them this + // endpoint would fire real SSP auctions on Sec-Purpose=prefetch warm-up + // navigations and known crawler UA scans, burning partner request quota. 
+ let is_prefetch = is_prefetch_request(&req); + let is_bot = is_bot_user_agent(&req); + if matched_slots.is_empty() { log::debug!( "No creative opportunity slots matched path '{}' — skipping auction", path_param ); + } else if is_bot || is_prefetch { + log::debug!( + "page-bids: skipping auction for path '{}' (is_bot={}, is_prefetch={})", + path_param, + is_bot, + is_prefetch + ); } - let winning_bids = if !matched_slots.is_empty() && consent_allows_auction { - let mut auction_request = build_auction_request( - &matched_slots, - &ec_id, - &consent_context, - &request_info, - &path_param, - co_config, - req.get_header_str("user-agent"), - ); - auction_request.user.eids = parse_ts_eids_cookie(cookie_jar.as_ref()); - let timeout_ms = co_config - .auction_timeout_ms - .unwrap_or(settings.auction.timeout_ms); - let auction_context = AuctionContext { - settings, - request: &req, - client_info: services.client_info(), - timeout_ms, - provider_responses: None, - services, - }; - match orchestrator - .run_auction(&auction_request, &auction_context, services) - .await - { - Ok(result) => result.winning_bids, - Err(e) => { - log::warn!("page-bids auction failed: {e:?}"); - std::collections::HashMap::new() + let winning_bids = + if !matched_slots.is_empty() && consent_allows_auction && !is_bot && !is_prefetch { + let slots_ctx = MatchedSlotsContext { + matched_slots: &matched_slots, + request_path: &path_param, + }; + let mut auction_request = build_auction_request( + &slots_ctx, + &ec_id, + &consent_context, + &request_info, + req.get_header_str("user-agent"), + ); + auction_request.user.eids = parse_ts_eids_cookie(cookie_jar.as_ref()); + let timeout_ms = co_config + .auction_timeout_ms + .unwrap_or(settings.auction.timeout_ms); + let auction_context = AuctionContext { + settings, + request: &req, + client_info: services.client_info(), + timeout_ms, + provider_responses: None, + services, + }; + match orchestrator + .run_auction(&auction_request, &auction_context, services) 
+ .await + { + Ok(result) => result.winning_bids, + Err(e) => { + log::warn!("page-bids auction failed: {e:?}"); + std::collections::HashMap::new() + } } - } - } else { - std::collections::HashMap::new() - }; + } else { + std::collections::HashMap::new() + }; let bid_map = build_bid_map(&winning_bids, co_config.price_granularity); @@ -2507,6 +2559,7 @@ mod tests { .into_iter() .collect(), providers: Default::default(), + compiled_patterns: Vec::new(), } } @@ -2725,6 +2778,7 @@ mod tests { floor_price: Some(0.50), targeting: Default::default(), providers: Default::default(), + compiled_patterns: Vec::new(), }], } } @@ -2771,6 +2825,79 @@ mod tests { ); } + #[tokio::test] + async fn bot_user_agent_returns_slots_but_no_bids() { + // Crawlers should get slot definitions (so HTML structure is unchanged) + // but the server must not burn SSP request quota running a real auction + // for them. Same gate the publisher path applies. + let settings = settings_with_co(); + let orchestrator = AuctionOrchestrator::new(settings.auction.clone()); + let services = noop_services(); + let slots_file = file_with_article_slot(); + let mut req = make_page_bids_request("/2024/01/my-article/"); + req.set_header("user-agent", "Mozilla/5.0 (compatible; Googlebot/2.1)"); + + let response = handle_page_bids(&settings, &orchestrator, &services, &slots_file, req) + .await + .expect("should return ok response"); + + let body: serde_json::Value = + serde_json::from_slice(&response.into_body_bytes()).expect("should be json"); + + assert_eq!( + body["slots"] + .as_array() + .expect("slots should be array") + .len(), + 1, + "bot request should still get slot definitions" + ); + assert_eq!( + body["bids"] + .as_object() + .expect("bids should be object") + .len(), + 0, + "bot request must not run an auction (no SSP cost burned for crawlers)" + ); + } + + #[tokio::test] + async fn prefetch_request_returns_slots_but_no_bids() { + // Navigations triggered by Sec-Purpose=prefetch should not fire real + 
// SSP auctions — the user has not yet visited the page. + let settings = settings_with_co(); + let orchestrator = AuctionOrchestrator::new(settings.auction.clone()); + let services = noop_services(); + let slots_file = file_with_article_slot(); + let mut req = make_page_bids_request("/2024/01/my-article/"); + req.set_header("sec-purpose", "prefetch"); + + let response = handle_page_bids(&settings, &orchestrator, &services, &slots_file, req) + .await + .expect("should return ok response"); + + let body: serde_json::Value = + serde_json::from_slice(&response.into_body_bytes()).expect("should be json"); + + assert_eq!( + body["slots"] + .as_array() + .expect("slots should be array") + .len(), + 1, + "prefetch request should still get slot definitions" + ); + assert_eq!( + body["bids"] + .as_object() + .expect("bids should be object") + .len(), + 0, + "prefetch request must not run an auction" + ); + } + #[tokio::test] async fn url_not_matching_any_pattern_returns_empty_response() { // Slots exist but request path does not match — no auction, no injection.