diff --git a/src/lib.rs b/src/lib.rs index 05112ce..60f82ec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2735,6 +2735,35 @@ mod tests { "100.128.0.1 (NOT CGNAT) should be allowed" ); } + + // --- M1: Row-set expansion limit --- + + #[pg_test] + fn test_row_set_expansion_limit_via_dsl() { + // The row-set expansion limit (10,000 rows) is enforced inside + // expand_row_set(). The rejection path is covered by the unit test + // types::tests::test_row_set_expansion_rejects_oversized_result. + // This test does NOT exercise the limit itself: it only checks that a + // small (2-row) `$batch.*` expansion still succeeds via substitute_all. + use crate::types::substitute_all; + use std::collections::HashMap; + + let mut results = HashMap::new(); + let json = r#"{"rows":[{"id":1},{"id":2}],"row_count":2}"#; + results.insert("batch".to_string(), json.to_string()); + + let sys = crate::types::SystemVars { + instance_id: "test1234".to_string(), + label: None, + }; + let vars = HashMap::new(); + let result = substitute_all("SELECT * FROM $batch.*", &results, &vars, &sys); + assert!(result.is_ok(), "Small row-set should expand successfully"); + assert!( + result.unwrap().contains("VALUES"), + "Should produce a VALUES clause" + ); + } } /// Required by `cargo pgrx test` diff --git a/src/types.rs b/src/types.rs index db2da4a..fb558f5 100644 --- a/src/types.rs +++ b/src/types.rs @@ -614,6 +614,10 @@ fn extract_column_value( /// Expand `$name.*` into an inline `VALUES` subquery (SQL) or JSON array (raw). fn expand_row_set(name: &str, json_str: &str, for_sql: bool) -> Result { + /// Maximum number of rows allowed in `$name.*` expansion to prevent + /// unbounded SQL string allocation from large result sets. 
+ const MAX_ROWSET_EXPANSION: usize = 10_000; + + let json: serde_json::Value = serde_json::from_str(json_str) .map_err(|e| format!("${name}.* — invalid result JSON: {e}"))?; @@ -622,6 +626,15 @@ fn expand_row_set(name: &str, json_str: &str, for_sql: bool) -> Result MAX_ROWSET_EXPANSION { + return Err(format!( + "${name}.* — result has {} rows, exceeding the maximum of {} for row-set expansion. \ + Use pagination or intermediate tables for large result sets.", + rows.len(), + MAX_ROWSET_EXPANSION + )); + } + if !for_sql { return Ok(serde_json::to_string(rows).unwrap()); } @@ -1679,4 +1692,50 @@ mod tests { "should accept graph within node count limit" ); } + + #[test] + fn test_row_set_expansion_rejects_oversized_result() { + // Build a JSON result with 10,001 rows: one more than the 10,000-row limit + let mut rows = Vec::new(); + for i in 0..10_001 { + rows.push(serde_json::json!({"id": i})); + } + let json_str = serde_json::json!({"rows": rows, "row_count": 10_001}).to_string(); + let results = make_results(&[("big", &json_str)]); + + let result = substitute_all("SELECT * FROM $big.*", &results, &empty_vars(), &sys_vars()); + assert!( + result.is_err(), + "Should reject row-set expansion > 10,000 rows" + ); + let err = result.unwrap_err(); + assert!( + err.contains("exceeding the maximum"), + "Error should mention the limit, got: {err}" + ); + } + + #[test] + fn test_row_set_expansion_accepts_within_limit() { + // Build a JSON result with exactly 100 rows (well within the 10,000-row limit) + let mut rows = Vec::new(); + for i in 0..100 { + rows.push(serde_json::json!({"id": i, "name": format!("item_{i}")})); + } + let json_str = serde_json::json!({"rows": rows, "row_count": 100}).to_string(); + let results = make_results(&[("batch", &json_str)]); + + let result = substitute_all( + "SELECT * FROM $batch.*", + &results, + &empty_vars(), + &sys_vars(), + ); + assert!( + result.is_ok(), + "Should accept row-set expansion within limit" + ); + let sql = result.unwrap(); + assert!(sql.contains("VALUES"), "Should 
produce VALUES clause"); + } }