From b150d7d85c6d37ef56bf041ca4123e4986f63d12 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 14 Nov 2023 14:07:42 +0100 Subject: [PATCH] Lower default GraphQL parser recursion limit to 500 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous default of 4096 was too high. I can reliable make the Router process (compiled in release mode) abort with a stack overflow with ~2400 nested selection sets. `deeply_nested.rs` demonstrates it. It as "benchmark" because that’s an easy way to get access to a Router compiled in release mode. It is not run on CI because CI currently does not compile in release mode. Note that the Router cannot handle response JSON data more nested than 128 levels anyway:`serde_json::Deserializer` has its own recursion limit, hard-coded to 128. It can be disabled (with both run-time *and* compile-time opt-in) but not changed. Our corpus of 5.3 million operations from customers has 8 entries that use more recursion than the new default. See also: * https://github.com/apollographql/apollo-rs/pull/662 * https://github.com/apollographql/apollo-rs/pull/721 --- .../fix_simon_recursion_limit_default.md | 19 ++ apollo-router/Cargo.toml | 4 + apollo-router/benches/deeply_nested.rs | 222 ++++++++++++++++++ .../benches/deeply_nested/router.yaml | 11 + .../benches/deeply_nested/supergraph.graphql | 34 +++ apollo-router/src/configuration/mod.rs | 6 +- ...nfiguration__tests__schema_generation.snap | 6 +- .../testdata/metrics/limits.router.yaml | 2 +- apollo-router/src/configuration/tests.rs | 1 + docs/source/configuration/overview.mdx | 4 +- 10 files changed, 300 insertions(+), 9 deletions(-) create mode 100644 .changesets/fix_simon_recursion_limit_default.md create mode 100644 apollo-router/benches/deeply_nested.rs create mode 100644 apollo-router/benches/deeply_nested/router.yaml create mode 100644 apollo-router/benches/deeply_nested/supergraph.graphql diff --git a/.changesets/fix_simon_recursion_limit_default.md b/.changesets/fix_simon_recursion_limit_default.md new file mode 100644 index 0000000000..a4741ad767 --- /dev/null +++ b/.changesets/fix_simon_recursion_limit_default.md @@ -0,0 +1,19 @@ +### Lower default GraphQL parser recursion limit to 500 ([PR #4205](https://github.com/apollographql/router/pull/4205)) + +The GraphQL parser uses recursion for nested selection sets, list values, or object values. +The nesting level is limited to protect against stack overflow. +This changes the default limit to 500, from 4096 which we found to be too high. +It is possible to change the limit (or backport the new default to older Router versions) +in YAML configuration: + +```yaml +limits: + parser_max_recursion: 700 +``` + +However deeply-nested selection sets often cause deeply-nested response data. +When handling a response from a subgraph, +the JSON parser has its own recursion limit of 128 nesting levels. +That limit is not configurable. + +By [@SimonSapin](https://github.com/SimonSapin) in https://github.com/apollographql/router/pull/4205 diff --git a/apollo-router/Cargo.toml b/apollo-router/Cargo.toml index 64e77e69d9..fec5b1ac68 100644 --- a/apollo-router/Cargo.toml +++ b/apollo-router/Cargo.toml @@ -325,3 +325,7 @@ path = "tests/integration_tests.rs" [[bench]] name = "huge_requests" harness = false + +[[bench]] +name = "deeply_nested" +harness = false diff --git a/apollo-router/benches/deeply_nested.rs b/apollo-router/benches/deeply_nested.rs new file mode 100644 index 0000000000..3a0c31bdfb --- /dev/null +++ b/apollo-router/benches/deeply_nested.rs @@ -0,0 +1,222 @@ +//! Exercise the Router, compiled in release mode, with very deeply-nested selections sets +//! and response data. +//! +//! Run with `cargo bench --bench deeply_nested` + +#![allow(clippy::single_char_add_str)] // don’t care + +use std::fmt::Write; + +use futures::stream::StreamExt; +use serde_json_bytes::Value; +use tokio::io::AsyncBufReadExt; +use tokio::process::Command; + +const ROUTER_EXE: &str = env!("CARGO_BIN_EXE_router"); + +// chosen by fair dice roll. guaranteed to be random. https://xkcd.com/221/ +const SUBGRAPH_PORT: u16 = 44168; // hard-coded in deeply_nested/supergraph.graphql + +const SUPERGRAPH_PORT: u16 = 44167; // hard-coded in deeply_nested/router.yaml + +const VERBOSE: bool = false; + +#[tokio::main] +async fn main() { + if VERBOSE { + println!("Router executable: {ROUTER_EXE}"); + } + assert!(ROUTER_EXE.contains("release")); + macro_rules! request { + ($nesting_level: expr) => {{ + let level = $nesting_level; + let result = graphql_client(level).await; + if let Err(error) = &result { + if VERBOSE { + if error.len() < 200 { + println!("Error at {level} nesting levels: {error}\n"); + } else { + println!( + "Error at {level} nesting levels: {}[…]{}\n", + &error[..100], + &error[error.len() - 100..] + ); + } + } + } + result + }}; + } + + let _subgraph = spawn_subgraph(); + + let graphql_recursion_limit = 5_000; + let _router = spawn_router(graphql_recursion_limit).await; + + assert_eq!( + request!(8).unwrap().to_string(), + r#"{"value":0,"next":{"value":1,"next":{"value":1,"next":{"value":2,"next":{"value":3,"next":{"value":5,"next":{"value":8,"next":{"value":13,"next":{"value":21}}}}}}}}}"# + ); + + assert!(request!(125).is_ok()); + + // JSON parser recursion limit in serde_json::Deserializier + assert!(request!(126) + .unwrap_err() + .contains("service 'subgraph_1' response was malformed: recursion limit exceeded")); + + // Stack overflow: the router process aborts before it can send a response + // + // As of commit 6e426ddf2fe9480210dfa74c85040db498c780a2 (Router 1.33.2+), + // with Rust 1.72.0 on aarch64-apple-darwin, this happens starting at ~2400 nesting levels. + assert!(request!(3000) + .unwrap_err() + .contains("connection closed before message completed")); + + let graphql_recursion_limit = 500; + let _router = spawn_router(graphql_recursion_limit).await; + + // GraphQL parser recursion limit in apollo-parser + assert!(request!(500) + .unwrap_err() + .contains("Error: parser recursion limit reached")); +} + +async fn spawn_router(graphql_recursion_limit: usize) -> tokio::process::Child { + let mut child = Command::new(ROUTER_EXE) + .args(["-s", "supergraph.graphql", "-c", "router.yaml"]) + .env("PARSER_MAX_RECURSION", graphql_recursion_limit.to_string()) + .current_dir( + std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .join("benches") + .join("deeply_nested"), + ) + .kill_on_drop(true) + .stdout(std::process::Stdio::piped()) + .stderr(if VERBOSE { + std::process::Stdio::inherit() + } else { + std::process::Stdio::null() + }) + .spawn() + .unwrap(); + + let mut router_stdout = tokio::io::BufReader::new(child.stdout.take().unwrap()).lines(); + let (tx, rx) = tokio::sync::oneshot::channel::<()>(); + tokio::spawn(async move { + let mut tx = Some(tx); + while let Some(line) = router_stdout.next_line().await.unwrap() { + if line.contains("GraphQL endpoint exposed") { + if let Some(tx) = tx.take() { + let _ = tx.send(()); + // Don’t stop here, keep consuming output so the pipe doesn’t block on a full buffer + } + } + if VERBOSE { + println!("{}", line); + } + } + }); + rx.await.unwrap(); + child +} + +async fn graphql_client(nesting_level: usize) -> Result { + let mut json = String::from(r#"{"query":"{value"#); + for _ in 0..nesting_level { + json.push_str(" next{value"); + } + for _ in 0..nesting_level { + json.push_str("}"); + } + json.push_str(r#"}"}"#); + let request = hyper::Request::post(format!("http://127.0.0.1:{SUPERGRAPH_PORT}")) + .header("content-type", "application/json") + .header("fibonacci-iterations", nesting_level) + .body(json.into()) + .unwrap(); + let client = hyper::Client::new(); + let mut response = client.request(request).await.map_err(|e| e.to_string())?; + let body = hyper::body::to_bytes(response.body_mut()) + .await + .map_err(|e| e.to_string())?; + let json = serde_json::from_slice::(&body).map_err(|e| e.to_string())?; + if let Some(errors) = json.get("errors") { + if !errors.is_null() { + return Err(errors.to_string()); + } + } + Ok(json.get("data").cloned().unwrap_or(Value::Null)) +} + +fn spawn_subgraph() -> ShutdownOnDrop { + let (tx, rx) = tokio::sync::oneshot::channel::<()>(); + let shutdown_on_drop = ShutdownOnDrop(Some(tx)); + + let service = hyper::service::make_service_fn(|_| async { + Ok::<_, hyper::Error>(hyper::service::service_fn(subgraph)) + }); + let server = hyper::Server::bind(&([127, 0, 0, 1], SUBGRAPH_PORT).into()) + .serve(service) + .with_graceful_shutdown(async { + let _ = rx.await; + }); + tokio::spawn(async move { + if let Err(e) = server.await { + eprintln!("server error: {}", e); + } + }); + shutdown_on_drop +} + +async fn subgraph( + request: hyper::Request, +) -> Result, hyper::Error> { + let nesting_level = request + .headers() + .get("fibonacci-iterations") + .unwrap() + .to_str() + .unwrap() + .parse::() + .unwrap(); + // Read the request body and prompty ignore it + request + .into_body() + .for_each(|chunk| { + let _: &[u8] = &chunk.unwrap(); + async {} + }) + .await; + // Assume we got a GraphQL request with that many nested selection sets + let mut json = String::from(r#"{"data":{"value":0"#); + let mut a = 1; + let mut b = 1; + for _ in 0..nesting_level { + json.push_str(r#","next":{"value":"#); + write!(&mut json, "{a}").unwrap(); + let next = a + b; + a = b; + b = next; + } + for _ in 0..nesting_level { + json.push_str("}"); + } + json.push_str("}}"); + let mut response = hyper::Response::new(hyper::Body::from(json)); + let application_json = hyper::header::HeaderValue::from_static("application/json"); + response + .headers_mut() + .insert("content-type", application_json); + Ok(response) +} + +struct ShutdownOnDrop(Option>); + +impl Drop for ShutdownOnDrop { + fn drop(&mut self) { + if let Some(tx) = self.0.take() { + let _ = tx.send(()); + } + } +} diff --git a/apollo-router/benches/deeply_nested/router.yaml b/apollo-router/benches/deeply_nested/router.yaml new file mode 100644 index 0000000000..9938b15a2d --- /dev/null +++ b/apollo-router/benches/deeply_nested/router.yaml @@ -0,0 +1,11 @@ +supergraph: + listen: 127.0.0.1:44167 +limits: + parser_max_recursion: ${env.PARSER_MAX_RECURSION} +include_subgraph_errors: + all: true +headers: + all: + request: + - propagate: + named: fibonacci-iterations diff --git a/apollo-router/benches/deeply_nested/supergraph.graphql b/apollo-router/benches/deeply_nested/supergraph.graphql new file mode 100644 index 0000000000..e28b86e773 --- /dev/null +++ b/apollo-router/benches/deeply_nested/supergraph.graphql @@ -0,0 +1,34 @@ +schema + @core(feature: "https://specs.apollo.dev/core/v0.1") + @core(feature: "https://specs.apollo.dev/join/v0.1") +{ + query: Query +} + +directive @core(feature: String!) repeatable on SCHEMA + +directive @join__field( + graph: join__Graph + requires: join__FieldSet + provides: join__FieldSet +) on FIELD_DEFINITION + +directive @join__type( + graph: join__Graph! + key: join__FieldSet +) repeatable on OBJECT | INTERFACE + +directive @join__owner(graph: join__Graph!) on OBJECT | INTERFACE + +directive @join__graph(name: String!, url: String!) on ENUM_VALUE + +scalar join__FieldSet + +enum join__Graph { + SUBGRAPH_1 @join__graph(name: "subgraph_1", url: "http://127.0.0.1:44168/") +} + +type Query { + value: Int! + next: Query +} diff --git a/apollo-router/src/configuration/mod.rs b/apollo-router/src/configuration/mod.rs index 09b7da6677..66163ff92c 100644 --- a/apollo-router/src/configuration/mod.rs +++ b/apollo-router/src/configuration/mod.rs @@ -758,7 +758,7 @@ pub(crate) struct Limits { pub(crate) warn_only: bool, /// Limit recursion in the GraphQL parser to protect against stack overflow. - /// default: 4096 + /// default: 500 pub(crate) parser_max_recursion: usize, /// Limit the number of tokens the GraphQL parser processes before aborting. @@ -783,8 +783,8 @@ impl Default for Limits { // This is `apollo-parser`’s default, which protects against stack overflow // but is still very high for "reasonable" queries. - // https://docs.rs/apollo-parser/0.2.8/src/apollo_parser/parser/mod.rs.html#368 - parser_max_recursion: 4096, + // https://github.com/apollographql/apollo-rs/blob/apollo-parser%400.7.3/crates/apollo-parser/src/parser/mod.rs#L93-L104 + parser_max_recursion: 500, } } } diff --git a/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap b/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap index 11a20f21e8..70807f321d 100644 --- a/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap +++ b/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap @@ -1645,7 +1645,7 @@ expression: "&schema" "max_root_fields": null, "max_aliases": null, "warn_only": false, - "parser_max_recursion": 4096, + "parser_max_recursion": 500, "parser_max_tokens": 15000, "experimental_http_max_request_bytes": 2000000 }, @@ -1691,8 +1691,8 @@ expression: "&schema" "nullable": true }, "parser_max_recursion": { - "description": "Limit recursion in the GraphQL parser to protect against stack overflow. default: 4096", - "default": 4096, + "description": "Limit recursion in the GraphQL parser to protect against stack overflow. default: 500", + "default": 500, "type": "integer", "format": "uint", "minimum": 0.0 diff --git a/apollo-router/src/configuration/testdata/metrics/limits.router.yaml b/apollo-router/src/configuration/testdata/metrics/limits.router.yaml index 6f7069ec85..07b68dd40c 100644 --- a/apollo-router/src/configuration/testdata/metrics/limits.router.yaml +++ b/apollo-router/src/configuration/testdata/metrics/limits.router.yaml @@ -4,6 +4,6 @@ limits: warn_only: true max_root_fields: 1 parser_max_tokens: 15000 - parser_max_recursion: 4096 + parser_max_recursion: 500 max_height: 2 max_aliases: 2 diff --git a/apollo-router/src/configuration/tests.rs b/apollo-router/src/configuration/tests.rs index 8ef806aefb..4b15fe66a5 100644 --- a/apollo-router/src/configuration/tests.rs +++ b/apollo-router/src/configuration/tests.rs @@ -400,6 +400,7 @@ fn validate_project_config_files() { std::env::set_var("ZIPKIN_HOST", "http://example.com"); std::env::set_var("TEST_CONFIG_ENDPOINT", "http://example.com"); std::env::set_var("TEST_CONFIG_COLLECTOR_ENDPOINT", "http://example.com"); + std::env::set_var("PARSER_MAX_RECURSION", "500"); #[cfg(not(unix))] let filename_matcher = Regex::from_str("((.+[.])?router\\.yaml)|(.+\\.mdx)").unwrap(); diff --git a/docs/source/configuration/overview.mdx b/docs/source/configuration/overview.mdx index 720f4de57b..6fc50130e7 100644 --- a/docs/source/configuration/overview.mdx +++ b/docs/source/configuration/overview.mdx @@ -696,7 +696,7 @@ limits: # Parser-based limits parser_max_tokens: 15000 # Default value - parser_max_recursion: 4096 # Default value + parser_max_recursion: 500 # Default value # Operation-based limits (Enterprise only) max_depth: 100 @@ -738,7 +738,7 @@ The default value is `15000`. Limits the deepest level of recursion allowed by the router's GraphQL parser to prevent stack overflows. This corresponds to the deepest nesting level of any single GraphQL operation or fragment defined in a query document. -The default value is `4096`. +The default value is `500`. In the example below, the `GetProducts` operation has a recursion of three, and the `ProductVariation` fragment has a recursion of two. Therefore, the _max_ recursion of the query document is three.