Add logs and extend metrics for graphql_validation_mode: both (#3674)

This adds logging for query validation errors with either Rust or JS when there is a mismatch, i.e. one of them validates but the other does not. In other cases we are not really interested in the specific error (it will just go back to the user), so we don't need to log there. To log the Rust validation error well, I now store the ApolloDiagnostics that were produced on `Query{}`. `Query` is serializable for caching, but ApolloDiagnostic is not. Here I just skipped serializing `ApolloDiagnostic` so if `Query` is loaded from cache, it does not have the validation error stored. I'm not sure this is the right thing to do. The ApolloDiagnostics are later used after query planning (which may produce a JS validation error). So it's correct if we can ~safely assume that we only have valid Query instances cached. Otherwise we might get spurious error logs from this. - [ ] So is that a safe assumption? Reading the CachingQueryPlanner implementation I think it does only store errors (then it's not a `Query` instance) and fully successful planning (then it has run both Rust and JS validation already). So it looks fine, but it could be a bit brittle to rely on this. I also simplified the validation error printing which - [x] depends on apollographql/apollo-rs#630. - [x] and on #3675  **Checklist** Complete the checklist (and note appropriate exceptions) before a final PR is raised. - [ ] Changes are compatible[^1] - [ ] Documentation[^2] completed - [ ] Performance impact assessed and acceptable - Tests added and passing[^3] - [ ] Unit Tests - [ ] Integration Tests - [ ] Manual Tests **Exceptions** *Note any exceptions here* **Notes** [^1]. It may be appropriate to bring upcoming changes to the attention of other (impacted) groups. Please endeavour to do this before seeking PR approval. The mechanism for doing this will vary considerably, so use your judgement as to how and when to do this. [^2]. Configuration is an important part of many changes. Where applicable please try to document configuration examples. [^3]. Tick whichever testing boxes are applicable. If you are adding Manual Tests: - please document the manual testing (extensively) in the Exceptions. - please raise a separate issue to automate the test and label it (or ask for it to be labeled) as `manual test`
apollographql · Sep 12, 2023 · 81fc803 · 81fc803
1 parent 2915737
commit 81fc803
Show file tree

Hide file tree

Showing 4 changed files with 86 additions and 65 deletions.
diff --git a/apollo-router/src/error.rs b/apollo-router/src/error.rs
@@ -1,5 +1,4 @@
 //! Router errors.
-use std::io::IsTerminal;
 use std::sync::Arc;
 
 use displaydoc::Display;
@@ -11,7 +10,6 @@ use serde::Deserialize;
 use serde::Serialize;
 use thiserror::Error;
 use tokio::task::JoinError;
-use tracing::level_filters::LevelFilter;
 
 pub(crate) use crate::configuration::ConfigurationError;
 pub(crate) use crate::graphql::Error;
@@ -438,6 +436,14 @@ impl From<SpecError> for QueryPlannerError {
     }
 }
 
+impl From<ValidationErrors> for QueryPlannerError {
+    fn from(err: ValidationErrors) -> Self {
+        // This needs to be serializable, so eagerly stringify the non-serializable
+        // ApolloDiagnostics.
+        QueryPlannerError::SpecError(SpecError::ValidationError(err.to_string()))
+    }
+}
+
 impl From<router_bridge::error::Error> for QueryPlannerError {
     fn from(error: router_bridge::error::Error) -> Self {
         QueryPlannerError::RouterBridgeError(error)
@@ -503,9 +509,9 @@ pub(crate) enum SchemaError {
     UrlParse(String, http::uri::InvalidUri),
     /// Could not find an URL for subgraph {0}
     MissingSubgraphUrl(String),
-    /// GraphQL parser error(s).
+    /// GraphQL parser error: {0}
     Parse(ParseErrors),
-    /// GraphQL parser or validation error(s).
+    /// GraphQL validation error: {0}
     Validate(ValidationErrors),
     /// Api error(s): {0}
     Api(String),
@@ -520,11 +526,16 @@ pub(crate) struct ParseErrors {
 impl std::fmt::Display for ParseErrors {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         let mut errors = self.errors.iter();
-        if let Some(error) = errors.next() {
-            write!(f, "{}", error.message())?;
+        for (i, error) in errors.by_ref().take(5).enumerate() {
+            if i > 0 {
+                f.write_str("\n")?;
+            }
+            // TODO(@goto-bus-stop): display line/column once that is exposed from apollo-rs
+            write!(f, "at index {}: {}", error.index(), error.message())?;
         }
-        for error in errors {
-            write!(f, "\n{}", error.message())?;
+        let remaining = errors.count();
+        if remaining > 0 {
+            write!(f, "\n...and {remaining} other errors")?;
         }
         Ok(())
     }
@@ -540,39 +551,15 @@ impl std::fmt::Display for ValidationErrors {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         let mut errors = self.errors.iter();
         if let Some(error) = errors.next() {
-            write!(f, "{}", error.data)?;
+            write!(f, "at index {}: {}", error.location.offset(), error.data)?;
         }
         for error in errors {
-            write!(f, "\n{}", error.data)?;
+            write!(f, "\nat index {}: {}", error.location.offset(), error.data)?;
         }
         Ok(())
     }
 }
 
-impl ValidationErrors {
-    #[allow(clippy::needless_return)]
-    pub(crate) fn print(&self) {
-        if LevelFilter::current() == LevelFilter::OFF && cfg!(not(debug_assertions)) {
-            return;
-        } else if std::io::stdout().is_terminal() {
-            // Fancy reports for TTYs
-            self.errors.iter().for_each(|err| {
-                // `format!` works around https://github.com/rust-lang/rust/issues/107118
-                // to test the panic from https://github.com/apollographql/router/issues/2269
-                #[allow(clippy::format_in_format_args)]
-                {
-                    println!("{}", format!("{err}"));
-                }
-            });
-        } else {
-            // Best effort to display errors
-            self.errors.iter().for_each(|diag| {
-                println!("{}", diag.data);
-            });
-        };
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;

diff --git a/apollo-router/src/query_planner/bridge_query_planner.rs b/apollo-router/src/query_planner/bridge_query_planner.rs
@@ -275,42 +275,74 @@ impl BridgeQueryPlanner {
         operation: Option<String>,
         selections: Query,
     ) -> Result<QueryPlannerContent, QueryPlannerError> {
+        /// Compare errors from graphql-js and apollo-rs validation, and produce metrics on
+        /// whether they had the same result.
+        ///
+        /// The result isn't inspected deeply: it only checks validation success/failure.
+        fn compare_validation_errors(
+            js_validation_error: Option<&router_bridge::planner::PlanErrors>,
+            rs_validation_error: Option<&crate::error::ValidationErrors>,
+        ) {
+            let is_validation_error = js_validation_error
+                .map_or(false, |js| js.errors.iter().all(|err| err.validation_error));
+            match (is_validation_error, rs_validation_error) {
+                (false, Some(validation_error)) => {
+                    tracing::warn!(
+                        monotonic_counter.apollo.router.validation = 1u64,
+                        validation.source = VALIDATION_SOURCE_OPERATION,
+                        validation.result = VALIDATION_FALSE_POSITIVE,
+                        "validation mismatch: JS query planner did not report query validation error, but apollo-rs did"
+                    );
+                    tracing::warn!(
+                        "validation mismatch: Rust validation reported: {validation_error}"
+                    );
+                }
+                (true, None) => {
+                    tracing::warn!(
+                        monotonic_counter.apollo.router.validation = 1u64,
+                        validation.source = VALIDATION_SOURCE_OPERATION,
+                        validation.result = VALIDATION_FALSE_NEGATIVE,
+                        "validation mismatch: apollo-rs did not report query validation error, but JS query planner did"
+                    );
+                    tracing::warn!(
+                        "validation mismatch: JS validation reported: {}",
+                        // Unwrapping is safe because `is_validation_error` is true
+                        js_validation_error.unwrap(),
+                    );
+                }
+                // if JS and Rust implementations agree, we return the JS result for now.
+                _ => tracing::info!(
+                    monotonic_counter.apollo.router.validation = 1u64,
+                    validation.source = VALIDATION_SOURCE_OPERATION,
+                    validation.result = VALIDATION_MATCH,
+                ),
+            }
+        }
+
         let planner_result = self
             .planner
             .plan(filtered_query.clone(), operation.clone())
             .await
             .map_err(QueryPlannerError::RouterBridgeError)?
             .into_result()
             .map_err(|err| {
-                let is_validation_error = err.errors.iter().all(|err| err.validation_error);
-                match (is_validation_error, &selections.validation_error) {
-                    (false, Some(_)) => {
-                        tracing::warn!(
-                            monotonic_counter.apollo.router.validation = 1u64,
-                            validation.source = VALIDATION_SOURCE_OPERATION,
-                            validation.result = VALIDATION_FALSE_POSITIVE,
-                            "validation mismatch: JS query planner did not report query validation error, but apollo-rs did"
-                        );
-                    }
-                    (true, None) => {
-                        tracing::warn!(
-                            monotonic_counter.apollo.router.validation = 1u64,
-                            validation.source = VALIDATION_SOURCE_OPERATION,
-                            validation.result = VALIDATION_FALSE_NEGATIVE,
-                            "validation mismatch: apollo-rs did not report query validation error, but JS query planner did"
-                        );
-                    }
-                    // if JS and Rust implementations agree, we return the JS result for now.
-                    _ => tracing::info!(
-                            monotonic_counter.apollo.router.validation = 1u64,
-                            validation.source = VALIDATION_SOURCE_OPERATION,
-                            validation.result = VALIDATION_MATCH,
-                    ),
+                if matches!(
+                    self.configuration.experimental_graphql_validation_mode,
+                    GraphQLValidationMode::Both
+                ) {
+                    compare_validation_errors(Some(&err), selections.validation_error.as_ref());
                 }
 
                 QueryPlannerError::from(err)
             })?;
 
+        if matches!(
+            self.configuration.experimental_graphql_validation_mode,
+            GraphQLValidationMode::Both
+        ) {
+            compare_validation_errors(None, selections.validation_error.as_ref());
+        }
+
         // the `statsReportKey` field should match the original query instead of the filtered query, to index them all under the same query
         let operation_signature = if original_query != filtered_query {
             Some(

diff --git a/apollo-router/src/spec/query.rs b/apollo-router/src/spec/query.rs
@@ -69,9 +69,12 @@ pub(crate) struct Query {
     pub(crate) is_original: bool,
     /// Validation errors, used for comparison with the JS implementation.
     ///
+    /// `ValidationErrors` is not serde-serializable. If this comes from cache,
+    /// the plan ought also to be cached, so we should not need this value anyways.
     /// XXX(@goto-bus-stop): Remove when only Rust validation is used
     #[derivative(PartialEq = "ignore", Hash = "ignore")]
-    pub(crate) validation_error: Option<SpecError>,
+    #[serde(skip)]
+    pub(crate) validation_error: Option<ValidationErrors>,
 }
 
 #[derive(Debug, Serialize, Deserialize)]
@@ -318,7 +321,10 @@ impl Query {
     }
 
     /// Check for validation errors in a query in the compiler.
-    pub(crate) fn validate_query(compiler: &ApolloCompiler, id: FileId) -> Result<(), SpecError> {
+    pub(crate) fn validate_query(
+        compiler: &ApolloCompiler,
+        id: FileId,
+    ) -> Result<(), ValidationErrors> {
         // Bail out on validation errors, only if the input is expected to be valid
         let diagnostics = compiler.db.validate_executable(id);
         let errors = diagnostics
@@ -330,10 +336,7 @@ impl Query {
             return Ok(());
         }
 
-        let errors = ValidationErrors { errors };
-        errors.print();
-
-        Err(SpecError::ValidationError(errors.to_string()))
+        Err(ValidationErrors { errors })
     }
 
     /// Extract serializable data structures from the apollo-compiler HIR.

diff --git a/apollo-router/src/spec/schema.rs b/apollo-router/src/spec/schema.rs
@@ -91,7 +91,6 @@ impl Schema {
             let errors = ValidationErrors {
                 errors: diagnostics.clone(),
             };
-            errors.print();
 
             // Only error out if new validation is used: with `Both`, we take the legacy
             // validation as authoritative and only use the new result for comparison