From aa94a4b24aee8b4c7d6074c6aaf413176672528b Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Wed, 1 May 2024 12:16:08 -0400 Subject: [PATCH] feat: Custom keyword validation (#473) Signed-off-by: Dmitry Dygalo Co-authored-by: Benjamin Tobler Co-authored-by: Benjamin Tobler Co-authored-by: Dmitry Dygalo --- CHANGELOG.md | 1 + jsonschema/src/compilation/mod.rs | 259 +++++++++++++++++++++++++- jsonschema/src/compilation/options.rs | 78 +++++++- jsonschema/src/error.rs | 23 ++- jsonschema/src/keywords/custom.rs | 84 +++++++++ jsonschema/src/keywords/mod.rs | 1 + jsonschema/src/lib.rs | 1 + 7 files changed, 440 insertions(+), 7 deletions(-) create mode 100644 jsonschema/src/keywords/custom.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e76acef..53edefa1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Added +- Custom keywords support. [#379](https://github.com/Stranger6667/jsonschema-rs/issues/429) - Expose `JsonPointerNode` that can be converted into `JSONPointer`. This is needed for the upcoming custom validators support. 
diff --git a/jsonschema/src/compilation/mod.rs b/jsonschema/src/compilation/mod.rs index 202c1bcd..0245ec7b 100644 --- a/jsonschema/src/compilation/mod.rs +++ b/jsonschema/src/compilation/mod.rs @@ -6,7 +6,7 @@ pub(crate) mod options; use crate::{ error::ErrorIterator, - keywords, + keywords::{self, custom::CustomKeyword, BoxedValidator}, output::Output, paths::{JSONPointer, JsonPointerNode}, primitive_type::{PrimitiveType, PrimitiveTypesBitMap}, @@ -198,7 +198,13 @@ pub(crate) fn compile_validators<'a>( { is_props = true; } - if let Some(validator) = context + // Check if this keyword is overridden, then check the standard definitions + if let Some(factory) = context.config.get_keyword_factory(keyword) { + let path = context.as_pointer_with(keyword.to_owned()); + let validator = CustomKeyword::new(factory.init(object, subschema, path)?); + let validator: BoxedValidator = Box::new(validator); + validators.push((keyword.clone(), validator)); + } else if let Some(validator) = context .config .draft() .get_validator(keyword) @@ -244,8 +250,17 @@ pub(crate) fn compile_validators<'a>( #[cfg(test)] mod tests { use super::JSONSchema; - use crate::error::ValidationError; - use serde_json::{from_str, json, Value}; + use crate::{ + error::{self, no_error, ValidationError}, + keywords::custom::Keyword, + paths::{JSONPointer, JsonPointerNode}, + primitive_type::PrimitiveType, + ErrorIterator, + }; + use num_cmp::NumCmp; + use once_cell::sync::Lazy; + use regex::Regex; + use serde_json::{from_str, json, Map, Value}; use std::{fs::File, io::Read, path::Path}; fn load(path: &str, idx: usize) -> Value { @@ -302,4 +317,240 @@ mod tests { ); assert_eq!(errors[1].to_string(), r#""a" is shorter than 3 characters"#); } + + #[test] + fn custom_keyword_definition() { + /// Define a custom validator that verifies the object's keys consist of + /// only ASCII representable characters. 
+ /// NOTE: This could be done with `propertyNames` + `pattern` but will be slower due to + /// regex usage. + struct CustomObjectValidator; + impl Keyword for CustomObjectValidator { + fn validate<'instance>( + &self, + instance: &'instance Value, + instance_path: &JsonPointerNode, + ) -> ErrorIterator<'instance> { + let mut errors = vec![]; + for key in instance.as_object().unwrap().keys() { + if !key.is_ascii() { + let error = ValidationError::custom( + JSONPointer::default(), + instance_path.into(), + instance, + "Key is not ASCII", + ); + errors.push(error); + } + } + Box::new(errors.into_iter()) + } + + fn is_valid(&self, instance: &Value) -> bool { + for (key, _value) in instance.as_object().unwrap() { + if !key.is_ascii() { + return false; + } + } + true + } + } + + fn custom_object_type_factory<'a>( + _: &'a Map, + schema: &'a Value, + path: JSONPointer, + ) -> Result, ValidationError<'a>> { + const EXPECTED: &str = "ascii-keys"; + if schema.as_str().map_or(true, |key| key != EXPECTED) { + Err(ValidationError::constant_string( + JSONPointer::default(), + path, + schema, + EXPECTED, + )) + } else { + Ok(Box::new(CustomObjectValidator)) + } + } + + // Define a JSON schema that enforces the top level object has ASCII keys and has at least 1 property + let schema = + json!({ "custom-object-type": "ascii-keys", "type": "object", "minProperties": 1 }); + let compiled = JSONSchema::options() + .with_keyword("custom-object-type", custom_object_type_factory) + .compile(&schema) + .unwrap(); + + // Verify schema validation detects object with too few properties + let instance = json!({}); + assert!(compiled.validate(&instance).is_err()); + assert!(!compiled.is_valid(&instance)); + + // Verify validator succeeds on a valid custom-object-type + let instance = json!({ "a" : 1 }); + assert!(compiled.validate(&instance).is_ok()); + assert!(compiled.is_valid(&instance)); + + // Verify validator detects invalid custom-object-type + let instance = json!({ "å" : 1 }); + let 
error = compiled + .validate(&instance) + .expect_err("Should fail") + .next() + .expect("Not empty"); + assert_eq!(error.to_string(), "Key is not ASCII"); + assert!(!compiled.is_valid(&instance)); + } + + #[test] + fn custom_format_and_override_keyword() { + /// Check that a string has some number of digits followed by a dot followed by exactly 2 digits. + fn currency_format_checker(s: &str) -> bool { + static CURRENCY_RE: Lazy = Lazy::new(|| { + Regex::new("^(0|([1-9]+[0-9]*))(\\.[0-9]{2})$").expect("Invalid regex") + }); + CURRENCY_RE.is_match(s) + } + /// A custom keyword validator that overrides "minimum" + /// so that "minimum" may apply to "currency"-formatted strings as well. + struct CustomMinimumValidator { + limit: f64, + limit_val: Value, + with_currency_format: bool, + schema_path: JSONPointer, + } + + impl Keyword for CustomMinimumValidator { + fn validate<'instance>( + &self, + instance: &'instance Value, + instance_path: &JsonPointerNode, + ) -> ErrorIterator<'instance> { + if self.is_valid(instance) { + no_error() + } else { + error::error(ValidationError::minimum( + self.schema_path.clone(), + instance_path.into(), + instance, + self.limit_val.clone(), + )) + } + } + + fn is_valid(&self, instance: &Value) -> bool { + match instance { + // Numeric comparison should happen just like original behavior + Value::Number(instance) => { + if let Some(item) = instance.as_u64() { + !NumCmp::num_lt(item, self.limit) + } else if let Some(item) = instance.as_i64() { + !NumCmp::num_lt(item, self.limit) + } else { + let item = instance.as_f64().expect("Always valid"); + !NumCmp::num_lt(item, self.limit) + } + } + // String comparison should cast currency-formatted + Value::String(instance) => { + if self.with_currency_format && currency_format_checker(instance) { + // all preconditions for minimum applying are met + let value = instance + .parse::() + .expect("format validated by regex checker"); + !NumCmp::num_lt(value, self.limit) + } else { + true + } + } + 
// In all other cases, the "minimum" keyword should not apply + _ => true, + } + } + } + + /// Build a validator that overrides the standard `minimum` keyword + fn custom_minimum_factory<'a>( + parent: &'a Map, + schema: &'a Value, + schema_path: JSONPointer, + ) -> Result, ValidationError<'a>> { + let limit = if let Value::Number(limit) = schema { + limit.as_f64().expect("Always valid") + } else { + return Err(ValidationError::single_type_error( + // There is no metaschema definition for a custom keyword, hence empty `schema` pointer + JSONPointer::default(), + schema_path, + schema, + PrimitiveType::Number, + )); + }; + let with_currency_format = parent + .get("format") + .map_or(false, |format| format == "currency"); + Ok(Box::new(CustomMinimumValidator { + limit, + limit_val: schema.clone(), + with_currency_format, + schema_path, + })) + } + + // Schema includes both the custom format and the overridden keyword + let schema = json!({ "minimum": 2, "type": "string", "format": "currency" }); + let compiled = JSONSchema::options() + .with_format("currency", currency_format_checker) + .with_keyword("minimum", custom_minimum_factory) + .with_keyword("minimum-2", |_, _, _| todo!()) + .compile(&schema) + .expect("Invalid schema"); + + // Control: verify schema validation rejects non-string types + let instance = json!(15); + assert!(compiled.validate(&instance).is_err()); + assert!(!compiled.is_valid(&instance)); + + // Control: verify validator rejects ill-formatted strings + let instance = json!("not a currency"); + assert!(compiled.validate(&instance).is_err()); + assert!(!compiled.is_valid(&instance)); + + // Verify validator allows properly formatted strings that conform to custom keyword + let instance = json!("3.00"); + assert!(compiled.validate(&instance).is_ok()); + assert!(compiled.is_valid(&instance)); + + // Verify validator rejects properly formatted strings that do not conform to custom keyword + let instance = json!("1.99"); + 
assert!(compiled.validate(&instance).is_err()); + assert!(!compiled.is_valid(&instance)); + + // Define another schema that applies "minimum" to an integer to ensure original behavior + let schema = json!({ "minimum": 2, "type": "integer" }); + let compiled = JSONSchema::options() + .with_format("currency", currency_format_checker) + .with_keyword("minimum", custom_minimum_factory) + .compile(&schema) + .expect("Invalid schema"); + + // Verify schema allows integers greater than 2 + let instance = json!(3); + assert!(compiled.validate(&instance).is_ok()); + assert!(compiled.is_valid(&instance)); + + // Verify schema rejects integers less than 2 + let instance = json!(1); + assert!(compiled.validate(&instance).is_err()); + assert!(!compiled.is_valid(&instance)); + + // Invalid `minimum` value + let schema = json!({ "minimum": "foo" }); + let error = JSONSchema::options() + .with_keyword("minimum", custom_minimum_factory) + .compile(&schema) + .expect_err("Should fail"); + assert_eq!(error.to_string(), "\"foo\" is not of type \"number\""); + } } diff --git a/jsonschema/src/compilation/options.rs b/jsonschema/src/compilation/options.rs index d020e595..d3507486 100644 --- a/jsonschema/src/compilation/options.rs +++ b/jsonschema/src/compilation/options.rs @@ -5,8 +5,10 @@ use crate::{ DEFAULT_CONTENT_ENCODING_CHECKS_AND_CONVERTERS, }, content_media_type::{ContentMediaTypeCheckType, DEFAULT_CONTENT_MEDIA_TYPE_CHECKS}, + keywords::custom::KeywordFactory, + paths::JSONPointer, resolver::{DefaultResolver, Resolver, SchemaResolver}, - schemas, ValidationError, + schemas, Keyword, ValidationError, }; use ahash::AHashMap; use once_cell::sync::Lazy; @@ -275,6 +277,7 @@ pub struct CompilationOptions { validate_formats: Option, validate_schema: bool, ignore_unknown_formats: bool, + keywords: AHashMap>, } impl Default for CompilationOptions { @@ -289,6 +292,7 @@ impl Default for CompilationOptions { formats: AHashMap::default(), validate_formats: None, ignore_unknown_formats: 
true, + keywords: AHashMap::default(), } } } @@ -637,6 +641,78 @@ impl CompilationOptions { pub(crate) const fn are_unknown_formats_ignored(&self) -> bool { self.ignore_unknown_formats } + + /// Register a custom keyword definition. + /// + /// ## Example + /// + /// ```rust + /// # use jsonschema::{ErrorIterator, JSONSchema, paths::{JsonPointerNode, JSONPointer}, Keyword, ValidationError}; + /// # use serde_json::{json, Value, Map}; + /// # use std::{sync::Arc, iter::once}; + /// + /// struct MyCustomValidator; + /// + /// impl Keyword for MyCustomValidator { + /// fn validate<'instance>( + /// &self, + /// instance: &'instance Value, + /// instance_path: &JsonPointerNode, + /// ) -> ErrorIterator<'instance> { + /// // ... validate instance ... + /// if !instance.is_object() { + /// let error = ValidationError::custom( + /// JSONPointer::default(), + /// instance_path.into(), + /// instance, + /// "Boom!", + /// ); + /// Box::new(once(error)) + /// } else { + /// Box::new(None.into_iter()) + /// } + /// } + /// fn is_valid(&self, instance: &Value) -> bool { + /// // ... determine if instance is valid ... + /// true + /// } + /// } + /// + /// // You can create a factory function, or use a closure to create new validator instances. 
+ /// fn custom_validator_factory<'a>( + /// parent: &'a Map, + /// schema: &'a Value, + /// path: JSONPointer, + /// ) -> Result, ValidationError<'a>> { + /// Ok(Box::new(MyCustomValidator)) + /// } + /// + /// assert!(JSONSchema::options() + /// .with_keyword("my-type", custom_validator_factory) + /// .with_keyword("my-type-with-closure", |_, _, _| Ok(Box::new(MyCustomValidator))) + /// .compile(&json!({ "my-type": "my-schema"})) + /// .expect("A valid schema") + /// .is_valid(&json!({ "a": "b"}))); + /// ``` + pub fn with_keyword(&mut self, name: N, factory: F) -> &mut Self + where + N: Into, + F: for<'a> Fn( + &'a serde_json::Map, + &'a serde_json::Value, + JSONPointer, + ) -> Result, ValidationError<'a>> + + Send + + Sync + + 'static, + { + self.keywords.insert(name.into(), Arc::new(factory)); + self + } + + pub(crate) fn get_keyword_factory(&self, name: &str) -> Option<&Arc> { + self.keywords.get(name) + } } // format name & a pointer to a check function type FormatKV<'a> = Option<(&'a &'static str, &'a fn(&str) -> bool)>; diff --git a/jsonschema/src/error.rs b/jsonschema/src/error.rs index e61039c0..201b3e73 100644 --- a/jsonschema/src/error.rs +++ b/jsonschema/src/error.rs @@ -7,8 +7,8 @@ use crate::{ use serde_json::{Map, Number, Value}; use std::{ borrow::Cow, - error, fmt, - fmt::Formatter, + error, + fmt::{self, Formatter}, io, iter::{empty, once}, str::Utf8Error, @@ -80,6 +80,8 @@ pub enum ValidationErrorKind { ContentEncoding { content_encoding: String }, /// The input value does not respect the defined contentMediaType ContentMediaType { content_media_type: String }, + /// Custom error message for user-defined validation. + Custom { message: String }, /// The input value doesn't match any of specified options. Enum { options: Value }, /// Value is too large. @@ -735,6 +737,22 @@ impl<'a> ValidationError<'a> { schema_path: JSONPointer::default(), } } + /// Create a new custom validation error. 
+ pub fn custom( + schema_path: JSONPointer, + instance_path: JSONPointer, + instance: &'a Value, + message: impl Into, + ) -> ValidationError<'a> { + ValidationError { + instance_path, + instance: Cow::Borrowed(instance), + kind: ValidationErrorKind::Custom { + message: message.into(), + }, + schema_path, + } + } } impl error::Error for ValidationError<'_> {} @@ -994,6 +1012,7 @@ impl fmt::Display for ValidationError<'_> { .collect::>() .join(", ") ), + ValidationErrorKind::Custom { message } => f.write_str(message), } } } diff --git a/jsonschema/src/keywords/custom.rs b/jsonschema/src/keywords/custom.rs new file mode 100644 index 00000000..8d596bff --- /dev/null +++ b/jsonschema/src/keywords/custom.rs @@ -0,0 +1,84 @@ +use crate::{ + paths::{JSONPointer, JsonPointerNode}, + validator::Validate, + ErrorIterator, ValidationError, +}; +use serde_json::{Map, Value}; +use std::fmt::{Display, Formatter}; + +pub(crate) struct CustomKeyword { + inner: Box, +} + +impl CustomKeyword { + pub(crate) fn new(inner: Box) -> Self { + Self { inner } + } +} + +impl Display for CustomKeyword { + fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { + Ok(()) + } +} + +impl Validate for CustomKeyword { + fn validate<'instance>( + &self, + instance: &'instance Value, + instance_path: &JsonPointerNode, + ) -> ErrorIterator<'instance> { + self.inner.validate(instance, instance_path) + } + + fn is_valid(&self, instance: &Value) -> bool { + self.inner.is_valid(instance) + } +} + +/// Trait that allows implementing custom validation for keywords. +pub trait Keyword: Send + Sync { + /// Validate [instance](Value) according to a custom specification + /// + /// A custom keyword validator may be used when a validation cannot be + /// easily or efficiently expressed in JSON schema. + /// + /// The custom validation is applied in addition to the JSON schema validation. 
+ fn validate<'instance>( + &self, + instance: &'instance Value, + instance_path: &JsonPointerNode, + ) -> ErrorIterator<'instance>; + /// Validate [instance](Value) and return a boolean result. + /// Could be potentially faster than the `validate` method. + fn is_valid(&self, instance: &Value) -> bool; +} + +pub(crate) trait KeywordFactory: Send + Sync { + fn init<'a>( + &self, + parent: &'a Map, + schema: &'a Value, + path: JSONPointer, + ) -> Result, ValidationError<'a>>; +} + +impl KeywordFactory for F +where + F: for<'a> Fn( + &'a Map, + &'a Value, + JSONPointer, + ) -> Result, ValidationError<'a>> + + Send + + Sync, +{ + fn init<'a>( + &self, + parent: &'a Map, + schema: &'a Value, + path: JSONPointer, + ) -> Result, ValidationError<'a>> { + self(parent, schema, path) + } +} diff --git a/jsonschema/src/keywords/mod.rs b/jsonschema/src/keywords/mod.rs index 09de5cf3..71701fe5 100644 --- a/jsonschema/src/keywords/mod.rs +++ b/jsonschema/src/keywords/mod.rs @@ -6,6 +6,7 @@ pub(crate) mod boolean; pub(crate) mod const_; pub(crate) mod contains; pub(crate) mod content; +pub(crate) mod custom; pub(crate) mod dependencies; pub(crate) mod enum_; pub(crate) mod exclusive_maximum; diff --git a/jsonschema/src/lib.rs b/jsonschema/src/lib.rs index 4dda3f19..8ddb7edb 100644 --- a/jsonschema/src/lib.rs +++ b/jsonschema/src/lib.rs @@ -99,6 +99,7 @@ mod validator; pub use compilation::{options::CompilationOptions, JSONSchema}; pub use error::{ErrorIterator, ValidationError}; +pub use keywords::custom::Keyword; pub use resolver::{SchemaResolver, SchemaResolverError}; pub use schemas::Draft;