From ccffac7130b5014705c4a128a41ec9bf9e937978 Mon Sep 17 00:00:00 2001 From: David Harsha Date: Sat, 14 Oct 2023 11:16:16 -0700 Subject: [PATCH] Specify formats in meta schemas And use the same code paths as custom formats. This fixes an issue where older drafts have access to formats that aren't in the specification. --- lib/json_schemer.rb | 28 +-- lib/json_schemer/draft201909/meta.rb | 1 + lib/json_schemer/draft202012/meta.rb | 21 ++ .../draft202012/vocab/format_annotation.rb | 10 +- .../draft202012/vocab/format_assertion.rb | 8 +- lib/json_schemer/draft4/meta.rb | 4 + lib/json_schemer/draft6/meta.rb | 9 + lib/json_schemer/draft7/meta.rb | 3 + lib/json_schemer/format.rb | 203 ++++++++++-------- lib/json_schemer/openapi30/meta.rb | 5 + lib/json_schemer/openapi31/meta.rb | 8 + lib/json_schemer/schema.rb | 11 +- test/format_test.rb | 24 ++- 13 files changed, 198 insertions(+), 137 deletions(-) diff --git a/lib/json_schemer.rb b/lib/json_schemer.rb index f2c2817b..ecce26d7 100644 --- a/lib/json_schemer.rb +++ b/lib/json_schemer.rb @@ -145,6 +145,7 @@ def draft202012 @draft202012 ||= Schema.new( Draft202012::SCHEMA, :base_uri => Draft202012::BASE_URI, + :formats => Draft202012::FORMATS, :ref_resolver => Draft202012::Meta::SCHEMAS.to_proc, :regexp_resolver => 'ecma' ) @@ -154,6 +155,7 @@ def draft201909 @draft201909 ||= Schema.new( Draft201909::SCHEMA, :base_uri => Draft201909::BASE_URI, + :formats => Draft201909::FORMATS, :ref_resolver => Draft201909::Meta::SCHEMAS.to_proc, :regexp_resolver => 'ecma' ) @@ -164,6 +166,7 @@ def draft7 Draft7::SCHEMA, :vocabulary => { 'json-schemer://draft7' => true }, :base_uri => Draft7::BASE_URI, + :formats => Draft7::FORMATS, :regexp_resolver => 'ecma' ) end @@ -173,6 +176,7 @@ def draft6 Draft6::SCHEMA, :vocabulary => { 'json-schemer://draft6' => true }, :base_uri => Draft6::BASE_URI, + :formats => Draft6::FORMATS, :regexp_resolver => 'ecma' ) end @@ -182,6 +186,7 @@ def draft4 Draft4::SCHEMA, :vocabulary => { 'json-schemer://draft4' => true }, :base_uri => Draft4::BASE_URI, + :formats => Draft4::FORMATS, :regexp_resolver => 'ecma' ) end @@ -190,16 +195,9 @@ def openapi31 @openapi31 ||= Schema.new( OpenAPI31::SCHEMA, :base_uri => OpenAPI31::BASE_URI, + :formats => OpenAPI31::FORMATS, :ref_resolver => OpenAPI31::Meta::SCHEMAS.to_proc, - :regexp_resolver => 'ecma', - # https://spec.openapis.org/oas/latest.html#data-types - :formats => { - 'int32' => proc { |instance, _value| instance.is_a?(Integer) && instance.bit_length <= 32 }, - 'int64' => proc { |instance, _value| instance.is_a?(Integer) && instance.bit_length <= 64 }, - 'float' => proc { |instance, _value| instance.is_a?(Float) }, - 'double' => proc { |instance, _value| instance.is_a?(Float) }, - 'password' => proc { |_instance, _value| true } - } + :regexp_resolver => 'ecma' ) end @@ -211,17 +209,9 @@ def openapi30 'json-schemer://openapi30' => true }, :base_uri => OpenAPI30::BASE_URI, + :formats => OpenAPI30::FORMATS, :ref_resolver => OpenAPI30::Meta::SCHEMAS.to_proc, - :regexp_resolver => 'ecma', - :formats => { - 'int32' => proc { |instance, _value| instance.is_a?(Integer) && instance.bit_length <= 32 }, - 'int64' => proc { |instance, _value| instance.is_a?(Integer) && instance.bit_length <= 64 }, - 'float' => proc { |instance, _value| instance.is_a?(Float) }, - 'double' => proc { |instance, _value| instance.is_a?(Float) }, - 'byte' => proc { |instance, _value| Format.decode_content_encoding(instance, 'base64').first }, - 'binary' => proc { |instance, _value| instance.is_a?(String) && instance.encoding == Encoding::ASCII_8BIT }, - 'password' => proc { |_instance, _value| true } - } + :regexp_resolver => 'ecma' ) end diff --git a/lib/json_schemer/draft201909/meta.rb b/lib/json_schemer/draft201909/meta.rb index ac307a70..daeced90 100644 --- a/lib/json_schemer/draft201909/meta.rb +++ b/lib/json_schemer/draft201909/meta.rb @@ -2,6 +2,7 @@ module JSONSchemer module Draft201909 BASE_URI = URI('https://json-schema.org/draft/2019-09/schema') + FORMATS = Draft202012::FORMATS SCHEMA = { '$schema' => 'https://json-schema.org/draft/2019-09/schema', '$id' => 'https://json-schema.org/draft/2019-09/schema', diff --git a/lib/json_schemer/draft202012/meta.rb b/lib/json_schemer/draft202012/meta.rb index e31d7625..14e47c81 100644 --- a/lib/json_schemer/draft202012/meta.rb +++ b/lib/json_schemer/draft202012/meta.rb @@ -2,6 +2,27 @@ module JSONSchemer module Draft202012 BASE_URI = URI('https://json-schema.org/draft/2020-12/schema') + FORMATS = { + 'date-time' => Format::DATE_TIME, + 'date' => Format::DATE, + 'time' => Format::TIME, + 'duration' => Format::DURATION, + 'email' => Format::EMAIL, + 'idn-email' => Format::IDN_EMAIL, + 'hostname' => Format::HOSTNAME, + 'idn-hostname' => Format::IDN_HOSTNAME, + 'ipv4' => Format::IPV4, + 'ipv6' => Format::IPV6, + 'uri' => Format::URI, + 'uri-reference' => Format::URI_REFERENCE, + 'iri' => Format::IRI, + 'iri-reference' => Format::IRI_REFERENCE, + 'uuid' => Format::UUID, + 'uri-template' => Format::URI_TEMPLATE, + 'json-pointer' => Format::JSON_POINTER, + 'relative-json-pointer' => Format::RELATIVE_JSON_POINTER, + 'regex' => Format::REGEX + } SCHEMA = { '$schema' => 'https://json-schema.org/draft/2020-12/schema', '$id' => 'https://json-schema.org/draft/2020-12/schema', diff --git a/lib/json_schemer/draft202012/vocab/format_annotation.rb b/lib/json_schemer/draft202012/vocab/format_annotation.rb index 34c3b05b..9bc59cf7 100644 --- a/lib/json_schemer/draft202012/vocab/format_annotation.rb +++ b/lib/json_schemer/draft202012/vocab/format_annotation.rb @@ -4,20 +4,12 @@ module Draft202012 module Vocab module FormatAnnotation class Format < Keyword - extend JSONSchemer::Format - - DEFAULT_FORMAT = proc do |instance, value| - !instance.is_a?(String) || valid_spec_format?(instance, value) - rescue UnknownFormat - true - end - def error(formatted_instance_location:, **) "value at #{formatted_instance_location} does not match format: #{value}" end def parse - root.format && root.formats.fetch(value) { root.meta_schema.formats.fetch(value, DEFAULT_FORMAT) } + root.format && root.fetch_format(value, false) end def validate(instance, instance_location, keyword_location, _context) diff --git a/lib/json_schemer/draft202012/vocab/format_assertion.rb b/lib/json_schemer/draft202012/vocab/format_assertion.rb index 2ff6117c..05b5da16 100644 --- a/lib/json_schemer/draft202012/vocab/format_assertion.rb +++ b/lib/json_schemer/draft202012/vocab/format_assertion.rb @@ -4,18 +4,12 @@ module Draft202012 module Vocab module FormatAssertion class Format < Keyword - extend JSONSchemer::Format - - DEFAULT_FORMAT = proc do |instance, value| - !instance.is_a?(String) || valid_spec_format?(instance, value) - end - def error(formatted_instance_location:, **) "value at #{formatted_instance_location} does not match format: #{value}" end def parse - root.format && root.formats.fetch(value) { root.meta_schema.formats.fetch(value, DEFAULT_FORMAT) } + root.format && root.fetch_format(value) { raise UnknownFormat, value } end def validate(instance, instance_location, keyword_location, _context) diff --git a/lib/json_schemer/draft4/meta.rb b/lib/json_schemer/draft4/meta.rb index ed3421c8..10fc977b 100644 --- a/lib/json_schemer/draft4/meta.rb +++ b/lib/json_schemer/draft4/meta.rb @@ -2,6 +2,10 @@ module JSONSchemer module Draft4 BASE_URI = URI('http://json-schema.org/draft-04/schema#') + FORMATS = Draft6::FORMATS.dup + FORMATS.delete('uri-reference') + FORMATS.delete('uri-template') + FORMATS.delete('json-pointer') SCHEMA = { 'id' => 'http://json-schema.org/draft-04/schema#', '$schema' => 'http://json-schema.org/draft-04/schema#', diff --git a/lib/json_schemer/draft6/meta.rb b/lib/json_schemer/draft6/meta.rb index 9ffa8842..5fb958e0 100644 --- a/lib/json_schemer/draft6/meta.rb +++ b/lib/json_schemer/draft6/meta.rb @@ -2,6 +2,15 @@ module JSONSchemer module Draft6 BASE_URI = URI('http://json-schema.org/draft-06/schema#') + FORMATS = Draft7::FORMATS.dup + FORMATS.delete('date') + FORMATS.delete('time') + FORMATS.delete('idn-email') + FORMATS.delete('idn-hostname') + FORMATS.delete('iri') + FORMATS.delete('iri-reference') + FORMATS.delete('relative-json-pointer') + FORMATS.delete('regex') SCHEMA = { '$schema' => 'http://json-schema.org/draft-06/schema#', '$id' => 'http://json-schema.org/draft-06/schema#', diff --git a/lib/json_schemer/draft7/meta.rb b/lib/json_schemer/draft7/meta.rb index 3405404a..34396da9 100644 --- a/lib/json_schemer/draft7/meta.rb +++ b/lib/json_schemer/draft7/meta.rb @@ -2,6 +2,9 @@ module JSONSchemer module Draft7 BASE_URI = URI('http://json-schema.org/draft-07/schema#') + FORMATS = Draft201909::FORMATS.dup + FORMATS.delete('duration') + FORMATS.delete('uuid') SCHEMA = { '$schema' => 'http://json-schema.org/draft-07/schema#', '$id' => 'http://json-schema.org/draft-07/schema#', diff --git a/lib/json_schemer/format.rb b/lib/json_schemer/format.rb index fcf239ff..c11e339e 100644 --- a/lib/json_schemer/format.rb +++ b/lib/json_schemer/format.rb @@ -1,11 +1,71 @@ # frozen_string_literal: true module JSONSchemer module Format - include Duration - include Email - include Hostname - include JSONPointer - include URITemplate + # https://datatracker.ietf.org/doc/html/draft-bhutton-json-schema-validation-01#section-7.3 + DATE_TIME = proc do |instance, _format| + !instance.is_a?(String) || valid_date_time?(instance) + end + DATE = proc do |instance, _format| + !instance.is_a?(String) || valid_date_time?("#{instance}T04:05:06.123456789+07:00") + end + TIME = proc do |instance, _format| + !instance.is_a?(String) || valid_date_time?("2001-02-03T#{instance}") + end + DURATION = proc do |instance, _format| + !instance.is_a?(String) || valid_duration?(instance) + end + # https://datatracker.ietf.org/doc/html/draft-bhutton-json-schema-validation-01#section-7.3.2 + EMAIL = proc do |instance, _format| + !instance.is_a?(String) || instance.ascii_only? && valid_email?(instance) + end + IDN_EMAIL = proc do |instance, _format| + !instance.is_a?(String) || valid_email?(instance) + end + # https://datatracker.ietf.org/doc/html/draft-bhutton-json-schema-validation-01#section-7.3.3 + HOSTNAME = proc do |instance, _format| + !instance.is_a?(String) || instance.ascii_only? && valid_hostname?(instance) + end + IDN_HOSTNAME = proc do |instance, _format| + !instance.is_a?(String) || valid_hostname?(instance) + end + # https://datatracker.ietf.org/doc/html/draft-bhutton-json-schema-validation-01#section-7.3.4 + IPV4 = proc do |instance, _format| + !instance.is_a?(String) || valid_ip?(instance, Socket::AF_INET) + end + IPV6 = proc do |instance, _format| + !instance.is_a?(String) || valid_ip?(instance, Socket::AF_INET6) + end + # https://datatracker.ietf.org/doc/html/draft-bhutton-json-schema-validation-01#section-7.3.5 + URI = proc do |instance, _format| + !instance.is_a?(String) || valid_uri?(instance) + end + URI_REFERENCE = proc do |instance, _format| + !instance.is_a?(String) || valid_uri_reference?(instance) + end + IRI = proc do |instance, _format| + !instance.is_a?(String) || valid_uri?(iri_escape(instance)) + end + IRI_REFERENCE = proc do |instance, _format| + !instance.is_a?(String) || valid_uri_reference?(iri_escape(instance)) + end + UUID = proc do |instance, _format| + !instance.is_a?(String) || valid_uuid?(instance) + end + # https://datatracker.ietf.org/doc/html/draft-bhutton-json-schema-validation-01#section-7.3.6 + URI_TEMPLATE = proc do |instance, _format| + !instance.is_a?(String) || valid_uri_template?(instance) + end + # https://datatracker.ietf.org/doc/html/draft-bhutton-json-schema-validation-01#section-7.3.7 + JSON_POINTER = proc do |instance, _format| + !instance.is_a?(String) || valid_json_pointer?(instance) + end + RELATIVE_JSON_POINTER = proc do |instance, _format| + !instance.is_a?(String) || valid_relative_json_pointer?(instance) + end + # https://datatracker.ietf.org/doc/html/draft-bhutton-json-schema-validation-01#section-7.3.8 + REGEX = proc do |instance, _format| + !instance.is_a?(String) || valid_regex?(instance) + end DATE_TIME_OFFSET_REGEX = /(Z|[\+\-]([01][0-9]|2[0-3]):[0-5][0-9])\z/i.freeze HOUR_24_REGEX = /T24/.freeze @@ -20,6 +80,12 @@ module Format end.freeze class << self + include Duration + include Email + include Hostname + include JSONPointer + include URITemplate + def percent_encode(data, regexp) data = data.dup data.force_encoding(Encoding::ASCII_8BIT) @@ -52,101 +118,56 @@ def parse_content_media_type(data, content_media_type) raise UnknownContentMediaType, content_media_type end end - end - def valid_spec_format?(data, format) - case format - when 'date-time' - valid_date_time?(data) - when 'date' - valid_date_time?("#{data}T04:05:06.123456789+07:00") - when 'time' - valid_date_time?("2001-02-03T#{data}") - when 'email' - data.ascii_only? && valid_email?(data) - when 'idn-email' - valid_email?(data) - when 'hostname' - data.ascii_only? && valid_hostname?(data) - when 'idn-hostname' - valid_hostname?(data) - when 'ipv4' - valid_ip?(data, Socket::AF_INET) - when 'ipv6' - valid_ip?(data, Socket::AF_INET6) - when 'uri' - valid_uri?(data) - when 'uri-reference' - valid_uri_reference?(data) - when 'iri' - valid_uri?(iri_escape(data)) - when 'iri-reference' - valid_uri_reference?(iri_escape(data)) - when 'uri-template' - valid_uri_template?(data) - when 'json-pointer' - valid_json_pointer?(data) - when 'relative-json-pointer' - valid_relative_json_pointer?(data) - when 'regex' - valid_regex?(data) - when 'duration' - valid_duration?(data) - when 'uuid' - valid_uuid?(data) - else - raise UnknownFormat, format + def valid_date_time?(data) + return false if HOUR_24_REGEX.match?(data) + datetime = DateTime.rfc3339(data) + return false if LEAP_SECOND_REGEX.match?(data) && datetime.new_offset.strftime('%H:%M') != '23:59' + DATE_TIME_OFFSET_REGEX.match?(data) + rescue ArgumentError + false end - end - - def valid_date_time?(data) - return false if HOUR_24_REGEX.match?(data) - datetime = DateTime.rfc3339(data) - return false if LEAP_SECOND_REGEX.match?(data) && datetime.new_offset.strftime('%H:%M') != '23:59' - DATE_TIME_OFFSET_REGEX.match?(data) - rescue ArgumentError - false - end - def valid_ip?(data, family) - IPAddr.new(data, family) - IP_REGEX.match?(data) - rescue IPAddr::Error - false - end + def valid_ip?(data, family) + IPAddr.new(data, family) + IP_REGEX.match?(data) + rescue IPAddr::Error + false + end - def parse_uri_scheme(data) - scheme, _userinfo, _host, _port, _registry, _path, opaque, query, _fragment = URI::RFC3986_PARSER.split(data) - # URI::RFC3986_PARSER.parse allows spaces in these and I don't think it should - raise URI::InvalidURIError if INVALID_QUERY_REGEX.match?(query) || INVALID_QUERY_REGEX.match?(opaque) - scheme - end + def parse_uri_scheme(data) + scheme, _userinfo, _host, _port, _registry, _path, opaque, query, _fragment = ::URI::RFC3986_PARSER.split(data) + # ::URI::RFC3986_PARSER.parse allows spaces in these and I don't think it should + raise ::URI::InvalidURIError if INVALID_QUERY_REGEX.match?(query) || INVALID_QUERY_REGEX.match?(opaque) + scheme + end - def valid_uri?(data) - !!parse_uri_scheme(data) - rescue URI::InvalidURIError - false - end + def valid_uri?(data) + !!parse_uri_scheme(data) + rescue ::URI::InvalidURIError + false + end - def valid_uri_reference?(data) - parse_uri_scheme(data) - true - rescue URI::InvalidURIError - false - end + def valid_uri_reference?(data) + parse_uri_scheme(data) + true + rescue ::URI::InvalidURIError + false + end - def iri_escape(data) - Format.percent_encode(data, IRI_ESCAPE_REGEX) - end + def iri_escape(data) + Format.percent_encode(data, IRI_ESCAPE_REGEX) + end - def valid_regex?(data) - !!EcmaRegexp.ruby_equivalent(data) - rescue InvalidEcmaRegexp - false - end + def valid_regex?(data) + !!EcmaRegexp.ruby_equivalent(data) + rescue InvalidEcmaRegexp + false + end - def valid_uuid?(data) - UUID_REGEX.match?(data) || NIL_UUID == data + def valid_uuid?(data) + UUID_REGEX.match?(data) || NIL_UUID == data + end end end end diff --git a/lib/json_schemer/openapi30/meta.rb b/lib/json_schemer/openapi30/meta.rb index 4deee431..f89ed86c 100644 --- a/lib/json_schemer/openapi30/meta.rb +++ b/lib/json_schemer/openapi30/meta.rb @@ -2,6 +2,11 @@ module JSONSchemer module OpenAPI30 BASE_URI = URI('json-schemer://openapi30/schema') + # https://spec.openapis.org/oas/v3.0.3#data-types + FORMATS = OpenAPI31::FORMATS.merge( + 'byte' => proc { |instance, _value| Format.decode_content_encoding(instance, 'base64').first }, + 'binary' => proc { |instance, _value| instance.is_a?(String) && instance.encoding == Encoding::ASCII_8BIT } + ) SCHEMA = { 'id' => 'json-schemer://openapi30/schema', '$schema' => 'http://json-schema.org/draft-04/schema#', diff --git a/lib/json_schemer/openapi31/meta.rb b/lib/json_schemer/openapi31/meta.rb index 85e7c11e..e9bb7ac9 100644 --- a/lib/json_schemer/openapi31/meta.rb +++ b/lib/json_schemer/openapi31/meta.rb @@ -2,6 +2,14 @@ module JSONSchemer module OpenAPI31 BASE_URI = URI('https://spec.openapis.org/oas/3.1/dialect/base') + # https://spec.openapis.org/oas/v3.1.0#data-types + FORMATS = { + 'int32' => proc { |instance, _format| instance.is_a?(Integer) && instance.bit_length <= 32 }, + 'int64' => proc { |instance, _format| instance.is_a?(Integer) && instance.bit_length <= 64 }, + 'float' => proc { |instance, _format| instance.is_a?(Float) }, + 'double' => proc { |instance, _format| instance.is_a?(Float) }, + 'password' => proc { |_instance, _format| true } + } SCHEMA = { '$id' => 'https://spec.openapis.org/oas/3.1/dialect/base', '$schema' => 'https://json-schema.org/draft/2020-12/schema', diff --git a/lib/json_schemer/schema.rb b/lib/json_schemer/schema.rb index bdce6b8c..3c6ed8d4 100644 --- a/lib/json_schemer/schema.rb +++ b/lib/json_schemer/schema.rb @@ -10,7 +10,6 @@ def original_instance(instance_location) end include Output - include Format::JSONPointer DEFAULT_SCHEMA = Draft202012::BASE_URI.to_s.freeze SCHEMA_KEYWORD_CLASS = Draft202012::Vocab::Core::Schema @@ -160,7 +159,7 @@ def validate_instance(instance, instance_location, keyword_location, context) def resolve_ref(uri) pointer = '' - if valid_json_pointer?(uri.fragment) + if Format.valid_json_pointer?(uri.fragment) pointer = URI.decode_www_form_component(uri.fragment) uri.fragment = nil end @@ -288,6 +287,14 @@ def schema_pointer end end + def fetch_format(format, *args, &block) + if meta_schema == self + formats.fetch(format, *args, &block) + else + formats.fetch(format) { meta_schema.fetch_format(format, *args, &block) } + end + end + def id_keyword @id_keyword ||= (keywords.key?('$id') ? '$id' : 'id') end diff --git a/test/format_test.rb b/test/format_test.rb index b0599279..a1deec45 100644 --- a/test/format_test.rb +++ b/test/format_test.rb @@ -21,22 +21,28 @@ def test_it_ignores_unknown_format schemer = JSONSchemer.schema({ 'type' => 'string', 'format' => 'unknown' }) assert(schemer.valid?('1')) refute(schemer.valid?(1)) + schemer = JSONSchemer.schema({ 'maximum' => 1, 'format' => 'unknown' }) + assert(schemer.valid?(1)) + refute(schemer.valid?(2)) end def test_format_assertion_raises_unknown_format - meta = { + annotation = { + '$vocabulary' => { + 'https://json-schema.org/draft/2020-12/vocab/format-annotation' => true + } + } + assertion = { '$vocabulary' => { 'https://json-schema.org/draft/2020-12/vocab/format-assertion' => true } } - schemer = JSONSchemer.schema( - { - '$schema' => 'http://example.com', - 'format' => 'unknown' - }, - :ref_resolver => proc { meta } - ) - assert_raises(JSONSchemer::UnknownFormat) { schemer.validate('anything') } + schema = { + '$schema' => 'http://example.com', + 'format' => 'unknown' + } + assert(JSONSchemer.schema(schema, :ref_resolver => proc { annotation }).valid?('x')) + assert_raises(JSONSchemer::UnknownFormat) { JSONSchemer.schema(schema, :ref_resolver => proc { assertion }) } end def test_it_validates_spaces_in_uri_format