Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamically determine jsonschema validator #2812

Merged
merged 3 commits into from
Jan 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion altair/utils/display.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,10 @@ def _validate(self):
# type: () -> None
"""Validate the spec against the schema."""
schema_dict = json.loads(pkgutil.get_data(*self.schema_path).decode("utf-8"))
validate_jsonschema(self.spec, schema_dict)
validate_jsonschema(
self.spec,
schema_dict,
)

def _repr_mimebundle_(self, include=None, exclude=None):
"""Return a MIME bundle for display in Jupyter frontends."""
Expand Down
33 changes: 21 additions & 12 deletions altair/utils/schemapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@

import jsonschema
import jsonschema.exceptions
import jsonschema.validators
import numpy as np
import pandas as pd

from altair import vegalite

JSONSCHEMA_VALIDATOR = jsonschema.Draft7Validator
# If DEBUG_MODE is True, then schema objects are converted to dict and
# validated at creation time. This slows things down, particularly for
# larger specs, but leads to much more useful tracebacks for the user.
Expand Down Expand Up @@ -44,7 +44,7 @@ def debug_mode(arg):
DEBUG_MODE = original


def validate_jsonschema(spec, schema, resolver=None):
def validate_jsonschema(spec, schema, rootschema=None):
# We don't use jsonschema.validate as this would validate the schema itself.
# Instead, we pass the schema directly to the validator class. This is done for
# two reasons: The schema comes from Vega-Lite and is not based on the user
Expand All @@ -54,9 +54,18 @@ def validate_jsonschema(spec, schema, resolver=None):
# e.g. '#/definitions/ValueDefWithCondition<MarkPropFieldOrDatumDef,
# (Gradient|string|null)>' would be a valid $ref in a Vega-Lite schema but
# it is not a valid URI reference due to the characters such as '<'.
validator = JSONSCHEMA_VALIDATOR(
schema, format_checker=JSONSCHEMA_VALIDATOR.FORMAT_CHECKER, resolver=resolver
)
if rootschema is not None:
validator_cls = jsonschema.validators.validator_for(rootschema)
resolver = jsonschema.RefResolver.from_schema(rootschema)
else:
validator_cls = jsonschema.validators.validator_for(schema)
# No resolver is necessary if the schema is already the full schema
resolver = None

validator_kwargs = {"resolver": resolver}
if hasattr(validator_cls, "FORMAT_CHECKER"):
validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER
validator = validator_cls(schema, **validator_kwargs)
error = jsonschema.exceptions.best_match(validator.iter_errors(spec))
if error is not None:
raise error
Expand Down Expand Up @@ -177,7 +186,6 @@ class SchemaBase(object):
_schema = None
_rootschema = None
_class_is_valid_at_instantiation = True
_validator = JSONSCHEMA_VALIDATOR

def __init__(self, *args, **kwds):
# Two valid options for initialization, which should be handled by
Expand Down Expand Up @@ -466,8 +474,9 @@ def validate(cls, instance, schema=None):
"""
if schema is None:
schema = cls._schema
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return validate_jsonschema(instance, schema, resolver=resolver)
return validate_jsonschema(
instance, schema, rootschema=cls._rootschema or cls._schema
)

@classmethod
def resolve_references(cls, schema=None):
Expand All @@ -485,8 +494,9 @@ def validate_property(cls, name, value, schema=None):
"""
value = _todict(value, validate=False, context={})
props = cls.resolve_references(schema or cls._schema).get("properties", {})
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return validate_jsonschema(value, props.get(name, {}), resolver=resolver)
return validate_jsonschema(
value, props.get(name, {}), rootschema=cls._rootschema or cls._schema
)

def __dir__(self):
return list(self._kwds.keys())
Expand Down Expand Up @@ -578,9 +588,8 @@ def from_dict(
if "anyOf" in schema or "oneOf" in schema:
schemas = schema.get("anyOf", []) + schema.get("oneOf", [])
for possible_schema in schemas:
resolver = jsonschema.RefResolver.from_schema(rootschema)
try:
validate_jsonschema(dct, possible_schema, resolver=resolver)
validate_jsonschema(dct, possible_schema, rootschema=rootschema)
except jsonschema.ValidationError:
continue
else:
Expand Down
44 changes: 43 additions & 1 deletion tests/utils/tests/test_schemapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import numpy as np

from altair import load_schema
from altair.utils.schemapi import (
UndefinedType,
SchemaBase,
Expand All @@ -17,6 +18,7 @@
SchemaValidationError,
)

_JSONSCHEMA_DRAFT = load_schema()["$schema"]
# Make tests inherit from _TestSchema, so that when we test from_dict it won't
# try to use SchemaBase objects defined elsewhere as wrappers.

Expand All @@ -29,6 +31,7 @@ def _default_wrapper_classes(cls):

class MySchema(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"definitions": {
"StringMapping": {
"type": "object",
Expand Down Expand Up @@ -65,6 +68,7 @@ class StringArray(_TestSchema):

class Derived(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"definitions": {
"Foo": {"type": "object", "properties": {"d": {"type": "string"}}},
"Bar": {"type": "string", "enum": ["A", "B"]},
Expand All @@ -90,7 +94,10 @@ class Bar(_TestSchema):


class SimpleUnion(_TestSchema):
_schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]}
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"anyOf": [{"type": "integer"}, {"type": "string"}],
}


class DefinitionUnion(_TestSchema):
Expand All @@ -100,18 +107,38 @@ class DefinitionUnion(_TestSchema):

class SimpleArray(_TestSchema):
    # Array schema whose items may each be an integer or a string. The draft
    # is whatever the module-level _JSONSCHEMA_DRAFT value holds.
    _schema = {
        "$schema": _JSONSCHEMA_DRAFT,
        "type": "array",
        "items": {"anyOf": [{"type": "integer"}, {"type": "string"}]},
    }


class InvalidProperties(_TestSchema):
    # Object schema whose property names cannot be written as plain Python
    # attribute/keyword names: "for" and "as" are reserved words, while
    # "vega-lite" and "$schema" are not valid identifiers.
    _schema = {
        "$schema": _JSONSCHEMA_DRAFT,
        "type": "object",
        "properties": {"for": {}, "as": {}, "vega-lite": {}, "$schema": {}},
    }


class Draft7Schema(_TestSchema):
    # Explicitly declares JSON Schema draft-07 through "$schema". Property "e"
    # gives "items" as a list of schemas; test_schema_validator_selection uses
    # this form to tell draft-07 handling apart from Draft 2020-12.
    _schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "properties": {
            "e": {"items": [{"type": "string"}, {"type": "string"}]},
        },
    }


class Draft202012Schema(_TestSchema):
    # Explicitly declares JSON Schema Draft 2020-12 through "$schema".
    #
    # The identifier must be the canonical metaschema URI (https scheme, no
    # trailing "#"). The previous "http://...schema#" spelling does not appear
    # to match any metaschema known to jsonschema, so the validator lookup
    # would fall back to its default instead of selecting by "$schema".
    # NOTE(review): confirm against the jsonschema version pinned by the project.
    #
    # Property "e" deliberately keeps the list form of "items", which
    # test_schema_validator_selection expects to fail under Draft 2020-12.
    _schema = {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "properties": {
            "e": {"items": [{"type": "string"}, {"type": "string"}]},
        },
    }


def test_construct_multifaceted_schema():
dct = {
"a": {"foo": "bar"},
Expand Down Expand Up @@ -221,6 +248,21 @@ def test_undefined_singleton():
assert Undefined is UndefinedType()


def test_schema_validator_selection():
    """Check that the validator class follows the schema's ``$schema`` key.

    Both schemas describe property "e" with a list-valued "items". Draft
    2020-12 changed the "items" keyword, which is why the second schema is
    expected to raise the AttributeError below; see
    https://json-schema.org/draft/2020-12/release-notes.html#changes-to-items-and-additionalitems
    """
    payload = {"e": ["a", "b"]}

    # Under draft-07 the instance survives a from_dict/to_dict round trip.
    round_tripped = Draft7Schema.from_dict(payload).to_dict()
    assert round_tripped == payload

    # Under Draft 2020-12 the same instance is expected to blow up.
    expected_message = "'list' object has no attribute 'get'"
    with pytest.raises(AttributeError, match=expected_message):
        Draft202012Schema.from_dict(payload)


@pytest.fixture
def dct():
return {
Expand Down
33 changes: 21 additions & 12 deletions tools/schemapi/schemapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@

import jsonschema
import jsonschema.exceptions
import jsonschema.validators
import numpy as np
import pandas as pd

from altair import vegalite

JSONSCHEMA_VALIDATOR = jsonschema.Draft7Validator
# If DEBUG_MODE is True, then schema objects are converted to dict and
# validated at creation time. This slows things down, particularly for
# larger specs, but leads to much more useful tracebacks for the user.
Expand Down Expand Up @@ -42,7 +42,7 @@ def debug_mode(arg):
DEBUG_MODE = original


def validate_jsonschema(spec, schema, resolver=None):
def validate_jsonschema(spec, schema, rootschema=None):
# We don't use jsonschema.validate as this would validate the schema itself.
# Instead, we pass the schema directly to the validator class. This is done for
# two reasons: The schema comes from Vega-Lite and is not based on the user
Expand All @@ -52,9 +52,18 @@ def validate_jsonschema(spec, schema, resolver=None):
# e.g. '#/definitions/ValueDefWithCondition<MarkPropFieldOrDatumDef,
# (Gradient|string|null)>' would be a valid $ref in a Vega-Lite schema but
# it is not a valid URI reference due to the characters such as '<'.
validator = JSONSCHEMA_VALIDATOR(
schema, format_checker=JSONSCHEMA_VALIDATOR.FORMAT_CHECKER, resolver=resolver
)
if rootschema is not None:
validator_cls = jsonschema.validators.validator_for(rootschema)
resolver = jsonschema.RefResolver.from_schema(rootschema)
else:
validator_cls = jsonschema.validators.validator_for(schema)
# No resolver is necessary if the schema is already the full schema
resolver = None

validator_kwargs = {"resolver": resolver}
if hasattr(validator_cls, "FORMAT_CHECKER"):
validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER
validator = validator_cls(schema, **validator_kwargs)
error = jsonschema.exceptions.best_match(validator.iter_errors(spec))
if error is not None:
raise error
Expand Down Expand Up @@ -175,7 +184,6 @@ class SchemaBase(object):
_schema = None
_rootschema = None
_class_is_valid_at_instantiation = True
_validator = JSONSCHEMA_VALIDATOR

def __init__(self, *args, **kwds):
# Two valid options for initialization, which should be handled by
Expand Down Expand Up @@ -464,8 +472,9 @@ def validate(cls, instance, schema=None):
"""
if schema is None:
schema = cls._schema
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return validate_jsonschema(instance, schema, resolver=resolver)
return validate_jsonschema(
instance, schema, rootschema=cls._rootschema or cls._schema
)

@classmethod
def resolve_references(cls, schema=None):
Expand All @@ -483,8 +492,9 @@ def validate_property(cls, name, value, schema=None):
"""
value = _todict(value, validate=False, context={})
props = cls.resolve_references(schema or cls._schema).get("properties", {})
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return validate_jsonschema(value, props.get(name, {}), resolver=resolver)
return validate_jsonschema(
value, props.get(name, {}), rootschema=cls._rootschema or cls._schema
)

def __dir__(self):
return list(self._kwds.keys())
Expand Down Expand Up @@ -576,9 +586,8 @@ def from_dict(
if "anyOf" in schema or "oneOf" in schema:
schemas = schema.get("anyOf", []) + schema.get("oneOf", [])
for possible_schema in schemas:
resolver = jsonschema.RefResolver.from_schema(rootschema)
try:
validate_jsonschema(dct, possible_schema, resolver=resolver)
validate_jsonschema(dct, possible_schema, rootschema=rootschema)
except jsonschema.ValidationError:
continue
else:
Expand Down
44 changes: 43 additions & 1 deletion tools/schemapi/tests/test_schemapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import numpy as np

from altair import load_schema
from altair.utils.schemapi import (
UndefinedType,
SchemaBase,
Expand All @@ -15,6 +16,7 @@
SchemaValidationError,
)

_JSONSCHEMA_DRAFT = load_schema()["$schema"]
# Make tests inherit from _TestSchema, so that when we test from_dict it won't
# try to use SchemaBase objects defined elsewhere as wrappers.

Expand All @@ -27,6 +29,7 @@ def _default_wrapper_classes(cls):

class MySchema(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"definitions": {
"StringMapping": {
"type": "object",
Expand Down Expand Up @@ -63,6 +66,7 @@ class StringArray(_TestSchema):

class Derived(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"definitions": {
"Foo": {"type": "object", "properties": {"d": {"type": "string"}}},
"Bar": {"type": "string", "enum": ["A", "B"]},
Expand All @@ -88,7 +92,10 @@ class Bar(_TestSchema):


class SimpleUnion(_TestSchema):
_schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]}
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"anyOf": [{"type": "integer"}, {"type": "string"}],
}


class DefinitionUnion(_TestSchema):
Expand All @@ -98,18 +105,38 @@ class DefinitionUnion(_TestSchema):

class SimpleArray(_TestSchema):
    # Array schema whose items may each be an integer or a string. The draft
    # is whatever the module-level _JSONSCHEMA_DRAFT value holds.
    _schema = {
        "$schema": _JSONSCHEMA_DRAFT,
        "type": "array",
        "items": {"anyOf": [{"type": "integer"}, {"type": "string"}]},
    }


class InvalidProperties(_TestSchema):
    # Object schema whose property names cannot be written as plain Python
    # attribute/keyword names: "for" and "as" are reserved words, while
    # "vega-lite" and "$schema" are not valid identifiers.
    _schema = {
        "$schema": _JSONSCHEMA_DRAFT,
        "type": "object",
        "properties": {"for": {}, "as": {}, "vega-lite": {}, "$schema": {}},
    }


class Draft7Schema(_TestSchema):
    # Explicitly declares JSON Schema draft-07 through "$schema". Property "e"
    # gives "items" as a list of schemas; test_schema_validator_selection uses
    # this form to tell draft-07 handling apart from Draft 2020-12.
    _schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "properties": {
            "e": {"items": [{"type": "string"}, {"type": "string"}]},
        },
    }


class Draft202012Schema(_TestSchema):
    # Explicitly declares JSON Schema Draft 2020-12 through "$schema".
    #
    # The identifier must be the canonical metaschema URI (https scheme, no
    # trailing "#"). The previous "http://...schema#" spelling does not appear
    # to match any metaschema known to jsonschema, so the validator lookup
    # would fall back to its default instead of selecting by "$schema".
    # NOTE(review): confirm against the jsonschema version pinned by the project.
    #
    # Property "e" deliberately keeps the list form of "items", which
    # test_schema_validator_selection expects to fail under Draft 2020-12.
    _schema = {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "properties": {
            "e": {"items": [{"type": "string"}, {"type": "string"}]},
        },
    }


def test_construct_multifaceted_schema():
dct = {
"a": {"foo": "bar"},
Expand Down Expand Up @@ -219,6 +246,21 @@ def test_undefined_singleton():
assert Undefined is UndefinedType()


def test_schema_validator_selection():
    """Check that the validator class follows the schema's ``$schema`` key.

    Both schemas describe property "e" with a list-valued "items". Draft
    2020-12 changed the "items" keyword, which is why the second schema is
    expected to raise the AttributeError below; see
    https://json-schema.org/draft/2020-12/release-notes.html#changes-to-items-and-additionalitems
    """
    payload = {"e": ["a", "b"]}

    # Under draft-07 the instance survives a from_dict/to_dict round trip.
    round_tripped = Draft7Schema.from_dict(payload).to_dict()
    assert round_tripped == payload

    # Under Draft 2020-12 the same instance is expected to blow up.
    expected_message = "'list' object has no attribute 'get'"
    with pytest.raises(AttributeError, match=expected_message):
        Draft202012Schema.from_dict(payload)


@pytest.fixture
def dct():
return {
Expand Down