Skip to content

Commit

Permalink
fix: Excessive reference inlining in large schemas
Browse files Browse the repository at this point in the history
Ref: #945
  • Loading branch information
Stranger6667 committed Dec 25, 2020
1 parent 2ca9f50 commit 94be3eb
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 6 deletions.
3 changes: 3 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ Changelog
This change allows Schemathesis to generate test cases even for endpoints containing optional path parameters (which is not compliant with the spec). `#941`_
- Using ``--auth`` together with ``--header`` that sets the ``Authorization`` header causes a validation error.
Before, the ``--header`` value was ignored in such cases, and the basic auth passed in ``--auth`` was used. `#911`_
- When ``hypothesis-jsonschema`` fails to resolve recursive references, the test is skipped with an error message that indicates why it happens.

**Fixed**

- Allow generating requests without payload if the schema does not require it. `#916`_
- Allow sending ``null`` as request payload if the schema expects it. `#919`_
- CLI failure if the tested operation is ``GET`` and has payload examples. `#925`_
- Excessive reference inlining that leads to out-of-memory errors for large schemas with deep references. `#945`_

**Removed**

Expand Down Expand Up @@ -1612,6 +1614,7 @@ Deprecated
.. _0.3.0: https://github.com/schemathesis/schemathesis/compare/v0.2.0...v0.3.0
.. _0.2.0: https://github.com/schemathesis/schemathesis/compare/v0.1.0...v0.2.0

.. _#945: https://github.com/schemathesis/schemathesis/issues/945
.. _#941: https://github.com/schemathesis/schemathesis/issues/941
.. _#934: https://github.com/schemathesis/schemathesis/issues/934
.. _#925: https://github.com/schemathesis/schemathesis/issues/925
Expand Down
7 changes: 7 additions & 0 deletions src/schemathesis/extra/pytest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from _pytest.nodes import Node
from _pytest.python import Class, Function, FunctionDefinition, Metafunc, Module, PyCollector
from hypothesis.errors import InvalidArgument
from hypothesis_jsonschema._canonicalise import HypothesisRefResolutionError
from packaging import version

from .. import DataGenerationMethod
Expand Down Expand Up @@ -168,3 +169,9 @@ def pytest_pyfunc_call(pyfuncitem): # type:ignore
outcome.get_result()
except InvalidArgument as exc:
pytest.fail(exc.args[0])
except HypothesisRefResolutionError:
pytest.skip(
"Currently, Schemathesis can't generate data for this operation due to "
"recursive references in the operation definition. See more information in this issue - "
"https://github.com/Zac-HD/hypothesis-jsonschema/issues/33"
)
10 changes: 10 additions & 0 deletions src/schemathesis/runner/impl/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import hypothesis
import requests
from _pytest.logging import LogCaptureHandler, catching_logs
from hypothesis_jsonschema._canonicalise import HypothesisRefResolutionError
from requests.auth import HTTPDigestAuth, _basic_auth_str

from ...constants import DEFAULT_DEADLINE, DEFAULT_STATEFUL_RECURSION_LIMIT, USER_AGENT, DataGenerationMethod
Expand Down Expand Up @@ -144,6 +145,15 @@ def run_test( # pylint: disable=too-many-locals
error = reraise(exc)
status = Status.error
result.add_error(error)
except HypothesisRefResolutionError:
status = Status.error
result.add_error(
hypothesis.errors.Unsatisfiable(
"Currently, Schemathesis can't generate data for this operation due to "
"recursive references in the operation definition. See more information in this issue - "
"https://github.com/Zac-HD/hypothesis-jsonschema/issues/33"
)
)
except Exception as error:
status = Status.error
result.add_error(error)
Expand Down
3 changes: 3 additions & 0 deletions src/schemathesis/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,9 @@ def get_links(self, endpoint: Endpoint) -> Dict[str, Dict[str, Any]]:
def validate_response(self, endpoint: Endpoint, response: GenericResponse) -> None:
raise NotImplementedError

def prepare_schema(self, schema: Any) -> Any:
    """Prepare a JSON schema before it is converted to a data generation strategy.

    The base implementation is abstract and always raises ``NotImplementedError``.
    """
    raise NotImplementedError


def endpoints_to_dict(endpoints: Generator[Endpoint, None, None]) -> Dict[str, CaseInsensitiveDict]:
output: Dict[str, CaseInsensitiveDict] = {}
Expand Down
7 changes: 5 additions & 2 deletions src/schemathesis/specs/openapi/_hypothesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ...exceptions import InvalidSchema
from ...hooks import GLOBAL_HOOK_DISPATCHER, HookContext, HookDispatcher
from ...models import Case, Endpoint
from ...schemas import BaseSchema
from ...utils import NOT_SET
from .constants import LOCATION_TO_CONTAINER
from .parameters import OpenAPIParameter, parameters_to_json_schema
Expand Down Expand Up @@ -112,7 +113,7 @@ def get_case_strategy( # pylint: disable=too-many-locals
if body is NOT_SET:
if endpoint.body:
parameter = draw(st.sampled_from(endpoint.body.items))
strategy = _get_body_strategy(parameter, to_strategy)
strategy = _get_body_strategy(parameter, to_strategy, endpoint.schema)
media_type = parameter.media_type
body = draw(strategy)
else:
Expand All @@ -137,9 +138,10 @@ def get_case_strategy( # pylint: disable=too-many-locals


def _get_body_strategy(
parameter: OpenAPIParameter, to_strategy: Callable[[Dict[str, Any]], st.SearchStrategy]
parameter: OpenAPIParameter, to_strategy: Callable[[Dict[str, Any]], st.SearchStrategy], parent_schema: BaseSchema
) -> st.SearchStrategy:
schema = parameter.as_json_schema()
schema = parent_schema.prepare_schema(schema)
strategy = to_strategy(schema)
if not parameter.is_required:
strategy |= st.just(NOT_SET)
Expand All @@ -158,6 +160,7 @@ def get_parameters_strategy(
# contains errors.
# In this case, we know that the `required` keyword should always be `True`.
schema["required"] = list(schema["properties"])
schema = endpoint.schema.prepare_schema(schema)
strategy = to_strategy(schema)
serialize = endpoint.get_parameter_serializer(location)
if serialize is not None:
Expand Down
23 changes: 20 additions & 3 deletions src/schemathesis/specs/openapi/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from copy import deepcopy
from difflib import get_close_matches
from json import JSONDecodeError
from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Sequence, Tuple, Type, Union
from typing import Any, Callable, ClassVar, Dict, Generator, Iterable, List, Optional, Sequence, Tuple, Type, Union
from urllib.parse import urlsplit

import jsonschema
Expand Down Expand Up @@ -44,7 +44,7 @@
OpenAPI30Parameter,
OpenAPIParameter,
)
from .references import ConvertingResolver
from .references import RECURSION_DEPTH_LIMIT, ConvertingResolver
from .security import BaseSecurityProcessor, OpenAPISecurityProcessor, SwaggerSecurityProcessor
from .stateful import create_state_machine

Expand All @@ -55,6 +55,7 @@ class BaseOpenAPISchema(BaseSchema):
operations: Tuple[str, ...]
security: BaseSecurityProcessor
parameter_cls: Type[OpenAPIParameter]
component_locations: ClassVar[Tuple[str, ...]] = ()
_endpoints_by_operation_id: Dict[str, Endpoint]

@property # pragma: no mutate
Expand Down Expand Up @@ -82,7 +83,9 @@ def get_all_endpoints(self) -> Generator[Endpoint, None, None]:
continue
self.dispatch_hook("before_process_path", context, path, methods)
scope, raw_methods = self._resolve_methods(methods)
methods = self.resolver.resolve_all(methods)
# Setting a low recursion limit doesn't solve the problem of recursive references and excessive inlining,
# but it reduces the number of cases where Schemathesis gets stuck on this step.
methods = self.resolver.resolve_all(methods, RECURSION_DEPTH_LIMIT - 5)
common_parameters = get_common_parameters(methods)
for method, resolved_definition in methods.items():
# Only method definitions are parsed
Expand Down Expand Up @@ -362,6 +365,18 @@ def validate_response(self, endpoint: Endpoint, response: GenericResponse) -> No
) from exc
return None # explicitly return None for mypy

def prepare_schema(self, schema: Any) -> Any:
    """Return a copy of ``schema`` extended with the Open API component sections.

    Every top-level key named in ``component_locations`` that is present in ``raw_schema`` is
    deep-copied into the result. Having these sections next to the schema helps
    `hypothesis-jsonschema` generate data for schemas that still contain non-resolved references.
    The ``schema`` argument itself is not modified.
    """
    prepared = deepcopy(schema)
    # Which keywords may hold reference targets depends on the spec version
    present_locations = (location for location in self.component_locations if location in self.raw_schema)
    for location in present_locations:
        prepared[location] = deepcopy(self.raw_schema[location])
    return prepared


@contextmanager
def in_scopes(resolver: jsonschema.RefResolver, scopes: List[str]) -> Generator[None, None, None]:
Expand All @@ -388,6 +403,7 @@ class SwaggerV20(BaseOpenAPISchema):
operations: Tuple[str, ...] = ("get", "put", "post", "delete", "options", "head", "patch")
parameter_cls: Type[OpenAPIParameter] = OpenAPI20Parameter
security = SwaggerSecurityProcessor()
component_locations: ClassVar[Tuple[str, ...]] = ("definitions", "parameters", "responses")
links_field = "x-links"

@property
Expand Down Expand Up @@ -521,6 +537,7 @@ class OpenApi30(SwaggerV20): # pylint: disable=too-many-ancestors
operations = SwaggerV20.operations + ("trace",)
security = OpenAPISecurityProcessor()
parameter_cls = OpenAPI30Parameter
component_locations = ("components",)
links_field = "links"

@property
Expand Down
2 changes: 1 addition & 1 deletion test/test_hypothesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,6 @@ def test_optional_payload(request, spec_version):
}
jsonschema.validate(raw_schema, OPENAPI_30)
schema = schemathesis.from_dict(raw_schema)
strategy = _get_body_strategy(schema["/users"]["post"].body[0], make_positive_strategy)
strategy = _get_body_strategy(schema["/users"]["post"].body[0], make_positive_strategy, schema)
# Then `None` could be generated by Schemathesis
assert find(strategy, lambda x: x is NOT_SET) is NOT_SET

0 comments on commit 94be3eb

Please sign in to comment.