diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 6d940cb0a..a3bd01e22 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -209,6 +209,13 @@ jobs: - name: Upload coverage to Codecov uses: "codecov/codecov-action@v1" + - name: Check Docstrings + run: > + nox + -db conda -r -v + --non-interactive + --session "doctests-${{ matrix.python-version }}" + - name: Check Docs run: > nox diff --git a/.gitignore b/.gitignore index 0d8362231..39e188cf5 100644 --- a/.gitignore +++ b/.gitignore @@ -113,7 +113,7 @@ venv.bak/ /asv_bench/results/ # Docs -docs/source/generated +docs/source/reference/generated # Nox .nox diff --git a/Makefile b/Makefile index 983f3c6bf..98107bb49 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ requirements: pip install -r requirements-dev.txt docs: - rm -rf docs/source/generated && \ + rm -rf docs/**/generated docs/**/methods docs/_build && \ python -m sphinx -E "docs/source" "docs/_build" -W && \ make -C docs doctest diff --git a/docs/source/API_reference.rst b/docs/source/API_reference.rst deleted file mode 100644 index cca8a43d5..000000000 --- a/docs/source/API_reference.rst +++ /dev/null @@ -1,167 +0,0 @@ -.. pandera package index documentation toctree - -.. currentmodule:: pandera - -API -=== - -The ``io`` module and built-in ``Hypothesis`` checks require a pandera -installation with the corresponding extension, see the -:ref:`installation` instructions for more details. - -Schemas -------- - -.. autosummary:: - :toctree: generated - :template: class.rst - :nosignatures: - - pandera.schemas.DataFrameSchema - pandera.schemas.SeriesSchema - - -Schema Components ------------------ - -.. autosummary:: - :toctree: generated - :template: class.rst - :nosignatures: - - pandera.schema_components.Column - pandera.schema_components.Index - pandera.schema_components.MultiIndex - - -Schema Models -------------- - -.. 
autosummary:: - :toctree: generated - :template: class.rst - :nosignatures: - - pandera.model.SchemaModel - -**Model Components** - -.. autosummary:: - :toctree: generated - :nosignatures: - - pandera.model_components.Field - pandera.model_components.check - pandera.model_components.dataframe_check - -**Typing** - -.. autosummary:: - :toctree: generated - :template: typing_module.rst - :nosignatures: - - pandera.typing - -**Config** - -.. autosummary:: - :toctree: generated - :template: model_component_class.rst - :nosignatures: - - pandera.model.BaseConfig - - -Checks ------- - -.. autosummary:: - :toctree: generated - :template: class.rst - :nosignatures: - - pandera.checks.Check - pandera.hypotheses.Hypothesis - - -Pandas Data Types ------------------ - -.. autosummary:: - :toctree: generated - :template: pandas_dtype_class.rst - :nosignatures: - - pandera.dtypes.DataType - - -Decorators ----------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - pandera.decorators.check_input - pandera.decorators.check_output - pandera.decorators.check_io - pandera.decorators.check_types - - -Schema Inference ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - pandera.schema_inference.infer_schema - - -IO Utils --------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - pandera.io.from_yaml - pandera.io.to_yaml - pandera.io.to_script - - -Data Synthesis Strategies -------------------------- - -.. autosummary:: - :toctree: generated - :template: strategies_module.rst - :nosignatures: - - pandera.strategies - - -Extensions ----------- - -.. autosummary:: - :toctree: generated - :template: module.rst - :nosignatures: - - pandera.extensions - - -Errors ------- - -.. 
autosummary:: - :toctree: generated - :template: class.rst - :nosignatures: - - pandera.errors.SchemaError - pandera.errors.SchemaErrors - pandera.errors.SchemaInitError - pandera.errors.SchemaDefinitionError diff --git a/docs/source/_templates/dtype.rst b/docs/source/_templates/dtype.rst new file mode 100644 index 000000000..7625a0dfe --- /dev/null +++ b/docs/source/_templates/dtype.rst @@ -0,0 +1,41 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + + {% block attributes %} + {% if attributes %} + .. rubric:: Attributes + + .. autosummary:: + :nosignatures: + + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + + {% endif %} + {% endblock %} + + {% block methods %} + {% if methods %} + .. rubric:: Methods + + .. autosummary:: + :nosignatures: + :toctree: methods + + {# Ignore the DateTime alias to avoid `WARNING: document isn't included in any toctree`#} + {% if objname != "DateTime" %} + {% for item in methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + + {%- if members and '__call__' in members %} + ~{{ name }}.__call__ + {%- endif %} + {%- endif %} + + {%- endif %} + {% endblock %} diff --git a/docs/source/conf.py b/docs/source/conf.py index f82b1237e..32feb47d7 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -162,7 +162,7 @@ .. role:: green """ -autosummary_generate = ["API_reference.rst"] +autosummary_generate = True autosummary_filename_map = { "pandera.Check": "pandera.Check", "pandera.check": "pandera.check_decorator", @@ -174,6 +174,11 @@ "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), } +# strip prompts +copybutton_prompt_text = ( + r">>> |\.\.\. 
|\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " +) +copybutton_prompt_is_regexp = True # this is a workaround to filter out forward reference issue in # sphinx_autodoc_typehints diff --git a/docs/source/data_synthesis_strategies.rst b/docs/source/data_synthesis_strategies.rst index d49e4a115..3bbd410e0 100644 --- a/docs/source/data_synthesis_strategies.rst +++ b/docs/source/data_synthesis_strategies.rst @@ -4,8 +4,8 @@ .. _data synthesis strategies: -Data Synthesis Strategies (new) -=============================== +Data Synthesis Strategies +========================= *new in 0.6.0* diff --git a/docs/source/dataframe_schemas.rst b/docs/source/dataframe_schemas.rst index ec8b0fd7f..1fe814eef 100644 --- a/docs/source/dataframe_schemas.rst +++ b/docs/source/dataframe_schemas.rst @@ -10,7 +10,7 @@ DataFrame Schemas The :class:`~pandera.schemas.DataFrameSchema` class enables the specification of a schema that verifies the columns and index of a pandas ``DataFrame`` object. -The ``DataFrameSchema`` object consists of |column|_\s and an |index|_. +The :class:`~pandera.schemas.DataFrameSchema` object consists of |column|_\s and an |index|_. .. |column| replace:: ``Column`` .. |index| replace:: ``Index`` @@ -44,12 +44,25 @@ The ``DataFrameSchema`` object consists of |column|_\s and an |index|_. Column Validation ----------------- -A :class:`~pandera.schema_components.Column` must specify the properties of a column in a dataframe -object. It can be optionally verified for its data type, `null values`_ or +A :class:`~pandera.schema_components.Column` must specify the properties of a +column in a dataframe object. It can be optionally verified for its data type, +`null values`_ or duplicate values. The column can be coerced_ into the specified type, and the required_ parameter allows control over whether or not the column is allowed to be missing. +Similarly to pandas, the data type can be specified as: + +* a string alias, as long as it is recognized by pandas. 
+* a python type: `int`, `float`, `double`, `bool`, `str` +* a `numpy data type <https://numpy.org/doc/stable/user/basics.types.html>`_ +* a `pandas extension type <https://pandas.pydata.org/pandas-docs/stable/user_guide/basics.html#dtypes>`_: + it can be an instance (e.g `pd.CategoricalDtype(["a", "b"])`) or a + class (e.g `pandas.CategoricalDtype`) if it can be initialized with default + values. +* a pandera :class:`~pandera.dtypes.DataType`: it can also be an instance or a + class. + :ref:`Column checks` allow for the DataFrame's values to be checked against a user-provided function. ``Check`` objects also support :ref:`grouping` by a different column so that the user can make @@ -270,7 +283,7 @@ objects can also be used to validate columns in a dataframe on its own: validated_df = df.pipe(column1_schema).pipe(column2_schema) -For multi-column use cases, the ``DataFrameSchema`` is still recommended, but +For multi-column use cases, the :class:`~pandera.schemas.DataFrameSchema` is still recommended, but if you have one or a small number of columns to verify, using ``Column`` objects by themselves is appropriate. @@ -594,12 +607,13 @@ indexes by composing a list of ``pandera.Index`` objects. foo 2 3 -Get Pandas Datatypes --------------------- +Get Pandas Data Types +--------------------- Pandas provides a `dtype` parameter for casting a dataframe to a specific dtype -schema. ``DataFrameSchema`` provides a `dtype` property which returns a pandas -style dict. The keys of the dict are column names and values are the dtype. +schema. :class:`~pandera.schemas.DataFrameSchema` provides +a :attr:`~pandera.schemas.DataFrameSchema.dtypes` property which returns a +dictionary whose keys are column names and values are :class:`~pandera.dtypes.DataType`.
Some examples of where this can be provided to pandas are: diff --git a/docs/source/dtypes.rst b/docs/source/dtypes.rst new file mode 100644 index 000000000..b687d0d19 --- /dev/null +++ b/docs/source/dtypes.rst @@ -0,0 +1,188 @@ +.. pandera documentation for data types + +.. currentmodule:: pandera + +.. _dtypes: + +Pandera Data Types (new) +======================== + +*new in 0.7.0* + +Motivations +~~~~~~~~~~~ + +Pandera defines its own interface for data types in order to abstract the +specifics of dataframe-like data structures in the python ecosystem, such +as Apache Spark, Apache Arrow and xarray. + +.. note:: In the following section ``Pandera Data Type`` refers to a + :class:`pandera.dtypes.DataType` object whereas ``native data type`` refers + to data types used by third-party libraries that Pandera supports (e.g. pandas). + +Most of the time, it is transparent to end users since pandera columns and +indexes accept native data types. However, it is possible to extend the pandera +interface by: + +* modifying the **data type check** performed during schema validation. +* modifying the behavior of the **coerce** argument for :class:`~pandera.schemas.DataFrameSchema`. +* adding your **own custom data types**. + +DataType basics +~~~~~~~~~~~~~~~ + +All pandera data types inherit from :class:`pandera.dtypes.DataType` and must +be hashable. + +A data type implements three key methods: + +* :meth:`pandera.dtypes.DataType.check` which validates that data types are equivalent. +* :meth:`pandera.dtypes.DataType.coerce` which coerces a data container + (e.g. :class:`pandas.Series`) to the data type. +* The dunder method ``__str__()`` which should output the native alias. + For example ``str(pandera.Float64()) == "float64"`` + + +For pandera's validation methods to be aware of a data type, it has to be +registered with the targeted engine via :meth:`pandera.engines.engine.Engine.register_dtype`.
+An engine is in charge of mapping a pandera :class:`~pandera.dtypes.DataType` +with a native data type counterpart belonging to a third-party library. The mapping +can be queried with :meth:`pandera.engines.engine.Engine.dtype`. + +As of pandera ``0.7.0``, only the pandas :class:`~pandera.engines.pandas_engine.Engine` +is supported. + + +Example +~~~~~~~ + +Let's extend :class:`pandas.BooleanDtype` coercion to handle the string +literals ``"True"`` and ``"False"``. + +.. testcode:: dtypes + + import pandas as pd + import pandera as pa + from pandera import dtypes + from pandera.engines import pandas_engine + + + @pandas_engine.Engine.register_dtype # step 1 + @dtypes.immutable # step 2 + class LiteralBool(pandas_engine.BOOL): # step 3 + def coerce(self, series: pd.Series) -> pd.Series: + """Coerce a pandas.Series to date types.""" + if pd.api.types.is_string_dtype(series): + series = series.replace({"True": 1, "False": 0}) + return series.astype("boolean") + + + data = pd.Series(["True", "False"], name="literal_bools") + + # step 4 + print( + pa.SeriesSchema(LiteralBool(), coerce=True, name="literal_bools") + .validate(data) + .dtype + ) + +.. testoutput:: dtypes + + boolean + +The example above performs the following steps: + +1. Register the data type with the pandas engine. +2. :func:`pandera.dtypes.immutable` creates an immutable (and hashable) + :func:`dataclass`. +3. Inherit :class:`pandera.engines.pandas_engine.BOOL`, which is the pandera + representation of :class:`pandas.BooleanDtype`. This is not mandatory but + it makes our life easier by having already implemented all the required + methods. +4. Check that our new data type can coerce the string literals. + +So far we did not override the default behavior: + +.. testcode:: dtypes + + import pandera as pa + + pa.SeriesSchema("boolean", coerce=True).validate(data) + + +.. testoutput:: dtypes + + Traceback (most recent call last): + ... 
+ pandera.errors.SchemaError: Error while coercing 'literal_bools' to type boolean: Need to pass bool-like values + +To completely replace the default :class:`~pandera.engines.pandas_engine.BOOL`, +we need to supply all the equivalent representations to +:meth:`~pandera.engines.engine.Engine.register_dtype`. Behind the scenes, when +``pa.SeriesSchema("boolean")`` is called the corresponding pandera data type +is looked up using :meth:`pandera.engines.engine.Engine.dtype`. + +.. testcode:: dtypes + + print(f"before: {pandas_engine.Engine.dtype('boolean').__class__}") + + + @pandas_engine.Engine.register_dtype( + equivalents=["boolean", pd.BooleanDtype, pd.BooleanDtype()], + ) + @dtypes.immutable + class LiteralBool(pandas_engine.BOOL): + def coerce(self, series: pd.Series) -> pd.Series: + """Coerce a pandas.Series to date types.""" + if pd.api.types.is_string_dtype(series): + series = series.replace({"True": 1, "False": 0}) + return series.astype("boolean") + + + print(f"after: {pandas_engine.Engine.dtype('boolean').__class__}") + + for dtype in ["boolean", pd.BooleanDtype, pd.BooleanDtype()]: + pa.SeriesSchema(dtype, coerce=True).validate(data) + +.. testoutput:: dtypes + + before: <class 'pandera.engines.pandas_engine.BOOL'> + after: <class 'LiteralBool'> + +.. note:: For convenience, we specified both ``pd.BooleanDtype`` and + ``pd.BooleanDtype()`` as equivalents. That gives us more flexibility in + what pandera schemas can recognize (see last for-loop above). + +Parametrized data types +~~~~~~~~~~~~~~~~~~~~~~~ + +Some data types can be parametrized. One common example is +:class:`pandas.CategoricalDtype`. + +The ``equivalents`` argument of +:meth:`~pandera.engines.engine.Engine.register_dtype` does not handle +this situation but will automatically register a :func:`classmethod` with +signature ``from_parametrized_dtype(cls, equivalent:...)`` if the decorated +:class:`~pandera.dtypes.DataType` defines it.
The ``equivalent`` argument must +be type-annotated because it is leveraged to dispatch the input of +:class:`~pandera.engines.engine.Engine.dtype` to the appropriate +``from_parametrized_dtype`` class method. + +For example, here is a snippet from :class:`pandera.engines.pandas_engine.Category`: + +.. code-block:: python + + import pandas as pd + from pandera import dtypes + + @classmethod + def from_parametrized_dtype( + cls, cat: Union[dtypes.Category, pd.CategoricalDtype] + ): + """Convert a categorical to + a Pandera :class:`pandera.dtypes.pandas_engine.Category`.""" + return cls(categories=cat.categories, ordered=cat.ordered) # type: ignore + + +.. note:: The dispatch mechanism relies on :func:`functools.singledispatch`. + Unlike the built-in implementation, :data:`typing.Union` is recognized. diff --git a/docs/source/extensions.rst b/docs/source/extensions.rst index de4928bd8..dd9be7344 100644 --- a/docs/source/extensions.rst +++ b/docs/source/extensions.rst @@ -4,8 +4,8 @@ .. _extensions: -Extensions (new) -================ +Extensions +========== *new in 0.6.0* diff --git a/docs/source/index.rst b/docs/source/index.rst index 3396d18e6..c49cf791d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -306,6 +306,7 @@ Submit issues, feature requests or bugfixes on series_schemas checks hypothesis + dtypes decorators schema_inference schema_models @@ -318,7 +319,7 @@ Submit issues, feature requests or bugfixes on :caption: Reference :hidden: - API_reference + reference/index .. toctree:: :maxdepth: 6 diff --git a/docs/source/reference/core.rst b/docs/source/reference/core.rst new file mode 100644 index 000000000..c39175c6e --- /dev/null +++ b/docs/source/reference/core.rst @@ -0,0 +1,35 @@ +.. _api-core: + +Schemas +======= + +.. autosummary:: + :toctree: generated + :template: class.rst + :nosignatures: + + pandera.schemas.DataFrameSchema + pandera.schemas.SeriesSchema + +Schema Components +================= + +.. 
autosummary:: + :toctree: generated + :template: class.rst + :nosignatures: + + pandera.schema_components.Column + pandera.schema_components.Index + pandera.schema_components.MultiIndex + +Checks +====== + +.. autosummary:: + :toctree: generated + :template: class.rst + :nosignatures: + + pandera.checks.Check + pandera.hypotheses.Hypothesis diff --git a/docs/source/reference/decorators.rst b/docs/source/reference/decorators.rst new file mode 100644 index 000000000..2506336f4 --- /dev/null +++ b/docs/source/reference/decorators.rst @@ -0,0 +1,13 @@ +.. _api-decorators: + +Decorators +========== + +.. autosummary:: + :toctree: generated + :nosignatures: + + pandera.decorators.check_input + pandera.decorators.check_output + pandera.decorators.check_io + pandera.decorators.check_types diff --git a/docs/source/reference/dtypes.rst b/docs/source/reference/dtypes.rst new file mode 100644 index 000000000..0fc166e75 --- /dev/null +++ b/docs/source/reference/dtypes.rst @@ -0,0 +1,94 @@ +.. _api-dtypes: + +Pandera Data Types +================== + +Library-agnostic dtypes +----------------------- + +.. autosummary:: + :toctree: generated + :template: dtype.rst + :nosignatures: + + pandera.dtypes.DataType + pandera.dtypes.Bool + pandera.dtypes.Timestamp + pandera.dtypes.DateTime + pandera.dtypes.Timedelta + pandera.dtypes.Category + pandera.dtypes.Float + pandera.dtypes.Float16 + pandera.dtypes.Float32 + pandera.dtypes.Float64 + pandera.dtypes.Float128 + pandera.dtypes.Int + pandera.dtypes.Int8 + pandera.dtypes.Int16 + pandera.dtypes.Int32 + pandera.dtypes.Int64 + pandera.dtypes.UInt + pandera.dtypes.UInt8 + pandera.dtypes.UInt16 + pandera.dtypes.UInt32 + pandera.dtypes.UInt64 + pandera.dtypes.Complex + pandera.dtypes.Complex64 + pandera.dtypes.Complex128 + pandera.dtypes.Complex256 + pandera.dtypes.String + + +Pandas-specific Dtypes +---------------------- + +Listed here for compatibility with pandera versions < 0.7. 
+Passing native pandas dtypes to pandera components is preferred. + +.. autosummary:: + :toctree: generated + :template: dtype.rst + :nosignatures: + + pandera.engines.pandas_engine.BOOL + pandera.engines.pandas_engine.INT8 + pandera.engines.pandas_engine.INT16 + pandera.engines.pandas_engine.INT32 + pandera.engines.pandas_engine.INT64 + pandera.engines.pandas_engine.UINT8 + pandera.engines.pandas_engine.UINT16 + pandera.engines.pandas_engine.UINT32 + pandera.engines.pandas_engine.UINT64 + pandera.engines.pandas_engine.STRING + pandera.engines.numpy_engine.Object + +Utility functions +----------------- + +.. autosummary:: + :toctree: generated + :nosignatures: + + pandera.dtypes.is_subdtype + pandera.dtypes.is_float + pandera.dtypes.is_int + pandera.dtypes.is_uint + pandera.dtypes.is_complex + pandera.dtypes.is_numeric + pandera.dtypes.is_bool + pandera.dtypes.is_string + pandera.dtypes.is_datetime + pandera.dtypes.is_timedelta + pandera.dtypes.immutable + +Engines +------- + +.. autosummary:: + :toctree: generated + :template: class.rst + :nosignatures: + + pandera.engines.engine.Engine + pandera.engines.numpy_engine.Engine + pandera.engines.pandas_engine.Engine diff --git a/docs/source/reference/errors.rst b/docs/source/reference/errors.rst new file mode 100644 index 000000000..74fac1bde --- /dev/null +++ b/docs/source/reference/errors.rst @@ -0,0 +1,14 @@ +.. _api-errors: + +Errors +====== + +.. autosummary:: + :toctree: generated + :template: class.rst + :nosignatures: + + pandera.errors.SchemaError + pandera.errors.SchemaErrors + pandera.errors.SchemaInitError + pandera.errors.SchemaDefinitionError diff --git a/docs/source/reference/extensions.rst b/docs/source/reference/extensions.rst new file mode 100644 index 000000000..617b5ed7a --- /dev/null +++ b/docs/source/reference/extensions.rst @@ -0,0 +1,11 @@ +.. _api-extensions: + +Extensions +========== + +.. 
autosummary:: + :toctree: generated + :template: module.rst + :nosignatures: + + pandera.extensions diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst new file mode 100644 index 000000000..3ec1d4713 --- /dev/null +++ b/docs/source/reference/index.rst @@ -0,0 +1,41 @@ +.. pandera package index documentation toctree + +.. currentmodule:: pandera + +API +=== + +.. list-table:: + :widths: 25 75 + + * - :ref:`Core <api-core>` + - The core objects for defining pandera schemas + * - :ref:`Data Types <api-dtypes>` + - Data types for type checking and coercion. + * - :ref:`Schema Models <api-schema-models>` + - Alternative class-based API for defining pandera schemas. + * - :ref:`Decorators <api-decorators>` + - Decorators for integrating pandera schemas with python functions. + * - :ref:`Schema Inference <api-schema-inference>` + - Bootstrap schemas from real data + * - :ref:`IO Utilities <api-io-utils>` + - Utility functions for reading/writing schemas + * - :ref:`Strategies <api-strategies>` + - Module of functions for generating data from schemas. + * - :ref:`Extensions <api-extensions>` + - Utility functions for extending pandera functionality + * - :ref:`Errors <api-errors>` + - Pandera-specific exceptions + +.. toctree:: + :hidden: + + core + schema_models + decorators + schema_inference + io + strategies + extensions + errors + dtypes diff --git a/docs/source/reference/io.rst b/docs/source/reference/io.rst new file mode 100644 index 000000000..2da272a14 --- /dev/null +++ b/docs/source/reference/io.rst @@ -0,0 +1,16 @@ +.. _api-io-utils: + +IO Utils +======== + +The ``io`` module and built-in ``Hypothesis`` checks require a pandera +installation with the corresponding extension, see the +:ref:`installation` instructions for more details. + +..
autosummary:: + :toctree: generated + :nosignatures: + + pandera.io.from_yaml + pandera.io.to_yaml + pandera.io.to_script diff --git a/docs/source/reference/schema_inference.rst b/docs/source/reference/schema_inference.rst new file mode 100644 index 000000000..179c151ac --- /dev/null +++ b/docs/source/reference/schema_inference.rst @@ -0,0 +1,10 @@ +.. _api-schema-inference: + +Schema Inference +================ + +.. autosummary:: + :toctree: generated + :nosignatures: + + pandera.schema_inference.infer_schema diff --git a/docs/source/reference/schema_models.rst b/docs/source/reference/schema_models.rst new file mode 100644 index 000000000..9468a3380 --- /dev/null +++ b/docs/source/reference/schema_models.rst @@ -0,0 +1,45 @@ +.. _api-schema-models: + +Schema Models +============= + +.. currentmodule:: pandera + +Schema Model +------------ + +.. autosummary:: + :toctree: generated + :template: class.rst + + pandera.model.SchemaModel + +Model Components +---------------- + +.. autosummary:: + :toctree: generated + + pandera.model_components.Field + pandera.model_components.check + pandera.model_components.dataframe_check + +Typing +------ + +.. autosummary:: + :toctree: generated + :template: typing_module.rst + :nosignatures: + + pandera.typing + +Config +------ + +.. autosummary:: + :toctree: generated + :template: model_component_class.rst + :nosignatures: + + pandera.model.BaseConfig diff --git a/docs/source/reference/strategies.rst b/docs/source/reference/strategies.rst new file mode 100644 index 000000000..16f9b1aaa --- /dev/null +++ b/docs/source/reference/strategies.rst @@ -0,0 +1,11 @@ +.. _api-strategies: + +Data Synthesis Strategies +========================= + +.. 
autosummary:: + :toctree: generated + :template: strategies_module.rst + :nosignatures: + + pandera.strategies diff --git a/environment.yml b/environment.yml index d15718413..b9edb80a6 100644 --- a/environment.yml +++ b/environment.yml @@ -29,13 +29,13 @@ dependencies: - pytest - pytest-cov - pytest-xdist + - xdoctest - setuptools >= 52.0.0 - - nox = 2020.12.31 # pinning due to UnicodeDecodeError, see https://github.com/pandera-dev/pandera/pull/504/checks?check_run_id=2841360122 + - nox = 2020.12.31 # pinning due to UnicodeDecodeError, see https://github.com/pandera-dev/pandera/pull/504/checks?check_run_id=2841360122 - importlib_metadata # required if python < 3.8 # documentation - - sphinx = 3.5.4 # pinned due to doc-building error https://github.com/pandera-dev/pandera/runs/2601459267 - - sphinx_rtd_theme + - sphinx - sphinx-autodoc-typehints - sphinx-copybutton - recommonmark @@ -50,7 +50,7 @@ dependencies: - pre_commit - pip: - - furo + - furo==2021.6.18b36 - types-click - types-pyyaml - types-pkg_resources diff --git a/noxfile.py b/noxfile.py index bcd132984..f2efaa02d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -22,6 +22,7 @@ "mypy", "tests", "docs", + "doctests", ) DEFAULT_PYTHON = "3.8" @@ -172,11 +173,12 @@ def install_extras( force_pip: bool = False, ) -> None: """Install dependencies.""" - specs = [ - spec if spec != "pandas" else "pandas" - for spec in REQUIRES[extra].values() - if spec not in ALWAYS_USE_PIP - ] + specs, pip_specs = [], [] + for spec in REQUIRES[extra].values(): + if spec.split("==")[0] in ALWAYS_USE_PIP: + pip_specs.append(spec) + else: + specs.append(spec if spec != "pandas" else "pandas") if extra == "core": specs.append(REQUIRES["all"]["hypothesis"]) @@ -190,7 +192,7 @@ def install_extras( print("using pip installer") session.install(*specs) - session.install(*ALWAYS_USE_PIP) + session.install(*pip_specs) # always use pip for these packages session.install("-e", ".", "--no-deps") # install pandera @@ -294,10 +296,7 @@ def 
mypy(session: Session) -> None: @nox.parametrize("extra", EXTRA_NAMES) def tests(session: Session, extra: str) -> None: """Run the test suite.""" - install_extras( - session, - extra, - ) + install_extras(session, extra) if session.posargs: args = session.posargs @@ -324,6 +323,13 @@ def tests(session: Session, extra: str) -> None: session.run("pytest", *args) +@nox.session(python=PYTHON_VERSIONS) +def doctests(session: Session) -> None: + """Run the docstring examples with xdoctest.""" + install_extras(session, extra="all", force_pip=True) + session.run("xdoctest", PACKAGE, "--quiet") + + @nox.session(python=PYTHON_VERSIONS) def docs(session: Session) -> None: """Build the documentation.""" @@ -332,18 +338,22 @@ def docs(session: Session) -> None: # build html docs if not CI_RUN and not session.posargs: - shutil.rmtree(os.path.join("_build"), ignore_errors=True) - shutil.rmtree(os.path.join("generated"), ignore_errors=True) - session.run( - "sphinx-build", - "-W", - "-T", - "-b=html", - "-d", - os.path.join("_build", "doctrees", ""), - "source", - os.path.join("_build", "html", ""), + shutil.rmtree("_build", ignore_errors=True) + shutil.rmtree( + os.path.join("source", "reference", "generated"), + ignore_errors=True, ) + for builder in ["doctest", "html"]: + session.run( + "sphinx-build", + "-W", + "-T", + f"-b={builder}", + "-d", + os.path.join("_build", "doctrees", ""), + "source", + os.path.join("_build", builder, ""), + ) else: shutil.rmtree(os.path.join("_build"), ignore_errors=True) args = session.posargs or [ diff --git a/pandera/checks.py b/pandera/checks.py index 0345dc39b..ce67eaaa1 100644 --- a/pandera/checks.py +++ b/pandera/checks.py @@ -464,13 +464,13 @@ def __eq__(self, other): are_strategy_fn_objects_equal = True are_all_other_check_attributes_equal = { - i: self.__dict__[i] - for i in self.__dict__ - if i not in ["_check_fn", "strategy"] + k: v + for k, v in self.__dict__.items() + if k not in ["_check_fn", "strategy"] } == { - i: other.__dict__[i] - for i in
other.__dict__ - if i not in ["_check_fn", "strategy"] + k: v + for k, v in other.__dict__.items() + if k not in ["_check_fn", "strategy"] } return ( diff --git a/pandera/dtypes.py b/pandera/dtypes.py index f0ccde032..53ced27f4 100644 --- a/pandera/dtypes.py +++ b/pandera/dtypes.py @@ -18,7 +18,8 @@ class DataType(ABC): """Base class of all Pandera data types.""" - continuous: bool = False + continuous: Optional[bool] = None + """Whether the number data type is continuous.""" def __init__(self): if self.__class__ is DataType: @@ -27,15 +28,16 @@ def __init__(self): ) def coerce(self, data_container: Any): - """Coerce data container to the dtype.""" + """Coerce data container to the data type.""" raise NotImplementedError() def __call__(self, data_container: Any): - """Coerce data container to the dtype.""" + """Coerce data container to the data type.""" return self.coerce(data_container) def check(self, pandera_dtype: "DataType") -> bool: - """Check that pandera :class:`DataType`s are equivalent.""" + """Check that pandera :class:`~pandera.dtypes.DataType` are + equivalent.""" return self == pandera_dtype def __repr__(self) -> str: @@ -64,7 +66,7 @@ def immutable( :param dtype: :class:`DataType` to decorate. :param dataclass_kwargs: Keywords arguments forwarded to :func:`dataclasses.dataclass`. 
- :returns: Immutable :class:`~pandera.dtypes.DataType` + :returns: Immutable :class:`DataType` """ kwargs = {"frozen": True, "init": False, "repr": False} kwargs.update(dataclass_kwargs) @@ -98,6 +100,7 @@ class _Number(DataType): """Semantic representation of a numeric data type.""" exact: Optional[bool] = None + """Whether the data type is an exact representation of a number.""" def check(self, pandera_dtype: "DataType") -> bool: if self.__class__ is _Number: @@ -109,6 +112,7 @@ def check(self, pandera_dtype: "DataType") -> bool: class _PhysicalNumber(_Number): bit_width: Optional[int] = None + """Number of bits used by the machine representation.""" _base_name: Optional[str] = dataclasses.field( default=None, init=False, repr=False ) @@ -135,7 +139,6 @@ def __str__(self) -> str: return "bool" -Boolean = Bool ############################################################################### # signed integer ############################################################################### @@ -149,6 +152,7 @@ class Int(_PhysicalNumber): # type: ignore exact = True bit_width = 64 signed: bool = dataclasses.field(default=True, init=False) + """Whether the integer data type is signed.""" def check(self, pandera_dtype: DataType) -> bool: return ( diff --git a/pandera/engines/engine.py b/pandera/engines/engine.py index bf01e0473..e8a4e3b00 100644 --- a/pandera/engines/engine.py +++ b/pandera/engines/engine.py @@ -3,7 +3,6 @@ # pylint:disable=no-value-for-parameter import functools import inspect -import warnings from abc import ABCMeta from dataclasses import dataclass from typing import ( @@ -12,11 +11,11 @@ Callable, Dict, List, + Optional, Set, Tuple, Type, TypeVar, - Union, get_type_hints, ) @@ -118,9 +117,7 @@ def _method(*args, **kwargs): cls._registry[cls].dispatch.register(source_dtype, _method) def _register_equivalents( - cls, - pandera_dtype_cls: Type[DataType], - *source_dtypes: Any, + cls, pandera_dtype_cls: Type[DataType], *source_dtypes: Any ) -> None: 
pandera_dtype = pandera_dtype_cls() # type: ignore for source_dtype in source_dtypes: @@ -129,21 +126,40 @@ def _register_equivalents( def register_dtype( cls: _EngineType, - pandera_dtype_cls: Type[DataType] = None, + pandera_dtype_cls: Type[_DataType] = None, *, - equivalents: List[Any] = None, - ): - """Register a Pandera :class:`DataType`. + equivalents: Optional[List[Any]] = None, + ) -> Callable: + """Register a Pandera :class:`~pandera.dtypes.DataType` with the engine, + as a class decorator. :param pandera_dtype: The DataType to register. - :param equivalents: Equivalent scalar data type class or - non-parametrized data type instance. + :param equivalents: Equivalent scalar data type classes or + non-parametrized data type instances. .. note:: The classmethod ``from_parametrized_dtype`` will also be registered. + See :ref:`here<dtypes>` for more usage details. + + :example: + + >>> import pandera as pa + >>> + >>> class MyDataType(pa.DataType): + ... pass + >>> + >>> class MyEngine( + ... metaclass=pa.engines.engine.Engine, base_pandera_dtypes=MyDataType + ... ): + ... pass + >>> + >>> @MyEngine.register_dtype(equivalents=[bool]) + ... class MyBool(MyDataType): + ... pass + + """ - def _wrapper(pandera_dtype_cls: Union[DataType, Type[DataType]]): + def _wrapper(pandera_dtype_cls: Type[_DataType]) -> Type[_DataType]: if not inspect.isclass(pandera_dtype_cls): raise ValueError( f"{cls.__name__}.register_dtype can only decorate a class, " @@ -155,11 +171,6 @@ def _wrapper(pandera_dtype_cls: Union[DataType, Type[DataType]]): if "from_parametrized_dtype" in pandera_dtype_cls.__dict__: cls._register_from_parametrized_dtype(pandera_dtype_cls) - elif not equivalents: - warnings.warn( - f"register_dtype({pandera_dtype_cls}) on a class without a " - + "'from_parametrized_dtype' classmethod has no effect."
- ) cls._registered_dtypes.add(pandera_dtype_cls) return pandera_dtype_cls @@ -199,7 +210,9 @@ def dtype(cls: _EngineType, data_type: Any) -> _DataType: f"Data type '{data_type}' not understood by {cls.__name__}." ) from None - def get_registered_dtypes(cls) -> List[Type[DataType]]: - """Return :class:`pandera.dtypes.DataType`s registered + def get_registered_dtypes( # pylint:disable=W1401 + cls, + ) -> List[Type[DataType]]: + """Return the :class:`pandera.dtypes.DataType`\s registered with this engine.""" return list(cls._registered_dtypes) diff --git a/pandera/engines/numpy_engine.py b/pandera/engines/numpy_engine.py index c5fd62ff2..1a6982895 100644 --- a/pandera/engines/numpy_engine.py +++ b/pandera/engines/numpy_engine.py @@ -25,6 +25,7 @@ class DataType(dtypes.DataType): type: np.dtype = dataclasses.field( default=np.dtype("object"), repr=False, init=False ) + """Native numpy dtype boxed by the data type.""" def __init__(self, dtype: Any): super().__init__() @@ -58,7 +59,7 @@ class Engine( # pylint:disable=too-few-public-methods @classmethod def dtype(cls, data_type: Any) -> dtypes.DataType: """Convert input into a numpy-compatible - Pandera :class:`DataType` object.""" + Pandera :class:`~pandera.dtypes.DataType` object.""" try: return engine.Engine.dtype(cls, data_type) except TypeError: @@ -324,6 +325,8 @@ def check(self, pandera_dtype: "dtypes.DataType") -> bool: @Engine.register_dtype(equivalents=["object", "O", object, np.object_]) @immutable class Object(DataType): + """Semantic representation of a :class:`numpy.object_`.""" + type = np.dtype("object") diff --git a/pandera/engines/pandas_engine.py b/pandera/engines/pandas_engine.py index 44a8f2fb2..55b9a400d 100644 --- a/pandera/engines/pandas_engine.py +++ b/pandera/engines/pandas_engine.py @@ -41,6 +41,7 @@ class DataType(dtypes.DataType): """Base `DataType` for boxing Pandas data types.""" type: Any = dataclasses.field(repr=False, init=False) + """Native pandas dtype boxed by the data type.""" def 
__init__(self, dtype: Any): super().__init__() @@ -64,7 +65,14 @@ def check(self, pandera_dtype: dtypes.DataType) -> bool: pandera_dtype = Engine.dtype(pandera_dtype) except TypeError: return False - return super().check(pandera_dtype) + + # attempts to compare pandas native type if possible + # to let subclass inherit check + # (super will compare that DataType classes are exactly the same) + try: + return self.type == pandera_dtype.type + except TypeError: + return super().check(pandera_dtype) def __str__(self) -> str: return str(self.type) @@ -82,7 +90,7 @@ class Engine( # pylint:disable=too-few-public-methods @classmethod def dtype(cls, data_type: Any) -> "DataType": """Convert input into a pandas-compatible - Pandera :class:`DataType` object.""" + Pandera :class:`~pandera.dtypes.DataType` object.""" try: return engine.Engine.dtype(cls, data_type) except TypeError: @@ -113,7 +121,8 @@ def dtype(cls, data_type: Any) -> "DataType": @classmethod def numpy_dtype(cls, pandera_dtype: dtypes.DataType) -> np.dtype: - """Convert a pandera data type to a numpy data type.""" + """Convert a Pandera :class:`~pandera.dtypes.DataType` + to a :class:`numpy.dtype`.""" pandera_dtype = engine.Engine.dtype(cls, pandera_dtype) alias = str(pandera_dtype).lower() @@ -139,11 +148,11 @@ def numpy_dtype(cls, pandera_dtype: dtypes.DataType) -> np.dtype: equivalents=["boolean", pd.BooleanDtype, pd.BooleanDtype()], ) @immutable -class Bool(DataType, dtypes.Bool): - type = pd.BooleanDtype() +class BOOL(DataType, dtypes.Bool): + """Semantic representation of a :class:`pandas.BooleanDtype`.""" + type = pd.BooleanDtype() -BOOL = Bool ############################################################################### # number @@ -197,7 +206,6 @@ def _register_numpy_numbers( equivalents.add("integer") numpy_data_type = getattr(numpy_engine, f"{pandera_name}{bit_width}") - print(f"EQUIVALENTS FOR {numpy_data_type}: {list(equivalents)}") Engine.register_dtype(numpy_data_type, 
equivalents=list(equivalents)) @@ -214,43 +222,40 @@ def _register_numpy_numbers( @Engine.register_dtype(equivalents=[pd.Int64Dtype, pd.Int64Dtype()]) @immutable -class Int64(DataType, dtypes.Int): +class INT64(DataType, dtypes.Int): + """Semantic representation of a :class:`pandas.Int64Dtype`.""" + type = pd.Int64Dtype() bit_width: int = 64 -INT64 = Int64 - - @Engine.register_dtype(equivalents=[pd.Int32Dtype, pd.Int32Dtype()]) @immutable -class Int32(Int64): +class INT32(INT64): + """Semantic representation of a :class:`pandas.Int32Dtype`.""" + type = pd.Int32Dtype() bit_width: int = 32 -INT32 = Int32 - - @Engine.register_dtype(equivalents=[pd.Int16Dtype, pd.Int16Dtype()]) @immutable -class Int16(Int32): +class INT16(INT32): + """Semantic representation of a :class:`pandas.Int16Dtype`.""" + type = pd.Int16Dtype() bit_width: int = 16 -INT16 = Int16 - - @Engine.register_dtype(equivalents=[pd.Int8Dtype, pd.Int8Dtype()]) @immutable -class Int8(Int16): +class INT8(INT16): + """Semantic representation of a :class:`pandas.Int8Dtype`.""" + type = pd.Int8Dtype() bit_width: int = 8 -INT8 = Int8 - ############################################################################### # unsigned integer ############################################################################### @@ -264,37 +269,40 @@ class Int8(Int16): @Engine.register_dtype(equivalents=[pd.UInt64Dtype, pd.UInt64Dtype()]) @immutable -class UInt64(DataType, dtypes.UInt): +class UINT64(DataType, dtypes.UInt): + """Semantic representation of a :class:`pandas.UInt64Dtype`.""" + type = pd.UInt64Dtype() bit_width: int = 64 @Engine.register_dtype(equivalents=[pd.UInt32Dtype, pd.UInt32Dtype()]) @immutable -class UInt32(UInt64): +class UINT32(UINT64): + """Semantic representation of a :class:`pandas.UInt32Dtype`.""" + type = pd.UInt32Dtype() bit_width: int = 32 @Engine.register_dtype(equivalents=[pd.UInt16Dtype, pd.UInt16Dtype()]) @immutable -class UInt16(UInt32): +class UINT16(UINT32): + """Semantic representation of a 
:class:`pandas.UInt16Dtype`.""" + type = pd.UInt16Dtype() bit_width: int = 16 @Engine.register_dtype(equivalents=[pd.UInt8Dtype, pd.UInt8Dtype()]) @immutable -class UInt8(UInt16): +class UINT8(UINT16): + """Semantic representation of a :class:`pandas.UInt8Dtype`.""" + type = pd.UInt8Dtype() bit_width: int = 8 -UINT64 = UInt64 -UINT32 = UInt32 -UINT16 = UInt16 -UINT8 = UInt8 - # ############################################################################### # # float # ############################################################################### @@ -330,6 +338,8 @@ class UInt8(UInt16): ) @immutable(init=True) class Category(DataType, dtypes.Category): + """Semantic representation of a :class:`pandas.CategoricalDtype`.""" + type: pd.CategoricalDtype = dataclasses.field(default=None, init=False) def __init__( # pylint:disable=super-init-not-called @@ -349,7 +359,7 @@ def from_parametrized_dtype( cls, cat: Union[dtypes.Category, pd.CategoricalDtype] ): """Convert a categorical to - a Pandera :class:`~pandera.dtypes.pandas_engine.Category`.""" + a Pandera :class:`pandera.engines.pandas_engine.Category`.""" return cls( # type: ignore categories=cat.categories, ordered=cat.ordered ) @@ -359,11 +369,10 @@ def from_parametrized_dtype( equivalents=["string", pd.StringDtype, pd.StringDtype()] ) @immutable -class String(DataType, dtypes.String): - type = pd.StringDtype() - +class STRING(DataType, dtypes.String): + """Semantic representation of a :class:`pandas.StringDtype`.""" -STRING = String + type = pd.StringDtype() @Engine.register_dtype( @@ -454,7 +463,7 @@ def _to_datetime(col: pd.Series) -> pd.Series: @classmethod def from_parametrized_dtype(cls, pd_dtype: pd.DatetimeTZDtype): """Convert a :class:`pandas.DatetimeTZDtype` to - a Pandera :class:`~pandera.engines.pandas_engine.DateTime`.""" + a Pandera :class:`pandera.engines.pandas_engine.DateTime`.""" return cls(unit=pd_dtype.unit, tz=pd_dtype.tz) # type: ignore def __str__(self) -> str: @@ -491,7 +500,7 @@ def 
__post_init__(self): @classmethod def from_parametrized_dtype(cls, pd_dtype: pd.PeriodDtype): """Convert a :class:`pandas.PeriodDtype` to - a Pandera :class:`~pandera.engines.pandas_engine.Period`.""" + a Pandera :class:`pandera.engines.pandas_engine.Period`.""" return cls(freq=pd_dtype.freq) # type: ignore @@ -519,7 +528,7 @@ def __post_init__(self): @classmethod def from_parametrized_dtype(cls, pd_dtype: pd.SparseDtype): """Convert a :class:`pandas.SparseDtype` to - a Pandera :class:`~pandera.engines.pandas_engine.Sparse`.""" + a Pandera :class:`pandera.engines.pandas_engine.Sparse`.""" return cls( # type: ignore dtype=pd_dtype.subtype, fill_value=pd_dtype.fill_value ) @@ -541,10 +550,5 @@ def __post_init__(self): @classmethod def from_parametrized_dtype(cls, pd_dtype: pd.IntervalDtype): """Convert a :class:`pandas.IntervalDtype` to - a Pandera :class:`~pandera.engines.pandas_engine.Interval`.""" + a Pandera :class:`pandera.engines.pandas_engine.Interval`.""" return cls(subtype=pd_dtype.subtype) # type: ignore - - -print("PANDAS ENGINE EQUIVALENTS") -for k, v in engine.Engine._registry[Engine].equivalents.items(): - print(f"{k}: equivalents={v}") diff --git a/pandera/schemas.py b/pandera/schemas.py index d8f205345..908716ab8 100644 --- a/pandera/schemas.py +++ b/pandera/schemas.py @@ -232,9 +232,11 @@ def _set_column_handler(column, column_name): @property def dtypes(self) -> Dict[str, DataType]: + # pylint:disable=anomalous-backslash-in-string """ - A pandas style dtypes dict where the keys are column names and values - are pandas dtype for the column. Excludes columns where regex=True. + A dict where the keys are column names and values are + :class:`~pandera.dtypes.DataType`\s for the column. Excludes columns + where `regex=True`. :returns: dictionary of columns and their associated dtypes. 
""" diff --git a/requirements-dev.txt b/requirements-dev.txt index c95ac87a3..af2ed6b92 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -20,18 +20,18 @@ pylint >= 2.7.2 pytest pytest-cov pytest-xdist +xdoctest setuptools >= 52.0.0 nox == 2020.12.31 importlib_metadata -sphinx == 3.5.4 -sphinx_rtd_theme +sphinx sphinx-autodoc-typehints sphinx-copybutton recommonmark twine asv pre_commit -furo +furo==2021.6.18b36 types-click types-pyyaml types-pkg_resources \ No newline at end of file diff --git a/tests/core/checks_fixtures.py b/tests/core/checks_fixtures.py index baa99e81e..08b11dbd2 100644 --- a/tests/core/checks_fixtures.py +++ b/tests/core/checks_fixtures.py @@ -1,5 +1,5 @@ """Pytest fixtures for testing custom checks.""" -import unittest.mock as mock +from unittest import mock import pandas as pd import pytest diff --git a/tests/core/test_dtypes.py b/tests/core/test_dtypes.py index 48e303c10..a509bdf58 100644 --- a/tests/core/test_dtypes.py +++ b/tests/core/test_dtypes.py @@ -39,10 +39,10 @@ nullable_int_dtypes = { - pandas_engine.Int8: "Int8", - pandas_engine.Int16: "Int16", - pandas_engine.Int32: "Int32", - pandas_engine.Int64: "Int64", + pandas_engine.INT8: "Int8", + pandas_engine.INT16: "Int16", + pandas_engine.INT32: "Int32", + pandas_engine.INT64: "Int64", } uint_dtypes = { @@ -58,10 +58,10 @@ } nullable_uint_dtypes = { - pandas_engine.UInt8: "UInt8", - pandas_engine.UInt16: "UInt16", - pandas_engine.UInt32: "UInt32", - pandas_engine.UInt64: "UInt64", + pandas_engine.UINT8: "UInt8", + pandas_engine.UINT16: "UInt16", + pandas_engine.UINT32: "UInt32", + pandas_engine.UINT64: "UInt64", } float_dtypes = { diff --git a/tests/core/test_engine.py b/tests/core/test_engine.py index a4b8bfefb..6e38121c0 100644 --- a/tests/core/test_engine.py +++ b/tests/core/test_engine.py @@ -41,14 +41,6 @@ class FakeEngine( # pylint:disable=too-few-public-methods del FakeEngine -def test_register_bare_dtype(engine: Engine): - """Test that a dtype without equivalents 
nor 'from_parametrized_dtype' - classmethod can be registered. - """ - with pytest.warns(UserWarning): - engine.register_dtype(SimpleDtype) - - def test_register_equivalents(engine: Engine, equivalents: List[Any]): """Test that a dtype with equivalents can be registered.""" engine.register_dtype(SimpleDtype, equivalents=equivalents) diff --git a/tests/core/test_schemas.py b/tests/core/test_schemas.py index cd3988681..f6db2e819 100644 --- a/tests/core/test_schemas.py +++ b/tests/core/test_schemas.py @@ -88,7 +88,6 @@ def test_dataframe_schema(): # checks if 'a' is converted to float, while schema says int, will a schema # error be thrown with pytest.raises(errors.SchemaError): - df.assign(a=[1.7, 2.3, 3.1]).info() schema.validate(df.assign(a=[1.7, 2.3, 3.1])) @@ -191,10 +190,7 @@ def test_dataframe_dtype_coerce(): assert (df.dtypes == float_alias).all() # raises ValueError if _coerce_dtype is called when dtype is None - print("---") schema.dtype = None - print("----=-") - print(schema.dtype) with pytest.raises(ValueError): schema._coerce_dtype(df) @@ -316,7 +312,6 @@ def test_series_schema(): ) def f(series): - print(series) return series.isin(["foo", "bar", "baz"]) str_schema = SeriesSchema( @@ -1561,7 +1556,6 @@ def test_schema_str_repr(schema, fields): schema.__str__(), schema.__repr__(), ]: - print(x) assert x.startswith(f"") for field in fields: diff --git a/tests/io/test_io.py b/tests/io/test_io.py index 00da31761..f1b7507c5 100644 --- a/tests/io/test_io.py +++ b/tests/io/test_io.py @@ -2,8 +2,8 @@ import platform import tempfile -import unittest.mock as mock from pathlib import Path +from unittest import mock import pandas as pd import pytest diff --git a/tests/strategies/test_strategies.py b/tests/strategies/test_strategies.py index 850ed1292..6b68f6bb9 100644 --- a/tests/strategies/test_strategies.py +++ b/tests/strategies/test_strategies.py @@ -10,7 +10,7 @@ import pytest import pandera as pa -import pandera.strategies as strategies +from pandera import 
strategies from pandera.checks import _CheckBase, register_check_statistics from pandera.dtypes import is_category, is_complex, is_float from pandera.engines import pandas_engine