From eb223cac0fefa3852a9a0b0a5ce1b9d1bf1a6ca8 Mon Sep 17 00:00:00 2001 From: cosmicBboy Date: Mon, 8 Nov 2021 23:39:19 -0500 Subject: [PATCH 1/3] implement dataframe types - added submodules in pandera.typing module for dask, modin, koalas - new documentation for mypy integration, other dataframe library support - update copy on existing documentation - expand scope --- README.md | 26 +++-- docs/source/conf.py | 5 +- docs/source/dask.rst | 134 +++++++++++++++++++++ docs/source/dataframe_schemas.rst | 4 + docs/source/dtypes.rst | 4 +- docs/source/{scaling.rst => fugue.rst} | 39 ++++--- docs/source/index.rst | 34 ++++-- docs/source/integrations.rst | 78 ++++++++++++- docs/source/koalas.rst | 111 ++++++++++++++++++ docs/source/modin.rst | 113 ++++++++++++++++++ docs/source/schema_models.rst | 64 ++++++++++- docs/source/supported_libraries.rst | 50 ++++++++ docs/source/third_party_schema.rst | 4 +- pandera/external_config.py | 10 +- pandera/io.py | 1 - pandera/model.py | 38 +++--- pandera/typing/__init__.py | 58 ++++++++++ pandera/{typing.py => typing/common.py} | 147 +++++------------------- pandera/typing/dask.py | 67 +++++++++++ pandera/typing/koalas.py | 54 +++++++++ pandera/typing/modin.py | 46 ++++++++ pandera/typing/pandas.py | 101 ++++++++++++++++ tests/core/static/pandas_dataframe.py | 44 +++++-- tests/core/test_model.py | 12 +- tests/core/test_static_type_checking.py | 15 +-- tests/core/test_typing.py | 32 +++++- tests/dask/test_dask.py | 51 +++++++- tests/koalas/test_schemas_on_koalas.py | 53 +++++++-- tests/modin/test_schemas_on_modin.py | 51 ++++++-- 29 files changed, 1220 insertions(+), 226 deletions(-) create mode 100644 docs/source/dask.rst rename docs/source/{scaling.rst => fugue.rst} (91%) create mode 100644 docs/source/koalas.rst create mode 100644 docs/source/modin.rst create mode 100644 docs/source/supported_libraries.rst create mode 100644 pandera/typing/__init__.py rename pandera/{typing.py => typing/common.py} (59%) create mode 100644 pandera/typing/dask.py create mode 100644 pandera/typing/koalas.py create mode 100644 pandera/typing/modin.py create mode 100644 pandera/typing/pandas.py diff --git a/README.md b/README.md index 1e5b3cd57..f86c68ce0 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@
-*A data validation library for scientists, engineers, and analysts seeking +*A dataframe validation library for scientists, engineers, and analysts seeking correctness.*
@@ -22,10 +22,18 @@ correctness.* [![Downloads](https://pepy.tech/badge/pandera/month)](https://pepy.tech/project/pandera) [![Downloads](https://pepy.tech/badge/pandera)](https://pepy.tech/project/pandera) -`pandas` data structures contain information that `pandera` explicitly -validates at runtime. This is useful in production-critical or reproducible -research settings. With `pandera`, you can: +`pandera` provides a flexible and expressive API for performing data +validation on dataframes to make data processing pipelines more readable and +robust. +Dataframes contain information that `pandera` explicitly validates at runtime. +This is useful in production-critical or reproducible research settings. With +`pandera`, you can: + +1. Define a schema once and use it to validate + [different dataframe types](https://pandera.readthedocs.io/en/stable/supported_libraries.html) + including [pandas](http://pandas.pydata.org), [dask](https://dask.org), + [modin](https://modin.readthedocs.io/), and [koalas](https://koalas.readthedocs.io). 1. [Check](https://pandera.readthedocs.io/en/stable/checks.html) the types and properties of columns in a `DataFrame` or values in a `Series`. 1. Perform more complex statistical validation like @@ -37,11 +45,11 @@ research settings. With `pandera`, you can: with pydantic-style syntax and validate dataframes using the typing syntax. 1. [Synthesize data](https://pandera.readthedocs.io/en/stable/data_synthesis_strategies.html#data-synthesis-strategies) from schema objects for property-based testing with pandas data structures. - -`pandera` provides a flexible and expressive API for performing data validation -on tidy (long-form) and wide data to make data processing pipelines more -readable and robust. - +1. [Lazily Validate](https://pandera.readthedocs.io/en/stable/lazy_validation.html) + dataframes so that all validation checks are executed before raising an error. +1. [Integrate](https://pandera.readthedocs.io/en/stable/integrations.html) with + a rich ecosystem of python tools like [pydantic](https://pydantic-docs.helpmanual.io) + and [mypy](http://mypy-lang.org/). ## Documentation diff --git a/docs/source/conf.py b/docs/source/conf.py index 9abb7ebce..77c9b06f3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -175,7 +175,10 @@ intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "dask": ("https://docs.dask.org/en/latest/", None), + "koalas": ("https://koalas.readthedocs.io/en/latest/", None), + "modin": ("https://modin.readthedocs.io/en/latest/", None), } # strip prompts diff --git a/docs/source/dask.rst b/docs/source/dask.rst new file mode 100644 index 000000000..93676a255 --- /dev/null +++ b/docs/source/dask.rst @@ -0,0 +1,134 @@ +.. currentmodule:: pandera + +.. _scaling_dask: + +Data Validation with Dask +========================= + +*new in 0.8.0* + +`Dask `__ is a distributed +compute framework that offers a pandas-like dataframe API. +You can use pandera to validate :py:func:`~dask.dataframe.DataFrame` +and :py:func:`~dask.dataframe.Series` objects directly. First, install +``pandera`` with the ``dask`` extra: + +.. code:: bash + + pip install pandera[dask] + + +Then you can use pandera schemas to validate dask dataframes. In the example +below we'll use the :ref:`class-based API ` to define a +:py:class:`SchemaModel` for validation. 
+ +.. testcode:: scaling_dask + + import dask.dataframe as dd + import pandas as pd + import pandera as pa + + from pandera.typing.dask import DataFrame, Series + + + class Schema(pa.SchemaModel): + state: Series[str] + city: Series[str] + price: Series[int] = pa.Field(in_range={"min_value": 5, "max_value": 20}) + + + ddf = dd.from_pandas( + pd.DataFrame( + { + 'state': ['FL','FL','FL','CA','CA','CA'], + 'city': [ + 'Orlando', + 'Miami', + 'Tampa', + 'San Francisco', + 'Los Angeles', + 'San Diego', + ], + 'price': [8, 12, 10, 16, 20, 18], + } + ), + npartitions=2 + ) + pandera_ddf = Schema(ddf) + + print(pandera_ddf) + + +.. testoutput:: scaling_dask + + Dask DataFrame Structure: + state city price + npartitions=2 + 0 object object int64 + 3 ... ... ... + 5 ... ... ... + Dask Name: validate, 4 tasks + + +As you can see, passing the dask dataframe into ``Schema`` will produce +another dask dataframe which hasn't been evaluated yet. What this means is +that pandera will only validate when the dask graph is evaluated. + +.. testcode:: scaling_dask + + print(pandera_ddf.compute()) + + +.. testoutput:: scaling_dask + + state city price + 0 FL Orlando 8 + 1 FL Miami 12 + 2 FL Tampa 10 + 3 CA San Francisco 16 + 4 CA Los Angeles 20 + 5 CA San Diego 18 + + +You can also use the :py:func:`~pandera.check_types` decorator to validate +dask dataframes at runtime: + +.. testcode:: scaling_dask + + @pa.check_types + def function(ddf: DataFrame[Schema]) -> DataFrame[Schema]: + return ddf[ddf["state"] == "CA"] + + print(function(ddf).compute()) + + +.. testoutput:: scaling_dask + + state city price + 3 CA San Francisco 16 + 4 CA Los Angeles 20 + 5 CA San Diego 18 + + +And of course, you can use the object-based API to validate dask dataframes: + + +.. testcode:: scaling_dask + + schema = pa.DataFrameSchema({ + "state": pa.Column(str), + "city": pa.Column(str), + "price": pa.Column(int, pa.Check.in_range(min_value=5, max_value=20)) + }) + print(schema(ddf).compute()) + + +.. testoutput:: scaling_dask + + state city price + 0 FL Orlando 8 + 1 FL Miami 12 + 2 FL Tampa 10 + 3 CA San Francisco 16 + 4 CA Los Angeles 20 + 5 CA San Diego 18 diff --git a/docs/source/dataframe_schemas.rst b/docs/source/dataframe_schemas.rst index 9fcda7818..61462a2e9 100644 --- a/docs/source/dataframe_schemas.rst +++ b/docs/source/dataframe_schemas.rst @@ -39,6 +39,10 @@ The :class:`~pandera.schemas.DataFrameSchema` object consists of |column|_\s and coerce=True, ) +You can refer to :ref:`schema_models` to see how to define dataframe schemas +using the alternative pydantic/dataclass-style syntax. + + .. _column: Column Validation diff --git a/docs/source/dtypes.rst b/docs/source/dtypes.rst index b687d0d19..148caa42b 100644 --- a/docs/source/dtypes.rst +++ b/docs/source/dtypes.rst @@ -4,8 +4,8 @@ .. _dtypes: -Pandera Data Types (new) -======================== +Pandera Data Types +================== *new in 0.7.0* diff --git a/docs/source/scaling.rst b/docs/source/fugue.rst similarity index 91% rename from docs/source/scaling.rst rename to docs/source/fugue.rst index 221424b01..5abb3d175 100644 --- a/docs/source/scaling.rst +++ b/docs/source/fugue.rst @@ -1,9 +1,9 @@ .. currentmodule:: pandera -.. _scaling: +.. _scaling_fugue: -Scaling Pandera to Big Data -================================= +Data Validation with Fugue +========================== Validation on big data comes in two forms. The first is performing one set of validations on data that doesn't fit in memory. 
The second happens when a large dataset @@ -17,8 +17,8 @@ code can be used on top of ``Spark`` or ``Dask`` engines with to be performed in a distributed setting. ``Fugue`` is an open source abstraction layer that ports ``Python``, ``pandas``, and ``SQL`` code to ``Spark`` and ``Dask``. -Fugue ------ +What is Fugue? +-------------- ``Fugue`` serves as an interface to distributed computing. Because of its non-invasive design, existing ``Python`` code can be scaled to a distributed setting without significant changes. @@ -40,17 +40,22 @@ In this example, a pandas ``DataFrame`` is created with ``state``, ``city`` and columns. ``Pandera`` will be used to validate that the ``price`` column values are within a certain range. -.. testcode:: scaling_pandera +.. testcode:: scaling_fugue import pandas as pd - data = pd.DataFrame({'state': ['FL','FL','FL','CA','CA','CA'], - 'city': ['Orlando', 'Miami', 'Tampa', - 'San Francisco', 'Los Angeles', 'San Diego'], - 'price': [8, 12, 10, 16, 20, 18]}) + data = pd.DataFrame( + { + 'state': ['FL','FL','FL','CA','CA','CA'], + 'city': [ + 'Orlando', 'Miami', 'Tampa', 'San Francisco', 'Los Angeles', 'San Diego' + ], + 'price': [8, 12, 10, 16, 20, 18], + } + ) print(data) -.. testoutput:: scaling_pandera +.. testoutput:: scaling_fugue state city price 0 FL Orlando 8 @@ -64,7 +69,7 @@ a certain range. Validation is then applied using pandera. A ``price_validation`` function is created that runs the validation. None of this will be new. -.. testcode:: scaling_pandera +.. testcode:: scaling_fugue from pandera import Column, DataFrameSchema, Check @@ -85,7 +90,7 @@ to run the code on top of ``Spark``. ``Fugue`` also has a ``DaskExecutionEngine` the default pandas-based ``ExecutionEngine``. Because the ``SparkExecutionEngine`` is used, the result becomes a ``Spark DataFrame``. -.. testcode:: scaling_pandera +.. testcode:: scaling_fugue :skipif: SKIP_SCALING from fugue import transform @@ -94,7 +99,7 @@ becomes a ``Spark DataFrame``. spark_df = transform(data, price_validation, schema="*", engine=SparkExecutionEngine) spark_df.show() -.. testoutput:: scaling_pandera +.. testoutput:: scaling_fugue :skipif: SKIP_SCALING +-----+-------------+-----+ @@ -118,7 +123,7 @@ price range for the records with ``state`` FL is lower than the range for the `` Two :class:`~pandera.schemas.DataFrameSchema` will be created to reflect this. Notice their ranges for the :class:`~pandera.checks.Check` differ. -.. testcode:: scaling_pandera +.. testcode:: scaling_fugue price_check_FL = DataFrameSchema({ "price": Column(int, Check.in_range(min_value=7,max_value=13)), @@ -139,7 +144,7 @@ To partition our data by ``state``, all we need to do is pass it into the ``tran through the ``partition`` argument. This splits up the data across different workers before they each run the ``price_validation`` function. Again, this is like a groupby-validation. -.. testcode:: scaling_pandera +.. testcode:: scaling_fugue :skipif: SKIP_SCALING def price_validation(df:pd.DataFrame) -> pd.DataFrame: @@ -156,7 +161,7 @@ each run the ``price_validation`` function. Again, this is like a groupby-valida spark_df.show() -.. testoutput:: scaling_pandera +.. testoutput:: scaling_fugue :skipif: SKIP_SCALING SparkDataFrame diff --git a/docs/source/index.rst b/docs/source/index.rst index 93afabd8e..c303439b5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,9 +1,9 @@ .. 
pandera documentation master file -Statistical Data Validation for Pandas -====================================== +A Statistical DataFrame Testing Toolkit +======================================= -*A data validation library for scientists, engineers, and analysts seeking +*A dataframe validation library for scientists, engineers, and analysts seeking correctness.* @@ -62,14 +62,16 @@ correctness.* | ``pandera`` provides a flexible and expressive API for performing data -validation on tidy (long-form) and wide data to make data processing pipelines -more readable and robust. +validation on dataframes to make data processing pipelines more readable and +robust. -`pandas `_ data structures contain information that -``pandera`` explicitly validates at runtime. This is useful in -production-critical data pipelines or reproducible research settings. With -``pandera``, you can: +Dataframes contain information that ``pandera`` explicitly validates at runtime. +This is useful in production-critical data pipelines or reproducible research +settings. With ``pandera``, you can: +#. Define a schema once and use it to validate :ref:`different dataframe types ` + including `pandas `_, `dask `_, + `modin `_, and `koalas `_. #. :ref:`Check` the types and properties of columns in a ``pd.DataFrame`` or values in a ``pd.Series``. #. Perform more complex statistical validation like @@ -80,6 +82,11 @@ production-critical data pipelines or reproducible research settings. With pydantic-style syntax and validate dataframes using the typing syntax. #. :ref:`Synthesize data` from schema objects for property-based testing with pandas data structures. +#. :ref:`Lazily Validate` dataframes so that all validation + rules are executed before raising an error. +#. :ref:`Integrate ` with a rich ecosystem of python tools like + `pydantic `_ and + `mypy `_. .. _installation: @@ -101,6 +108,11 @@ Installing optional functionality: pip install pandera[hypotheses] # hypothesis checks pip install pandera[io] # yaml/script schema io utilities pip install pandera[strategies] # data synthesis strategies + pip install pandera[dask] # validate dask dataframes + pip install pandera[koalas] # validate koalas dataframes + pip install pandera[modin] # validate modin dataframes + pip install pandera[modin-ray] # validate modin dataframes with ray + pip install pandera[modin-dask] # validate modin dataframes with dask pip install pandera[all] # all packages @@ -303,18 +315,18 @@ Submit issues, feature requests or bugfixes on :hidden: dataframe_schemas + schema_models series_schemas checks hypothesis dtypes decorators schema_inference - schema_models lazy_validation data_synthesis_strategies extensions third_party_schema - scaling + supported_libraries integrations .. toctree:: diff --git a/docs/source/integrations.rst b/docs/source/integrations.rst index 94dfe9d00..79c475f80 100644 --- a/docs/source/integrations.rst +++ b/docs/source/integrations.rst @@ -5,10 +5,11 @@ Integrations ============ - Pydantic -------- +*new in 0.8.0* + :class:`~pandera.model.SchemaModel` is fully compatible with `pydantic `_. @@ -53,3 +54,78 @@ Other pandera components are also compatible with pydantic: - :class:`~pandera.schema_components.MultiIndex` - :class:`~pandera.schema_components.Column` - :class:`~pandera.schema_components.Index` + + +Mypy +---- + +*new in 0.8.0* + +Pandera integrates with mypy out of the box to provide static type-linting of +dataframes, relying on `pandas-stubs `__ +for typing information. + +.. 
note::
+
+    Mypy static type-linting is only supported for pandas dataframes.
+
+In the example below, we define a few schemas to see how type-linting with
+pandera works.
+
+.. literalinclude:: ../../tests/core/static/pandas_dataframe.py
+   :lines: 8-27
+
+The mypy linter will complain if the output type of the function body doesn't
+match the function's return signature.
+
+.. literalinclude:: ../../tests/core/static/pandas_dataframe.py
+   :lines: 30-43
+
+It'll also complain if the input type doesn't match the expected input type.
+Note that we're using the :py:class:`pandera.typing.pandas.DataFrame` generic
+type to define dataframes that are validated against the
+:py:class:`~pandera.model.SchemaModel` type variable on initialization.
+
+.. literalinclude:: ../../tests/core/static/pandas_dataframe.py
+   :lines: 47-60
+
+
+To make mypy happy with respect to the return type, you can either initialize
+a dataframe of the expected type:
+
+.. literalinclude:: ../../tests/core/static/pandas_dataframe.py
+   :lines: 63-64
+
+.. note::
+    If you use the approach above with the :py:func:`~pandera.check_types`
+    decorator, pandera will do its best not to validate the dataframe twice
+    if it's already been initialized with the
+    ``DataFrame[Schema](**data)`` syntax.
+
+Or use :py:func:`typing.cast` to indicate to mypy that the return value of
+the function is of the correct type.
+
+.. literalinclude:: ../../tests/core/static/pandas_dataframe.py
+   :lines: 67-68
+
+
+Limitations
+^^^^^^^^^^^
+
+An important caveat to static type-linting with pandera dataframe types is
+that, since pandas dataframes are mutable objects, there's no way for
+``mypy`` to know whether a mutated instance of a
+:py:class:`~pandera.model.SchemaModel`-typed dataframe has the correct
+contents. Fortunately, we can simply rely on the :py:func:`~pandera.check_types`
+decorator to verify that the output dataframe is valid.
+
+Consider the examples below:
+
+.. literalinclude:: ../../tests/core/static/pandas_dataframe.py
+   :lines: 63-72
+
+Even though the outputs of these functions are incorrect, mypy doesn't catch
+the error during static type-linting but pandera will raise a
+:py:class:`~pandera.errors.SchemaError` or :py:class:`~pandera.errors.SchemaErrors`
+exception at runtime, depending on whether you're doing
+:ref:`lazy validation` or not.
diff --git a/docs/source/koalas.rst b/docs/source/koalas.rst
new file mode 100644
index 000000000..5c9e6787b
--- /dev/null
+++ b/docs/source/koalas.rst
@@ -0,0 +1,111 @@
+.. currentmodule:: pandera
+
+.. _scaling_koalas:
+
+Data Validation with Koalas
+===========================
+
+*new in 0.8.0*
+
+`Koalas `__ is a distributed
+compute framework that offers a pandas drop-in replacement dataframe
+implementation. You can use pandera to validate :py:func:`~databricks.koalas.DataFrame`
+and :py:func:`~databricks.koalas.Series` objects directly. First, install
+``pandera`` with the ``koalas`` extra:
+
+.. code:: bash
+
+    pip install pandera[koalas]
+
+
+Then you can use pandera schemas to validate koalas dataframes. In the example
+below we'll use the :ref:`class-based API ` to define a
+:py:class:`SchemaModel` for validation.
+
+.. 
testcode:: scaling_koalas
+
+    import databricks.koalas as ks
+    import pandas as pd
+    import pandera as pa
+
+    from pandera.typing.koalas import DataFrame, Series
+
+
+    class Schema(pa.SchemaModel):
+        state: Series[str]
+        city: Series[str]
+        price: Series[int] = pa.Field(in_range={"min_value": 5, "max_value": 20})
+
+
+    # create a koalas dataframe that's validated on object initialization
+    df = DataFrame[Schema](
+        {
+            'state': ['FL','FL','FL','CA','CA','CA'],
+            'city': [
+                'Orlando',
+                'Miami',
+                'Tampa',
+                'San Francisco',
+                'Los Angeles',
+                'San Diego',
+            ],
+            'price': [8, 12, 10, 16, 20, 18],
+        }
+    )
+    print(df)
+
+
+.. testoutput:: scaling_koalas
+
+      state           city  price
+    0    FL        Orlando      8
+    1    FL          Miami     12
+    2    FL          Tampa     10
+    3    CA  San Francisco     16
+    4    CA    Los Angeles     20
+    5    CA      San Diego     18
+
+
+You can also use the :py:func:`~pandera.check_types` decorator to validate
+koalas dataframes at runtime:
+
+
+.. testcode:: scaling_koalas
+
+    @pa.check_types
+    def function(df: DataFrame[Schema]) -> DataFrame[Schema]:
+        return df[df["state"] == "CA"]
+
+    print(function(df))
+
+
+.. testoutput:: scaling_koalas
+
+      state           city  price
+    3    CA  San Francisco     16
+    4    CA    Los Angeles     20
+    5    CA      San Diego     18
+
+
+And of course, you can use the object-based API to validate koalas dataframes:
+
+
+.. testcode:: scaling_koalas
+
+    schema = pa.DataFrameSchema({
+        "state": pa.Column(str),
+        "city": pa.Column(str),
+        "price": pa.Column(int, pa.Check.in_range(min_value=5, max_value=20))
+    })
+    print(schema(df))
+
+
+.. testoutput:: scaling_koalas
+
+      state           city  price
+    0    FL        Orlando      8
+    1    FL          Miami     12
+    2    FL          Tampa     10
+    3    CA  San Francisco     16
+    4    CA    Los Angeles     20
+    5    CA      San Diego     18
diff --git a/docs/source/modin.rst b/docs/source/modin.rst
new file mode 100644
index 000000000..3d5b1bb1b
--- /dev/null
+++ b/docs/source/modin.rst
@@ -0,0 +1,113 @@
+.. currentmodule:: pandera
+
+.. _scaling_modin:
+
+Data Validation with Modin
+==========================
+
+*new in 0.8.0*
+
+`Modin `__ is a distributed
+compute framework that offers a pandas drop-in replacement dataframe
+implementation. You can use pandera to validate :py:func:`~modin.pandas.DataFrame`
+and :py:func:`~modin.pandas.Series` objects directly. First, install
+``pandera`` with the ``modin`` extra:
+
+.. code:: bash
+
+    pip install pandera[modin]  # installs both ray and dask backends
+    pip install pandera[modin-ray]  # only ray backend
+    pip install pandera[modin-dask]  # only dask backend
+
+
+Then you can use pandera schemas to validate modin dataframes. In the example
+below we'll use the :ref:`class-based API ` to define a
+:py:class:`SchemaModel` for validation.
+
+.. testcode:: scaling_modin
+
+    import modin.pandas as pd
+    import pandera as pa
+
+    from pandera.typing.modin import DataFrame, Series
+
+
+    class Schema(pa.SchemaModel):
+        state: Series[str]
+        city: Series[str]
+        price: Series[int] = pa.Field(in_range={"min_value": 5, "max_value": 20})
+
+
+    # create a modin dataframe that's validated on object initialization
+    df = DataFrame[Schema](
+        {
+            'state': ['FL','FL','FL','CA','CA','CA'],
+            'city': [
+                'Orlando',
+                'Miami',
+                'Tampa',
+                'San Francisco',
+                'Los Angeles',
+                'San Diego',
+            ],
+            'price': [8, 12, 10, 16, 20, 18],
+        }
+    )
+    print(df)
+
+
+.. testoutput:: scaling_modin
+
+      state           city  price
+    0    FL        Orlando      8
+    1    FL          Miami     12
+    2    FL          Tampa     10
+    3    CA  San Francisco     16
+    4    CA    Los Angeles     20
+    5    CA      San Diego     18
+
+
+You can also use the :py:func:`~pandera.check_types` decorator to validate
+modin dataframes at runtime:
+
+
+.. 
testcode:: scaling_modin
+
+    @pa.check_types
+    def function(df: DataFrame[Schema]) -> DataFrame[Schema]:
+        return df[df["state"] == "CA"]
+
+    print(function(df))
+
+
+.. testoutput:: scaling_modin
+
+      state           city  price
+    3    CA  San Francisco     16
+    4    CA    Los Angeles     20
+    5    CA      San Diego     18
+
+
+And of course, you can use the object-based API to validate modin dataframes:
+
+
+.. testcode:: scaling_modin
+
+    schema = pa.DataFrameSchema({
+        "state": pa.Column(str),
+        "city": pa.Column(str),
+        "price": pa.Column(int, pa.Check.in_range(min_value=5, max_value=20))
+    })
+    print(schema(df))
+
+
+.. testoutput:: scaling_modin
+
+      state           city  price
+    0    FL        Orlando      8
+    1    FL          Miami     12
+    2    FL          Tampa     10
+    3    CA  San Francisco     16
+    4    CA    Los Angeles     20
+    5    CA      San Diego     18
diff --git a/docs/source/schema_models.rst b/docs/source/schema_models.rst
index fcbc56e30..5ad259a30 100644
--- a/docs/source/schema_models.rst
+++ b/docs/source/schema_models.rst
@@ -107,6 +107,52 @@ In the example above, this will simply be the string `"year"`.
        2  2003  365
 
 
+Validate on Initialization
+--------------------------
+
+*new in 0.8.0*
+
+Pandera provides an interface for validating dataframes on initialization.
+This API uses the :py:class:`pandera.typing.pandas.DataFrame` generic type
+to validate dataframes against the :py:class:`~pandera.model.SchemaModel`
+type variable on initialization:
+
+.. testcode:: validate_on_init
+
+    import pandas as pd
+    import pandera as pa
+
+    from pandera.typing import DataFrame, Series
+
+
+    class Schema(pa.SchemaModel):
+        state: Series[str]
+        city: Series[str]
+        price: Series[int] = pa.Field(in_range={"min_value": 5, "max_value": 20})
+
+    df = DataFrame[Schema](
+        {
+            'state': ['NY','FL','GA','CA'],
+            'city': ['New York', 'Miami', 'Atlanta', 'San Francisco'],
+            'price': [8, 12, 10, 16],
+        }
+    )
+    print(df)
+
+
+.. testoutput:: validate_on_init
+
+      state           city  price
+    0    NY       New York      8
+    1    FL          Miami     12
+    2    GA        Atlanta     10
+    3    CA  San Francisco     16
+
+
+Refer to :ref:`supported-dataframe-libraries` to see how this syntax applies
+to other supported dataframe types.
+
+
 Converting to DataFrameSchema
 -----------------------------
 
@@ -134,7 +180,8 @@ You can easily convert a :class:`~pandera.model.SchemaModel` class into a
     ordered=False
 )>
 
-Or use the :meth:`~pandera.model.SchemaModel.validate` method to validate dataframes:
+You can also use the :meth:`~pandera.model.SchemaModel.validate` method to
+validate dataframes:
 
 .. testcode:: dataframe_schema_model
 
@@ -147,6 +194,21 @@ Or use the :meth:`~pandera.model.SchemaModel.validate` method to validate datafr
     1  2002      6  156
     2  2003     12  365
 
+Or you can use the :class:`~pandera.model.SchemaModel` class directly to
+validate dataframes, which is syntactic sugar that simply delegates to the
+:meth:`~pandera.model.SchemaModel.validate` method.
+
+.. testcode:: dataframe_schema_model
+
+    print(InputSchema(df))
+
+.. testoutput:: dataframe_schema_model
+
+       year  month  day
+    0  2001      3  200
+    1  2002      6  156
+    2  2003     12  365
+
 
 Excluded attributes
 -------------------
diff --git a/docs/source/supported_libraries.rst b/docs/source/supported_libraries.rst
new file mode 100644
index 000000000..60ae94f4c
--- /dev/null
+++ b/docs/source/supported_libraries.rst
@@ -0,0 +1,50 @@
+.. currentmodule:: pandera
+
+.. _supported-dataframe-libraries:
+
+Supported DataFrame Libraries (New)
+===================================
+
+Pandera started out as a pandas-specific dataframe validation library, and
+moving forward its core functionality will continue to support pandas. 
However,
+pandera's adoption has resulted in the realization that it can be a much more
+powerful tool by supporting other dataframe-like formats.
+
+Scaling Up Data Validation
+--------------------------
+
+Pandera provides multiple ways of scaling up data validation to dataframes
+that don't fit into memory. Fortunately, pandera doesn't have to re-invent
+the wheel. Standing on the shoulders of giants, it integrates with the
+existing ecosystem of libraries that allow you to perform validations on
+out-of-memory dataframes.
+
+.. list-table::
+   :widths: 25 75
+
+   * - :ref:`Dask `
+     - Apply pandera schemas to Dask dataframe partitions.
+   * - :ref:`Fugue `
+     - Apply pandera schemas to distributed dataframe partitions with Fugue.
+   * - :ref:`Koalas `
+     - A pandas drop-in replacement, distributed using a Spark backend.
+   * - :ref:`Modin `
+     - A pandas drop-in replacement, distributed using a Ray or Dask backend.
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Introduction
+   :hidden:
+
+   Dask 
+   Fugue 
+   Koalas 
+   Modin 
+
+.. note::
+
+   Don't see a library that you want supported? Check out the
+   `github issues `__ to see if
+   that library is in the roadmap. If it isn't, open up a
+   `new issue `__
+   to add support for it!
diff --git a/docs/source/third_party_schema.rst b/docs/source/third_party_schema.rst
index 233604435..e6d54b0ec 100644
--- a/docs/source/third_party_schema.rst
+++ b/docs/source/third_party_schema.rst
@@ -4,8 +4,8 @@
 
 .. _third_party_schema:
 
-Reading Third-Party Schema (new)
-================================
+Reading Third-Party Schema
+==========================
 
 *new in 0.7.0*
 
diff --git a/pandera/external_config.py b/pandera/external_config.py
index b28aa45ca..699b0c9fb 100644
--- a/pandera/external_config.py
+++ b/pandera/external_config.py
@@ -9,9 +9,13 @@
     # Series and DataFrames to support type hinting:
     # https://koalas.readthedocs.io/en/latest/user_guide/typehints.html#type-hinting-with-names
     # pylint: disable=unused-import
-    import databricks.koalas as ks
-
     if os.getenv("SPARK_LOCAL_IP") is None:
         os.environ["SPARK_LOCAL_IP"] = "127.0.0.1"
+    if os.getenv("PYARROW_IGNORE_TIMEZONE") is None:
+        # This can be overridden by the user
+        os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"
+
+    import databricks.koalas as ks
 except ImportError:
-    pass
+    os.environ.pop("SPARK_LOCAL_IP", None)
+    os.environ.pop("PYARROW_IGNORE_TIMEZONE", None)
diff --git a/pandera/io.py b/pandera/io.py
index 15d9c3fde..07ade9546 100644
--- a/pandera/io.py
+++ b/pandera/io.py
@@ -252,7 +252,6 @@ def _deserialize_schema(serialized_schema):
         index = MultiIndex(
             indexes=[Index(**index_properties) for index_properties in index]
         )
-
     return DataFrameSchema(
         columns=columns,
         checks=checks,
diff --git a/pandera/model.py b/pandera/model.py
index b937c9d7c..1816f0214 100644
--- a/pandera/model.py
+++ b/pandera/model.py
@@ -34,7 +34,8 @@
     FieldInfo,
 )
 from .schemas import DataFrameSchema
-from .typing import AnnotationInfo, DataFrame, Index, Series
+from .typing import INDEX_TYPES, SERIES_TYPES, AnnotationInfo
+from .typing.common import DataFrameBase
 
 if sys.version_info[:2] < (3, 9):
     from typing_extensions import get_type_hints
@@ -173,8 +174,11 @@ class SchemaModel(metaclass=_MetaSchema):
     __checks__: Dict[str, List[Check]] = {}
     __dataframe_checks__: List[Check] = []
 
-    def __new__(cls, *args, **kwargs):
-        raise TypeError(f"{cls.__name__} may not be instantiated.")
+    # This is syntactic sugar that delegates to the validate method
+    @docstring_substitution(validate_doc=DataFrameSchema.validate.__doc__)
+    def __new__(cls, *args, **kwargs) -> DataFrameBase[TSchemaModel]:  # 
type: ignore [misc] + """%(validate_doc)s""" + return cast(DataFrameBase[TSchemaModel], cls.validate(*args, **kwargs)) def __init_subclass__(cls, **kwargs): """Ensure :class:`~pandera.model_components.FieldInfo` instances.""" @@ -250,10 +254,13 @@ def validate( random_state: Optional[int] = None, lazy: bool = False, inplace: bool = False, - ) -> DataFrame[TSchemaModel]: + ) -> DataFrameBase[TSchemaModel]: """%(validate_doc)s""" - return cls.to_schema().validate( - check_obj, head, tail, sample, random_state, lazy, inplace + return cast( + DataFrameBase[TSchemaModel], + cls.to_schema().validate( + check_obj, head, tail, sample, random_state, lazy, inplace + ), ) @classmethod @@ -261,7 +268,7 @@ def validate( @st.strategy_import_error def strategy( cls: Type[TSchemaModel], *, size: Optional[int] = None - ) -> DataFrame[TSchemaModel]: + ) -> DataFrameBase[TSchemaModel]: """%(strategy_doc)s""" return cls.to_schema().strategy(size=size) @@ -270,9 +277,11 @@ def strategy( @st.strategy_import_error def example( cls: Type[TSchemaModel], *, size: Optional[int] = None - ) -> DataFrame[TSchemaModel]: + ) -> DataFrameBase[TSchemaModel]: """%(example_doc)s""" - return cls.to_schema().example(size=size) + return cast( + DataFrameBase[TSchemaModel], cls.to_schema().example(size=size) + ) @classmethod def _build_columns_index( # pylint:disable=too-many-locals @@ -285,7 +294,8 @@ def _build_columns_index( # pylint:disable=too-many-locals Optional[Union[schema_components.Index, schema_components.MultiIndex]], ]: index_count = sum( - annotation.origin is Index for annotation, _ in fields.values() + annotation.origin in INDEX_TYPES + for annotation, _ in fields.values() ) columns: Dict[str, schema_components.Column] = {} @@ -310,8 +320,8 @@ def _build_columns_index( # pylint:disable=too-many-locals dtype = None if dtype is Any else dtype if ( - annotation.origin is Series - or annotation.raw_annotation is Series + annotation.origin in SERIES_TYPES + or annotation.raw_annotation in SERIES_TYPES ): col_constructor = ( field.to_column if field else schema_components.Column @@ -329,8 +339,8 @@ def _build_columns_index( # pylint:disable=too-many-locals name=field_name, ) elif ( - annotation.origin is Index - or annotation.raw_annotation is Index + annotation.origin in INDEX_TYPES + or annotation.raw_annotation in INDEX_TYPES ): if annotation.optional: raise SchemaInitError( diff --git a/pandera/typing/__init__.py b/pandera/typing/__init__.py new file mode 100644 index 000000000..1090ee02a --- /dev/null +++ b/pandera/typing/__init__.py @@ -0,0 +1,58 @@ +"""Typing module. + +For backwards compatibility, pandas types are exposed to the top-level scope of +the typing module. +""" + +from . 
import dask, koalas, modin +from .common import ( + INT8, + INT16, + INT32, + INT64, + STRING, + UINT8, + UINT16, + UINT32, + UINT64, + AnnotationInfo, + Bool, + Category, + DateTime, + Float, + Float16, + Float32, + Float64, + Int, + Int8, + Int16, + Int32, + Int64, + Object, + String, + Timedelta, + UInt8, + UInt16, + UInt32, + UInt64, +) +from .pandas import DataFrame, Index, Series + +DATAFRAME_TYPES = {DataFrame} +SERIES_TYPES = {Series} +INDEX_TYPES = {Index} + +if dask.DASK_INSTALLED: + DATAFRAME_TYPES.update({dask.DataFrame}) + SERIES_TYPES.update({dask.Series}) + INDEX_TYPES.update({dask.Index}) + +if modin.MODIN_INSTALLED: + DATAFRAME_TYPES.update({modin.DataFrame}) + SERIES_TYPES.update({modin.Series}) + INDEX_TYPES.update({modin.Index}) + +if koalas.KOALAS_INSTALLED: + DATAFRAME_TYPES.update({koalas.DataFrame}) + SERIES_TYPES.update({koalas.Series}) + INDEX_TYPES.update({koalas.Index}) diff --git a/pandera/typing.py b/pandera/typing/common.py similarity index 59% rename from pandera/typing.py rename to pandera/typing/common.py index 72da6031b..99861ddb0 100644 --- a/pandera/typing.py +++ b/pandera/typing/common.py @@ -1,32 +1,14 @@ -"""Typing definitions and helpers.""" +"""Common typing functionality.""" # pylint:disable=abstract-method,disable=too-many-ancestors + import inspect from typing import TYPE_CHECKING, Any, Generic, Type, TypeVar import pandas as pd import typing_inspect -from . import dtypes -from .engines import numpy_engine, pandas_engine -from .errors import SchemaError, SchemaInitError - -try: - from typing import _GenericAlias # type: ignore[attr-defined] -except ImportError: # pragma: no cover - _GenericAlias = None - - -try: - from pydantic.fields import ModelField -except ImportError: - ModelField = Any # type: ignore - -try: - import dask.dataframe as dd - - _DASK_INSTALLED = True -except ImportError: - _DASK_INSTALLED = False +from .. import dtypes +from ..engines import numpy_engine, pandas_engine Bool = dtypes.Bool #: ``"bool"`` numpy dtype DateTime = dtypes.DateTime #: ``"datetime64[ns]"`` numpy dtype @@ -99,35 +81,6 @@ Schema = TypeVar("Schema", bound="SchemaModel") # type: ignore -# pylint:disable=too-few-public-methods -class Index(pd.Index, Generic[GenericDtype]): - """Representation of pandas.Index, only used for type annotation. - - *new in 0.5.0* - """ - - -# pylint:disable=too-few-public-methods -class Series(pd.Series, Generic[GenericDtype]): # type: ignore - """Representation of pandas.Series, only used for type annotation. - - *new in 0.5.0* - """ - - if hasattr(pd.Series, "__class_getitem__") and _GenericAlias: - - def __class_getitem__(cls, item): - """Define this to override the patch that koalas performs on pandas. - https://github.com/databricks/koalas/blob/master/databricks/koalas/__init__.py#L207-L223 - """ - return _GenericAlias(cls, item) - - def __get__( - self, instance: object, owner: Type - ) -> str: # pragma: no cover - raise AttributeError("Series should resolve to Field-s") - - # pylint:disable=invalid-name if TYPE_CHECKING: T = TypeVar("T") # pragma: no cover @@ -135,16 +88,19 @@ def __get__( T = Schema -class DataFrameBase(pd.DataFrame): +class DataFrameBase(Generic[T]): + # pylint: disable=too-few-public-methods """ - Pandera pandas.Dataframe base class for validating dataframes on + Pandera Dataframe base class for validating dataframes on initialization. 
""" def __setattr__(self, name: str, value: Any) -> None: + # pylint: disable=no-member object.__setattr__(self, name, value) if name == "__orig_class__": - class_args = getattr(self.__orig_class__, "__args__", None) + orig_class = getattr(self, "__orig_class__") + class_args = getattr(orig_class, "__args__", None) if any( x.__name__ == "SchemaModel" for x in inspect.getmro(class_args[0]) @@ -153,73 +109,32 @@ def __setattr__(self, name: str, value: Any) -> None: # prevent the double validation problem by preventing checks for # dataframes with a defined pandera.schema + pandera = getattr(self, "pandera") if ( - self.pandera.schema is None - or self.pandera.schema != schema_model.to_schema() + pandera.schema is None + or pandera.schema != schema_model.to_schema() ): # pylint: disable=self-cls-assignment self = schema_model.validate(self) - self.pandera.add_schema(schema_model.to_schema()) + pandera.add_schema(schema_model.to_schema()) # pylint:disable=too-few-public-methods -class DataFrame(Generic[T], DataFrameBase): - """ - Representation of pandas.DataFrame, only used for type annotation. - - *new in 0.5.0* - """ +class SeriesBase(Generic[GenericDtype]): + """Pandera Series base class to use for all pandas-like APIs.""" - if hasattr(pd.Series, "__class_getitem__") and _GenericAlias: - - def __class_getitem__(cls, item): - """Define this to override the patch that koalas performs on pandas. - https://github.com/databricks/koalas/blob/master/databricks/koalas/__init__.py#L207-L223 - """ - return _GenericAlias(cls, item) - - @classmethod - def __get_validators__(cls): - yield cls._pydantic_validate - - @classmethod - def _pydantic_validate( - cls, df: pd.DataFrame, field: ModelField - ) -> pd.DataFrame: - """Verify that the input is a pandas dataframe that meets all - schema requirements.""" - if not isinstance(df, pd.DataFrame): - raise TypeError("Expected a pandas DataFrame") - - if not field.sub_fields: - raise TypeError( - "Expected a typed pandera.typing.DataFrame," - " e.g. DataFrame[Schema]" - ) - schema_model = field.sub_fields[0].type_ - try: - schema = schema_model.to_schema() - except SchemaInitError as exc: - raise ValueError( - f"Cannot use {cls.__name__} as a pydantic type as its " - "SchemaModel cannot be converted to a DataFrameSchema.\n" - f"Please revisit the model to address the following errors:" - f"\n{exc}" - ) from exc + def __get__( + self, instance: object, owner: Type + ) -> str: # pragma: no cover + raise AttributeError("Series should resolve to Field-s") - try: - return schema.validate(df) - except SchemaError as exc: - raise ValueError(str(exc)) from exc +# pylint:disable=too-few-public-methods +class IndexBase(Generic[GenericDtype]): + """Representation of pandas.Index, only used for type annotation. -if _DASK_INSTALLED: - # pylint:disable=too-few-public-methods - class DaskDataFrame(dd.DataFrame, Generic[T]): - """ - Representation of dask.dataframe.DataFrame, only used for type - annotation. - """ + *new in 0.5.0* + """ class AnnotationInfo: # pylint:disable=too-few-public-methods @@ -227,7 +142,8 @@ class AnnotationInfo: # pylint:disable=too-few-public-methods Attributes: origin: The non-parameterized generic class. - arg: The first generic type (SchemaModel does not support more than 1 argument). + arg: The first generic type (SchemaModel does not support more than + 1 argument). literal: Whether the annotation is a literal. optional: Whether the annotation is optional. raw_annotation: The raw annotation. 

@@ -239,16 +155,11 @@ def __init__(self, raw_annotation: Type) -> None:
 
     @property
     def is_generic_df(self) -> bool:
-        """True if the annotation is a pandera.typing.DataFrame or
-        pandera.typing.DaskDataFrame.
-        """
+        """True if the annotation is a DataFrameBase subclass."""
         try:
             if self.origin is None:
                 return False
-            if _DASK_INSTALLED:
-                return issubclass(self.origin, (DataFrame, DaskDataFrame))
-            else:
-                return issubclass(self.origin, DataFrame)
+            return issubclass(self.origin, DataFrameBase)
         except TypeError:
             return False
diff --git a/pandera/typing/dask.py b/pandera/typing/dask.py
new file mode 100644
index 000000000..ae807c036
--- /dev/null
+++ b/pandera/typing/dask.py
@@ -0,0 +1,67 @@
+"""Pandera type annotations for Dask."""
+
+import inspect
+from typing import TYPE_CHECKING, Any, Generic, TypeVar
+
+from .common import DataFrameBase, IndexBase, SeriesBase
+from .pandas import GenericDtype, Schema
+
+try:
+    import dask.dataframe as dd
+
+    DASK_INSTALLED = True
+except ImportError:
+    DASK_INSTALLED = False
+
+
+# pylint:disable=invalid-name
+if TYPE_CHECKING:
+    T = TypeVar("T")  # pragma: no cover
+else:
+    T = Schema
+
+
+if DASK_INSTALLED:
+
+    # pylint: disable=too-few-public-methods
+    class DataFrame(DataFrameBase, dd.DataFrame, Generic[T]):
+        """
+        Representation of dask.dataframe.DataFrame, only used for type
+        annotation.
+
+        *new in 0.8.0*
+        """
+
+        def __setattr__(self, name: str, value: Any) -> None:
+            object.__setattr__(self, name, value)
+            if name == "__orig_class__":
+                class_args = getattr(self.__orig_class__, "__args__", None)
+                if any(
+                    x.__name__ == "SchemaModel"
+                    for x in inspect.getmro(class_args[0])
+                ):
+                    schema_model = value.__args__[0]
+
+                    # prevent the double validation problem by preventing
+                    # checks for dataframes with a defined pandera.schema
+                    if (
+                        self.pandera.schema is None
+                        or self.pandera.schema != schema_model.to_schema()
+                    ):
+                        # pylint: disable=self-cls-assignment
+                        self.__dict__ = schema_model.validate(self).__dict__
+                        self.pandera.add_schema(schema_model.to_schema())
+
+    # pylint:disable=too-few-public-methods
+    class Series(SeriesBase, dd.Series, Generic[GenericDtype]):  # type: ignore
+        """Representation of dask.dataframe.Series, only used for type
+        annotation.
+
+        *new in 0.8.0*
+        """
+
+    # pylint:disable=too-few-public-methods
+    class Index(IndexBase, dd.Index, Generic[GenericDtype]):
+        """Representation of dask.dataframe.Index, only used for type
+        annotation.
+
+        *new in 0.8.0*
+        """
diff --git a/pandera/typing/koalas.py b/pandera/typing/koalas.py
new file mode 100644
index 000000000..052338f7f
--- /dev/null
+++ b/pandera/typing/koalas.py
@@ -0,0 +1,54 @@
+"""Pandera type annotations for Koalas."""
+
+from typing import TYPE_CHECKING, Generic, TypeVar
+
+from .common import DataFrameBase, IndexBase, SeriesBase
+from .pandas import GenericDtype, Schema, _GenericAlias
+
+try:
+    import databricks.koalas as ks
+
+    KOALAS_INSTALLED = True
+except ImportError:
+    KOALAS_INSTALLED = False
+
+
+# pylint:disable=invalid-name
+if TYPE_CHECKING:
+    T = TypeVar("T")  # pragma: no cover
+else:
+    T = Schema
+
+
+if KOALAS_INSTALLED:
+
+    # pylint: disable=too-few-public-methods
+    class DataFrame(DataFrameBase, ks.DataFrame, Generic[T]):
+        """
+        Representation of databricks.koalas.DataFrame, only used for type
+        annotation. 

+
+        *new in 0.8.0*
+        """
+
+        def __class_getitem__(cls, item):
+            """Define this to override koalas' generic type."""
+            return _GenericAlias(cls, item)
+
+    # pylint:disable=too-few-public-methods
+    class Series(SeriesBase, ks.Series, Generic[GenericDtype]):
+        """Representation of databricks.koalas.Series, only used for type
+        annotation.
+
+        *new in 0.8.0*
+        """
+
+        def __class_getitem__(cls, item):
+            """Define this to override koalas' generic type."""
+            return _GenericAlias(cls, item)
+
+    # pylint:disable=too-few-public-methods
+    class Index(IndexBase, ks.Index, Generic[GenericDtype]):
+        """Representation of databricks.koalas.Index, only used for type
+        annotation.
+
+        *new in 0.8.0*
+        """
diff --git a/pandera/typing/modin.py b/pandera/typing/modin.py
new file mode 100644
index 000000000..c69782f21
--- /dev/null
+++ b/pandera/typing/modin.py
@@ -0,0 +1,46 @@
+"""Pandera type annotations for Modin."""
+
+from typing import TYPE_CHECKING, Generic, TypeVar
+
+from .common import DataFrameBase, IndexBase, SeriesBase
+from .pandas import GenericDtype, Schema
+
+try:
+    import modin.pandas as mpd
+
+    MODIN_INSTALLED = True
+except ImportError:
+    MODIN_INSTALLED = False
+
+
+# pylint:disable=invalid-name
+if TYPE_CHECKING:
+    T = TypeVar("T")  # pragma: no cover
+else:
+    T = Schema
+
+
+if MODIN_INSTALLED:
+
+    # pylint: disable=too-few-public-methods
+    class DataFrame(DataFrameBase, mpd.DataFrame, Generic[T]):
+        """
+        Representation of modin.pandas.DataFrame, only used for type
+        annotation.
+
+        *new in 0.8.0*
+        """
+
+    # pylint:disable=too-few-public-methods
+    class Series(SeriesBase, mpd.Series, Generic[GenericDtype]):
+        """Representation of modin.pandas.Series, only used for type
+        annotation.
+
+        *new in 0.8.0*
+        """
+
+    # pylint:disable=too-few-public-methods
+    class Index(IndexBase, mpd.Index, Generic[GenericDtype]):
+        """Representation of modin.pandas.Index, only used for type
+        annotation.
+
+        *new in 0.8.0*
+        """
diff --git a/pandera/typing/pandas.py b/pandera/typing/pandas.py
new file mode 100644
index 000000000..37c0824b5
--- /dev/null
+++ b/pandera/typing/pandas.py
@@ -0,0 +1,101 @@
+"""Typing definitions and helpers."""
+# pylint:disable=abstract-method,disable=too-many-ancestors
+from typing import TYPE_CHECKING, Any, Generic, TypeVar
+
+import pandas as pd
+
+from ..errors import SchemaError, SchemaInitError
+from .common import DataFrameBase, GenericDtype, IndexBase, Schema, SeriesBase
+
+try:
+    from typing import _GenericAlias  # type: ignore[attr-defined]
+except ImportError:  # pragma: no cover
+    _GenericAlias = None
+
+
+try:
+    from pydantic.fields import ModelField
+except ImportError:
+    ModelField = Any  # type: ignore
+
+
+# pylint:disable=too-few-public-methods
+class Index(IndexBase, pd.Index, Generic[GenericDtype]):
+    """Representation of pandas.Index, only used for type annotation.
+
+    *new in 0.5.0*
+    """
+
+
+# pylint:disable=too-few-public-methods
+class Series(SeriesBase, pd.Series, Generic[GenericDtype]):  # type: ignore
+    """Representation of pandas.Series, only used for type annotation.
+
+    *new in 0.5.0*
+    """
+
+    if hasattr(pd.Series, "__class_getitem__") and _GenericAlias:
+
+        def __class_getitem__(cls, item):
+            """Define this to override the patch that koalas performs on pandas. 
+ https://github.com/databricks/koalas/blob/master/databricks/koalas/__init__.py#L207-L223 + """ + return _GenericAlias(cls, item) + + +# pylint:disable=invalid-name +if TYPE_CHECKING: + T = TypeVar("T") # pragma: no cover +else: + T = Schema + + +# pylint:disable=too-few-public-methods +class DataFrame(DataFrameBase, pd.DataFrame, Generic[T]): + """ + Representation of pandas.DataFrame, only used for type annotation. + + *new in 0.5.0* + """ + + if hasattr(pd.DataFrame, "__class_getitem__") and _GenericAlias: + + def __class_getitem__(cls, item): + """Define this to override the patch that koalas performs on pandas. + https://github.com/databricks/koalas/blob/master/databricks/koalas/__init__.py#L207-L223 + """ + return _GenericAlias(cls, item) + + @classmethod + def __get_validators__(cls): + yield cls._pydantic_validate + + @classmethod + def _pydantic_validate( + cls, df: pd.DataFrame, field: ModelField + ) -> pd.DataFrame: + """Verify that the input is a pandas dataframe that meets all + schema requirements.""" + if not isinstance(df, pd.DataFrame): + raise TypeError("Expected a pandas DataFrame") + + if not field.sub_fields: + raise TypeError( + "Expected a typed pandera.typing.DataFrame," + " e.g. DataFrame[Schema]" + ) + schema_model = field.sub_fields[0].type_ + try: + schema = schema_model.to_schema() + except SchemaInitError as exc: + raise ValueError( + f"Cannot use {cls.__name__} as a pydantic type as its " + "SchemaModel cannot be converted to a DataFrameSchema.\n" + f"Please revisit the model to address the following errors:" + f"\n{exc}" + ) from exc + + try: + return schema.validate(df) + except SchemaError as exc: + raise ValueError(str(exc)) from exc diff --git a/tests/core/static/pandas_dataframe.py b/tests/core/static/pandas_dataframe.py index 58b718395..2d320851a 100644 --- a/tests/core/static/pandas_dataframe.py +++ b/tests/core/static/pandas_dataframe.py @@ -27,37 +27,61 @@ class AnotherSchema(pa.SchemaModel): first_name: Series[str] -pd_df = pd.DataFrame({"id": [1], "name": ["foo"]}) -valid_df = DataFrame[Schema]({"id": [1], "name": ["foo"]}) -another_df = DataFrame[AnotherSchema]({"id": [1], "first_name": ["foo"]}) - - def fn(df: DataFrame[Schema]) -> DataFrame[SchemaOut]: - return df.assign(age=30).pipe(DataFrame[SchemaOut]) + return df.assign(age=30).pipe(DataFrame[SchemaOut]) # mypy okay def fn_pipe_incorrect_type(df: DataFrame[Schema]) -> DataFrame[SchemaOut]: return df.assign(age=30).pipe(DataFrame[AnotherSchema]) # mypy error + # error: Argument 1 to "pipe" of "NDFrame" has incompatible type "Type[DataFrame[Any]]"; # noqa + # expected "Union[Callable[..., DataFrame[SchemaOut]], Tuple[Callable[..., DataFrame[SchemaOut]], str]]" [arg-type] # noqa def fn_assign_copy(df: DataFrame[Schema]) -> DataFrame[SchemaOut]: return df.assign(age=30) # mypy error + # error: Incompatible return value type (got "pandas.core.frame.DataFrame", + # expected "pandera.typing.pandas.DataFrame[SchemaOut]") [return-value] + + +# Define a few dataframe objects +schema_df = DataFrame[Schema]({"id": [1], "name": ["foo"]}) +pandas_df = pd.DataFrame({"id": [1], "name": ["foo"]}) +another_df = DataFrame[AnotherSchema]({"id": [1], "first_name": ["foo"]}) + +fn(schema_df) # mypy okay + +fn(pandas_df) # mypy error +# error: Argument 1 to "fn" has incompatible type "pandas.core.frame.DataFrame"; # noqa +# expected "pandera.typing.pandas.DataFrame[Schema]" [arg-type] -fn(valid_df) -fn(pd_df) # mypy error fn(another_df) # mypy error +# error: Argument 1 to "fn" has incompatible type 
"DataFrame[AnotherSchema]"; +# expected "DataFrame[Schema]" [arg-type] + +def fn_pipe_dataframe(df: DataFrame[Schema]) -> DataFrame[SchemaOut]: + return df.assign(age=30).pipe(DataFrame[SchemaOut]) # mypy okay + +def fn_cast_dataframe(df: DataFrame[Schema]) -> DataFrame[SchemaOut]: + return cast(DataFrame[SchemaOut], df.assign(age=30)) # mypy okay + + +@pa.check_types def fn_mutate_inplace(df: DataFrame[Schema]) -> DataFrame[SchemaOut]: out = df.assign(age=30).pipe(DataFrame[SchemaOut]) out.drop(["age"], axis=1, inplace=True) return out # okay for mypy, pandera raises error +@pa.check_types def fn_assign_and_get_index(df: DataFrame[Schema]) -> DataFrame[SchemaOut]: return df.assign(foo=30).iloc[:3] # okay for mypy, pandera raises error -def fn_cast_dataframe(df: DataFrame[Schema]) -> DataFrame[SchemaOut]: - return cast(DataFrame[SchemaOut], df) # okay for mypy +@pa.check_types +def fn_cast_dataframe_invalid(df: DataFrame[Schema]) -> DataFrame[SchemaOut]: + return cast( + DataFrame[SchemaOut], df + ) # okay for mypy, pandera raises error # noqa diff --git a/tests/core/test_model.py b/tests/core/test_model.py index d93303dd3..315aa9b37 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -12,8 +12,11 @@ from pandera.typing import DataFrame, Index, Series, String -def test_to_schema() -> None: - """Test that SchemaModel.to_schema() can produce the correct schema.""" +def test_to_schema_and_validate() -> None: + """ + Test that SchemaModel.to_schema() can produce the correct schema and + can validate dataframe objects. + """ class Schema(pa.SchemaModel): a: Series[int] @@ -27,8 +30,9 @@ class Schema(pa.SchemaModel): ) assert expected == Schema.to_schema() - with pytest.raises(TypeError): - Schema() + Schema(pd.DataFrame({"a": [1], "b": ["foo"], "c": [3.4]}, index=["1"])) + with pytest.raises(pa.errors.SchemaError): + Schema(pd.DataFrame({"a": [1]})) def test_empty_schema() -> None: diff --git a/tests/core/test_static_type_checking.py b/tests/core/test_static_type_checking.py index 28ef13dc2..20fe64226 100644 --- a/tests/core/test_static_type_checking.py +++ b/tests/core/test_static_type_checking.py @@ -51,7 +51,7 @@ def test_mypy_pandas_dataframe(capfd) -> None: ) errors = _get_mypy_errors(capfd.readouterr().out) # assert error messages on particular lines of code - assert errors[40] == { + assert errors[35] == { "msg": ( 'Argument 1 to "pipe" of "NDFrame" has incompatible type ' '"Type[DataFrame[Any]]"; expected ' @@ -60,23 +60,23 @@ def test_mypy_pandas_dataframe(capfd) -> None: ), "errcode": "arg-type", } - assert errors[44] == { + assert errors[41] == { "msg": ( "Incompatible return value type (got " '"pandas.core.frame.DataFrame", expected ' - '"pandera.typing.DataFrame[SchemaOut]")' + '"pandera.typing.pandas.DataFrame[SchemaOut]")' ), "errcode": "return-value", } - assert errors[48] == { + assert errors[54] == { "msg": ( 'Argument 1 to "fn" has incompatible type ' '"pandas.core.frame.DataFrame"; expected ' - '"pandera.typing.DataFrame[Schema]"' + '"pandera.typing.pandas.DataFrame[Schema]"' ), "errcode": "arg-type", } - assert errors[49] == { + assert errors[58] == { "msg": ( 'Argument 1 to "fn" has incompatible type ' '"DataFrame[AnotherSchema]"; expected "DataFrame[Schema]"' @@ -90,6 +90,7 @@ def test_mypy_pandas_dataframe(capfd) -> None: [ pandas_dataframe.fn_mutate_inplace, pandas_dataframe.fn_assign_and_get_index, + pandas_dataframe.fn_cast_dataframe_invalid, ], ) def test_pandera_runtime_errors(fn) -> None: @@ -97,6 +98,6 @@ def test_pandera_runtime_errors(fn) -> 
None:
 
     # both functions don't add a required column "age"
     try:
-        pa.check_types(fn)(pandas_dataframe.valid_df)
+        fn(pandas_dataframe.schema_df)
     except pa.errors.SchemaError as e:
         assert e.failure_cases["failure_case"].item() == "age"
diff --git a/tests/core/test_typing.py b/tests/core/test_typing.py
index 652871f4f..585be8cb1 100644
--- a/tests/core/test_typing.py
+++ b/tests/core/test_typing.py
@@ -9,7 +9,7 @@
 
 import pandera as pa
 from pandera.dtypes import DataType
-from pandera.typing import Series
+from pandera.typing import DataFrame, Index, Series
 
 try:  # python 3.9+
     from typing import Annotated  # type: ignore
@@ -456,3 +456,33 @@ def test_new_pandas_extension_dtype_class(
 ):
     """Test type annotations with the new nullable pandas dtypes."""
     _test_default_annotated_dtype(model, dtype, has_mandatory_args)
+
+
+class InitSchema(pa.SchemaModel):
+    col1: Series[int]
+    col2: Series[float]
+    col3: Series[str]
+    index: Index[int]
+
+
+def test_init_pandas_dataframe():
+    """Test initialization of pandera.typing.DataFrame with Schema."""
+    assert isinstance(
+        DataFrame[InitSchema]({"col1": [1], "col2": [1.0], "col3": ["1"]}),
+        DataFrame,
+    )
+
+
+@pytest.mark.parametrize(
+    "invalid_data",
+    [
+        {"col1": [1.0], "col2": [1.0], "col3": ["1"]},
+        {"col1": [1], "col2": [1], "col3": ["1"]},
+        {"col1": [1], "col2": [1.0], "col3": [1]},
+        {"col1": [1]},
+    ],
+)
+def test_init_pandas_dataframe_errors(invalid_data):
+    """Test errors from initializing a pandera.typing.DataFrame with Schema."""
+    with pytest.raises(pa.errors.SchemaError):
+        DataFrame[InitSchema](invalid_data)
diff --git a/tests/dask/test_dask.py b/tests/dask/test_dask.py
index 696376345..859834c3f 100644
--- a/tests/dask/test_dask.py
+++ b/tests/dask/test_dask.py
@@ -6,7 +6,7 @@
 import pytest
 
 import pandera as pa
-from pandera.typing import DaskDataFrame, Series
+from pandera.typing.dask import DataFrame, Index, Series
 
 
 class IntSchema(pa.SchemaModel):  # pylint: disable=missing-class-docstring
@@ -25,17 +25,17 @@ def test_model_validation() -> None:
     ddf = dd.from_pandas(df, npartitions=1)
 
     ddf = StrSchema.validate(ddf)
-    pd.testing.assert_frame_equal(df, ddf.compute())
+    pd.testing.assert_frame_equal(df, ddf.compute())  # type: ignore [attr-defined]
 
     ddf = IntSchema.validate(ddf)
 
     with pytest.raises(pa.errors.SchemaError):
-        ddf.compute()
+        ddf.compute()  # type: ignore [attr-defined]
 
     IntSchema.validate(ddf, inplace=True)
 
     with pytest.raises(pa.errors.SchemaError):
-        ddf.compute()
+        ddf.compute()  # type: ignore [attr-defined]
 
 
 def test_dataframe_schema() -> None:
@@ -91,11 +91,11 @@ def test_decorator() -> None:
     """Test that pandera check_types decorator works with Dask DataFrames."""
 
     @pa.check_types
-    def str_func(x: DaskDataFrame[StrSchema]) -> DaskDataFrame[StrSchema]:
+    def str_func(x: DataFrame[StrSchema]) -> DataFrame[StrSchema]:
         return x
 
     @pa.check_types
-    def int_func(x: DaskDataFrame[IntSchema]) -> DaskDataFrame[IntSchema]:
+    def int_func(x: DataFrame[IntSchema]) -> DataFrame[IntSchema]:
         return x
 
     df = pd.DataFrame({"col": ["1"]})
@@ -106,3 +106,42 @@ def int_func(x: DaskDataFrame[IntSchema]) -> DaskDataFrame[IntSchema]:
 
     with pytest.raises(pa.errors.SchemaError):
         print(result.compute())
+
+
+class InitSchema(pa.SchemaModel):
+    """Schema used to test dataframe initialization."""
+
+    col1: Series[int]
+    col2: Series[float]
+    col3: Series[str]
+    index: Index[int]
+
+
+def test_init_dask_dataframe():
+    """Test initialization of pandera.typing.dask.DataFrame with Schema."""
+    ddf = dd.from_pandas(
+        pd.DataFrame({"col1": [1], "col2": [1.0], 
"col3": ["1"]}), + npartitions=2, + ) + assert isinstance( + DataFrame[InitSchema](ddf.dask, ddf._name, ddf._meta, ddf.divisions), + DataFrame, + ) + + +@pytest.mark.parametrize( + "invalid_data", + [ + {"col1": [1.0], "col2": [1.0], "col3": ["1"]}, + {"col1": [1], "col2": [1], "col3": ["1"]}, + {"col1": [1], "col2": [1.0], "col3": [1]}, + {"col1": [1]}, + ], +) +def test_init_pandas_dataframe_errors(invalid_data): + """Test errors from initializing a pandas.typing.DataFrame with Schema.""" + ddf = dd.from_pandas(pd.DataFrame(invalid_data), npartitions=2) + with pytest.raises(pa.errors.SchemaError): + DataFrame[InitSchema]( + ddf.dask, ddf._name, ddf._meta, ddf.divisions + ).compute() diff --git a/tests/koalas/test_schemas_on_koalas.py b/tests/koalas/test_schemas_on_koalas.py index d6d15cd5d..5d529c0ee 100644 --- a/tests/koalas/test_schemas_on_koalas.py +++ b/tests/koalas/test_schemas_on_koalas.py @@ -10,6 +10,7 @@ import pandera as pa from pandera import dtypes, extensions, system from pandera.engines import numpy_engine, pandas_engine +from pandera.typing import DataFrame, Index, Series from tests.strategies.test_strategies import NULLABLE_DTYPES from tests.strategies.test_strategies import ( UNSUPPORTED_DTYPE_CLS as UNSUPPORTED_STRATEGY_DTYPE_CLS, @@ -430,9 +431,11 @@ def test_schema_model(): # pylint: disable=too-few-public-methods class Schema(pa.SchemaModel): - int_field: pa.typing.Series[int] = pa.Field(gt=0) - float_field: pa.typing.Series[float] = pa.Field(lt=0) - str_field: pa.typing.Series[str] = pa.Field(isin=["a", "b", "c"]) + int_field: pa.typing.koalas.Series[int] = pa.Field(gt=0) + float_field: pa.typing.koalas.Series[float] = pa.Field(lt=0) + str_field: pa.typing.koalas.Series[str] = pa.Field( + isin=["a", "b", "c"] + ) valid_df = ks.DataFrame( { @@ -495,10 +498,10 @@ def test_check_decorators(): # pylint: disable=too-few-public-methods class InSchema(pa.SchemaModel): - a: pa.typing.Series[int] + a: pa.typing.koalas.Series[int] class OutSchema(InSchema): - b: pa.typing.Series[int] + b: pa.typing.koalas.Series[int] @pa.check_input(in_schema) @pa.check_output(out_schema) @@ -522,15 +525,15 @@ def function_check_io_invalid(df: ks.DataFrame) -> ks.DataFrame: @pa.check_types def function_check_types( - df: pa.typing.DataFrame[InSchema], - ) -> pa.typing.DataFrame[OutSchema]: + df: pa.typing.koalas.DataFrame[InSchema], + ) -> pa.typing.koalas.DataFrame[OutSchema]: df["b"] = df["a"] + 1 return df @pa.check_types def function_check_types_invalid( - df: pa.typing.DataFrame[InSchema], - ) -> pa.typing.DataFrame[OutSchema]: + df: pa.typing.koalas.DataFrame[InSchema], + ) -> pa.typing.koalas.DataFrame[OutSchema]: return df valid_df = ks.DataFrame({"a": [1, 2, 3]}) @@ -558,3 +561,35 @@ def function_check_types_invalid( ): with pytest.raises(pa.errors.SchemaError): fn(valid_df) + + +class InitSchema(pa.SchemaModel): + """Schema used to test dataframe initialization.""" + + col1: Series[int] + col2: Series[float] + col3: Series[str] + index: Index[int] + + +def test_init_koalas_dataframe(): + """Test initialization of pandas.typing.dask.DataFrame with Schema.""" + assert isinstance( + DataFrame[InitSchema]({"col1": [1], "col2": [1.0], "col3": ["1"]}), + DataFrame, + ) + + +@pytest.mark.parametrize( + "invalid_data", + [ + {"col1": [1.0], "col2": [1.0], "col3": ["1"]}, + {"col1": [1], "col2": [1], "col3": ["1"]}, + {"col1": [1], "col2": [1.0], "col3": [1]}, + {"col1": [1]}, + ], +) +def test_init_koalas_dataframe_errors(invalid_data): + """Test errors from initializing a 
diff --git a/tests/koalas/test_schemas_on_koalas.py b/tests/koalas/test_schemas_on_koalas.py
index d6d15cd5d..5d529c0ee 100644
--- a/tests/koalas/test_schemas_on_koalas.py
+++ b/tests/koalas/test_schemas_on_koalas.py
@@ -10,6 +10,7 @@
 import pandera as pa
 from pandera import dtypes, extensions, system
 from pandera.engines import numpy_engine, pandas_engine
+from pandera.typing import DataFrame, Index, Series
 from tests.strategies.test_strategies import NULLABLE_DTYPES
 from tests.strategies.test_strategies import (
     UNSUPPORTED_DTYPE_CLS as UNSUPPORTED_STRATEGY_DTYPE_CLS,
@@ -430,9 +431,11 @@ def test_schema_model():
     # pylint: disable=too-few-public-methods
     class Schema(pa.SchemaModel):
-        int_field: pa.typing.Series[int] = pa.Field(gt=0)
-        float_field: pa.typing.Series[float] = pa.Field(lt=0)
-        str_field: pa.typing.Series[str] = pa.Field(isin=["a", "b", "c"])
+        int_field: pa.typing.koalas.Series[int] = pa.Field(gt=0)
+        float_field: pa.typing.koalas.Series[float] = pa.Field(lt=0)
+        str_field: pa.typing.koalas.Series[str] = pa.Field(
+            isin=["a", "b", "c"]
+        )
 
     valid_df = ks.DataFrame(
         {
@@ -495,10 +498,10 @@ def test_check_decorators():
     # pylint: disable=too-few-public-methods
     class InSchema(pa.SchemaModel):
-        a: pa.typing.Series[int]
+        a: pa.typing.koalas.Series[int]
 
     class OutSchema(InSchema):
-        b: pa.typing.Series[int]
+        b: pa.typing.koalas.Series[int]
 
     @pa.check_input(in_schema)
     @pa.check_output(out_schema)
@@ -522,15 +525,15 @@ def function_check_io_invalid(df: ks.DataFrame) -> ks.DataFrame:
 
     @pa.check_types
     def function_check_types(
-        df: pa.typing.DataFrame[InSchema],
-    ) -> pa.typing.DataFrame[OutSchema]:
+        df: pa.typing.koalas.DataFrame[InSchema],
+    ) -> pa.typing.koalas.DataFrame[OutSchema]:
         df["b"] = df["a"] + 1
         return df
 
     @pa.check_types
     def function_check_types_invalid(
-        df: pa.typing.DataFrame[InSchema],
-    ) -> pa.typing.DataFrame[OutSchema]:
+        df: pa.typing.koalas.DataFrame[InSchema],
+    ) -> pa.typing.koalas.DataFrame[OutSchema]:
         return df
 
     valid_df = ks.DataFrame({"a": [1, 2, 3]})
@@ -558,3 +561,35 @@ def function_check_types_invalid(
     ):
         with pytest.raises(pa.errors.SchemaError):
             fn(valid_df)
+
+
+class InitSchema(pa.SchemaModel):
+    """Schema used to test dataframe initialization."""
+
+    col1: Series[int]
+    col2: Series[float]
+    col3: Series[str]
+    index: Index[int]
+
+
+def test_init_koalas_dataframe():
+    """Test initialization of pandera.typing.DataFrame with Schema."""
+    assert isinstance(
+        DataFrame[InitSchema]({"col1": [1], "col2": [1.0], "col3": ["1"]}),
+        DataFrame,
+    )
+
+
+@pytest.mark.parametrize(
+    "invalid_data",
+    [
+        {"col1": [1.0], "col2": [1.0], "col3": ["1"]},
+        {"col1": [1], "col2": [1], "col3": ["1"]},
+        {"col1": [1], "col2": [1.0], "col3": [1]},
+        {"col1": [1]},
+    ],
+)
+def test_init_koalas_dataframe_errors(invalid_data):
+    """Test errors from initializing a pandera.typing.DataFrame with Schema."""
+    with pytest.raises(pa.errors.SchemaError):
+        DataFrame[InitSchema](invalid_data)
diff --git a/tests/modin/test_schemas_on_modin.py b/tests/modin/test_schemas_on_modin.py
index d8f39c197..b80d09d17 100644
--- a/tests/modin/test_schemas_on_modin.py
+++ b/tests/modin/test_schemas_on_modin.py
@@ -11,6 +11,7 @@
 import pandera as pa
 from pandera import extensions
 from pandera.engines import numpy_engine, pandas_engine
+from pandera.typing.modin import DataFrame, Index, Series
 from tests.strategies.test_strategies import NULLABLE_DTYPES
 from tests.strategies.test_strategies import (
     SUPPORTED_DTYPES as SUPPORTED_STRATEGY_DTYPES,
@@ -338,9 +339,9 @@ def test_schema_model():
     # pylint: disable=too-few-public-methods
     class Schema(pa.SchemaModel):
-        int_field: pa.typing.Series[int] = pa.Field(gt=0)
-        float_field: pa.typing.Series[float] = pa.Field(lt=0)
-        str_field: pa.typing.Series[str] = pa.Field(isin=["a", "b", "c"])
+        int_field: pa.typing.modin.Series[int] = pa.Field(gt=0)
+        float_field: pa.typing.modin.Series[float] = pa.Field(lt=0)
+        str_field: pa.typing.modin.Series[str] = pa.Field(isin=["a", "b", "c"])
 
     valid_df = mpd.DataFrame(
         {
@@ -403,10 +404,10 @@ def test_check_decorators():
     # pylint: disable=too-few-public-methods
     class InSchema(pa.SchemaModel):
-        a: pa.typing.Series[int]
+        a: pa.typing.modin.Series[int]
 
     class OutSchema(InSchema):
-        b: pa.typing.Series[int]
+        b: pa.typing.modin.Series[int]
 
     @pa.check_input(in_schema)
     @pa.check_output(out_schema)
@@ -432,15 +433,15 @@ def function_check_io_invalid(df: mpd.DataFrame) -> mpd.DataFrame:
 
     @pa.check_types
     def function_check_types(
-        df: pa.typing.DataFrame[InSchema],
-    ) -> pa.typing.DataFrame[OutSchema]:
+        df: pa.typing.modin.DataFrame[InSchema],
+    ) -> pa.typing.modin.DataFrame[OutSchema]:
         df["b"] = df["a"] + 1
         return df
 
     @pa.check_types
     def function_check_types_invalid(
-        df: pa.typing.DataFrame[InSchema],
-    ) -> pa.typing.DataFrame[OutSchema]:
+        df: pa.typing.modin.DataFrame[InSchema],
+    ) -> pa.typing.modin.DataFrame[OutSchema]:
         return df
 
     valid_df = mpd.DataFrame({"a": [1, 2, 3]})
@@ -468,3 +469,35 @@ def function_check_types_invalid(
     ):
         with pytest.raises(pa.errors.SchemaError):
             fn(valid_df)
+
+
+class InitSchema(pa.SchemaModel):
+    """Schema used for dataframe initialization."""
+
+    col1: Series[int]
+    col2: Series[float]
+    col3: Series[str]
+    index: Index[int]
+
+
+def test_init_modin_dataframe():
+    """Test initialization of pandera.typing.modin.DataFrame with Schema."""
+    assert isinstance(
+        DataFrame[InitSchema]({"col1": [1], "col2": [1.0], "col3": ["1"]}),
+        DataFrame,
+    )
+
+
+@pytest.mark.parametrize(
+    "invalid_data",
+    [
+        {"col1": [1.0], "col2": [1.0], "col3": ["1"]},
+        {"col1": [1], "col2": [1], "col3": ["1"]},
+        {"col1": [1], "col2": [1.0], "col3": [1]},
+        {"col1": [1]},
+    ],
+)
+def test_init_modin_dataframe_errors(invalid_data):
+    """Test errors from initializing a pandera.typing.modin.DataFrame with Schema."""
+    with pytest.raises(pa.errors.SchemaError):
+        DataFrame[InitSchema](invalid_data)
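The koalas and modin test changes above all follow from the new library-specific typing modules: annotating fields with, e.g., `pa.typing.koalas.Series` lets `@pa.check_types` validate koalas dataframes end to end. A condensed sketch of the pattern the koalas tests use, assuming `databricks.koalas` is installed:

```python
import databricks.koalas as ks

import pandera as pa


class InSchema(pa.SchemaModel):
    a: pa.typing.koalas.Series[int]


class OutSchema(InSchema):
    b: pa.typing.koalas.Series[int]


@pa.check_types
def add_b(
    df: pa.typing.koalas.DataFrame[InSchema],
) -> pa.typing.koalas.DataFrame[OutSchema]:
    # the output schema requires column "b", so check_types validates it
    df["b"] = df["a"] + 1
    return df


add_b(ks.DataFrame({"a": [1, 2, 3]}))
```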
From d368f1236f3c9d995357ad437f500ec101cfd64c Mon Sep 17 00:00:00 2001
From: cosmicBboy
Date: Tue, 9 Nov 2021 00:11:20 -0500
Subject: [PATCH 2/3] fix lint

---
 pandera/typing/dask.py  | 2 +-
 pandera/typing/modin.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandera/typing/dask.py b/pandera/typing/dask.py
index ae807c036..168c02037 100644
--- a/pandera/typing/dask.py
+++ b/pandera/typing/dask.py
@@ -23,7 +23,7 @@ if DASK_INSTALLED:
-    # pylint: disable=too-few-public-methods
+    # pylint: disable=too-few-public-methods,abstract-method
     class DataFrame(DataFrameBase, dd.DataFrame, Generic[T]):
         """
         Representation of dask.dataframe.DataFrame, only used for type
diff --git a/pandera/typing/modin.py b/pandera/typing/modin.py
index c69782f21..b497890d8 100644
--- a/pandera/typing/modin.py
+++ b/pandera/typing/modin.py
@@ -31,14 +31,14 @@ class DataFrame(DataFrameBase, mpd.DataFrame, Generic[T]):
         *new in 0.8.0*
         """
 
-    # pylint:disable=too-few-public-methods
+    # pylint:disable=too-few-public-methods,abstract-method
     class Series(SeriesBase, mpd.Series, Generic[GenericDtype]):
         """Representation of pandas.Series, only used for type annotation.
 
         *new in 0.8.0*
         """
 
-    # pylint:disable=too-few-public-methods
+    # pylint:disable=too-few-public-methods,abstract-method
     class Index(IndexBase, mpd.Index, Generic[GenericDtype]):
         """Representation of pandas.Index, only used for type annotation.

From e0fa3bba100b65ee067322f375f838703d5cc900 Mon Sep 17 00:00:00 2001
From: cosmicBboy
Date: Tue, 9 Nov 2021 08:57:25 -0500
Subject: [PATCH 3/3] fix lint, docs tests

---
 docs/source/conf.py      | 3 +++
 docs/source/modin.rst    | 6 ++++++
 pandera/typing/koalas.py | 4 ++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 77c9b06f3..8e47ab41c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -50,6 +50,7 @@
 ]
 
 doctest_global_setup = """
+import platform
 import sys
 import pandas as pd
 import numpy as np
@@ -76,6 +77,8 @@
 SKIP_PANDAS_LT_V1 = version.parse(pd.__version__).release < (1, 0) or PY36
 SKIP_SCALING = True
 SKIP_SCHEMA_MODEL = SKIP_PANDAS_LT_V1 or KOALAS_INSTALLED
+SKIP_MODIN = platform.system() == "Windows"
+
 """
 
 doctest_default_flags = (
diff --git a/docs/source/modin.rst b/docs/source/modin.rst
index 3d5b1bb1b..fc6c58fd2 100644
--- a/docs/source/modin.rst
+++ b/docs/source/modin.rst
@@ -25,6 +25,7 @@ below we'll use the :ref:`class-based API <schema_models>` to define a
 :py:class:`SchemaModel` for validation.
 
 .. testcode:: scaling_modin
+   :skipif: SKIP_MODIN
 
     import modin.pandas as pd
    import pandas as pd
@@ -58,6 +59,7 @@ below we'll use the :ref:`class-based API <schema_models>` to define a
 
 .. testoutput:: scaling_modin
+   :skipif: SKIP_MODIN
 
     state           city  price
     0     FL        Orlando      8
@@ -73,6 +75,7 @@ modin dataframes at runtime:
 
 .. testcode:: scaling_modin
+   :skipif: SKIP_MODIN
 
     @pa.check_types
     def function(df: DataFrame[Schema]) -> DataFrame[Schema]:
@@ -82,6 +85,7 @@ modin dataframes at runtime:
 
 .. testoutput:: scaling_modin
+   :skipif: SKIP_MODIN
 
     state           city  price
     3     CA  San Francisco     16
@@ -93,6 +97,7 @@ And of course, you can use the object-based API to validate dask dataframes:
 
 .. testcode:: scaling_modin
+   :skipif: SKIP_MODIN
 
     schema = pa.DataFrameSchema({
         "state": pa.Column(str),
@@ -103,6 +108,7 @@ And of course, you can use the object-based API to validate dask dataframes:
 .. testoutput:: scaling_modin
+   :skipif: SKIP_MODIN
 
     state           city  price
     0     FL        Orlando      8
diff --git a/pandera/typing/koalas.py b/pandera/typing/koalas.py
index 052338f7f..940ce8f76 100644
--- a/pandera/typing/koalas.py
+++ b/pandera/typing/koalas.py
@@ -22,7 +22,7 @@ if KOALAS_INSTALLED:
-    # pylint: disable=too-few-public-methods
+    # pylint: disable=too-few-public-methods,arguments-renamed
     class DataFrame(DataFrameBase, ks.DataFrame, Generic[T]):
         """
         Representation of databricks.koalas.DataFrame, only used for type
@@ -35,7 +35,7 @@ def __class_getitem__(cls, item):
         """Define this to override koalas' generic type."""
         return _GenericAlias(cls, item)
 
-    # pylint:disable=too-few-public-methods
+    # pylint:disable=too-few-public-methods,arguments-renamed
     class Series(SeriesBase, ks.Series, Generic[GenericDtype]):
         """Representation of pandas.Series, only used for type annotation.