From 9d5e5e9e12e54ed9bdf97ba60f18013f3a364455 Mon Sep 17 00:00:00 2001 From: cosmicBboy Date: Wed, 28 Apr 2021 22:33:17 -0400 Subject: [PATCH 1/9] coerce=True and pandas_dtype=None should be a noop before these two options were incompatible and pandera would raise a SchemaInitError. This diff loosens this by allowing for this combination, in which case no coercion will happen if pandas_dtype=None --- .pre-commit-config.yaml | 2 +- environment.yml | 4 +++- pandera/schema_components.py | 8 ++------ pandera/schemas.py | 25 +++++++++---------------- requirements-dev.txt | 4 ++-- tests/core/test_schemas.py | 22 +++++++++------------- 6 files changed, 26 insertions(+), 39 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f041fbf9c..97e2f5504 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -41,7 +41,7 @@ repos: args: ["--line-length=79"] - repo: https://github.com/pycqa/pylint - rev: pylint-2.6.0 + rev: pylint-2.8.1 hooks: - id: pylint args: ["--disable=import-error"] diff --git a/environment.yml b/environment.yml index 1541e5b7d..9aa7f2c4e 100644 --- a/environment.yml +++ b/environment.yml @@ -38,7 +38,6 @@ dependencies: - sphinx-autodoc-typehints - sphinx-copybutton - recommonmark - - furo # packaging - twine @@ -48,3 +47,6 @@ dependencies: # optional - pre_commit + + - pip: + - furo diff --git a/pandera/schema_components.py b/pandera/schema_components.py index 39185b1b6..eead56190 100644 --- a/pandera/schema_components.py +++ b/pandera/schema_components.py @@ -50,7 +50,8 @@ def __init__( :param allow_duplicates: Whether or not column can contain duplicate values. :param coerce: If True, when schema.validate is called the column will - be coerced into the specified dtype. + be coerced into the specified dtype. This has no effect on columns + where ``pandas_dtype=None``. :param required: Whether or not column is allowed to be missing :param name: column name in dataframe to validate. :param regex: whether the ``name`` attribute should be treated as a @@ -90,11 +91,6 @@ def __init__( self._name = name self._regex = regex - if coerce and self._pandas_dtype is None: - raise errors.SchemaInitError( - "Must specify dtype if coercing a Column's type" - ) - @property def regex(self) -> bool: """True if ``name`` attribute should be treated as a regex pattern.""" diff --git a/pandera/schemas.py b/pandera/schemas.py index 041600d15..64a1eef64 100644 --- a/pandera/schemas.py +++ b/pandera/schemas.py @@ -91,7 +91,8 @@ def __init__( `validate` will verify properties of the columns and return the transformed dataframe object. :param coerce: whether or not to coerce all of the columns on - validation. + validation. This has no effect on columns where + ``pandas_dtype=None`` :param strict: ensure that all and only the columns defined in the schema are present in the dataframe. If set to 'filter', only the columns in the schema will be passed to the validated @@ -140,19 +141,6 @@ def __init__( self.columns = {} if columns is None else columns - if coerce: - missing_pandas_type = [ - name - for name, col in self.columns.items() - if col.pandas_dtype is None - ] - if missing_pandas_type: - raise errors.SchemaInitError( - "Must specify dtype in all Columns if coercing " - "DataFrameSchema ; columns with missing pandas_type:" - + ", ".join(missing_pandas_type) - ) - if transformer is not None: warnings.warn( "The `transformers` argument has been deprecated and will no " @@ -318,7 +306,10 @@ def _coerce_dtype(self, obj: pd.DataFrame) -> pd.DataFrame: try: return obj.astype(self.pdtype.str_alias) except (ValueError, TypeError) as exc: - msg = f"Error while coercing '{self.name}' to type {self.dtype}: {exc}" + msg = ( + f"Error while coercing '{self.name}' to type {self.dtype}: " + f"{exc}" + ) raise errors.SchemaError( self, obj, @@ -1639,7 +1630,9 @@ def coerce_dtype(self, obj: Union[pd.Series, pd.Index]) -> pd.Series: (including time series). :returns: ``Series`` with coerced data type """ - if ( + if self._pandas_dtype is None: + return obj + elif ( self._pandas_dtype is PandasDtype.String or self._pandas_dtype is str or self._pandas_dtype == "str" diff --git a/requirements-dev.txt b/requirements-dev.txt index b1ad45fdc..dca83a883 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -27,7 +27,7 @@ sphinx_rtd_theme sphinx-autodoc-typehints sphinx-copybutton recommonmark -furo twine asv -pre_commit \ No newline at end of file +pre_commit +furo \ No newline at end of file diff --git a/tests/core/test_schemas.py b/tests/core/test_schemas.py index 73a4ecb71..cef8054ef 100644 --- a/tests/core/test_schemas.py +++ b/tests/core/test_schemas.py @@ -657,11 +657,12 @@ def test_no_dtype_series(): def test_coerce_without_dtype(): """Test that an error is thrown when a dtype isn't specified and coerce is True.""" - with pytest.raises(errors.SchemaInitError): - DataFrameSchema({"col": Column(coerce=True)}) - - with pytest.raises(errors.SchemaInitError): - DataFrameSchema({"col": Column()}, coerce=True) + df = pd.DataFrame({"col": [1, 2, 3]}) + for schema in [ + DataFrameSchema({"col": Column(coerce=True)}), + DataFrameSchema({"col": Column()}, coerce=True), + ]: + assert isinstance(schema(df), pd.DataFrame) def test_required(): @@ -1027,15 +1028,10 @@ def test_rename_columns(): # Check if new column names are indeed present in the new schema assert all( - [ - col_name in rename_dict.values() - for col_name in schema_renamed.columns - ] + col_name in rename_dict.values() for col_name in schema_renamed.columns ) # Check if original schema didn't change in the process - assert all( - [col_name in schema_original.columns for col_name in rename_dict] - ) + assert all(col_name in schema_original.columns for col_name in rename_dict) with pytest.raises(errors.SchemaInitError): schema_original.rename_columns({"foo": "bar"}) @@ -1525,7 +1521,7 @@ def test_invalid_keys(schema_simple): def test_update_columns(schema_simple): - """ Catch-all test for update columns functionality """ + """Catch-all test for update columns functionality""" # Basic function test_schema = schema_simple.update_columns({"col2": {"pandas_dtype": Int}}) From 7a3809604323da0c642900eb8163e7867c927677 Mon Sep 17 00:00:00 2001 From: cosmicBboy Date: Thu, 29 Apr 2021 21:31:55 -0400 Subject: [PATCH 2/9] ignore python=3.7 pandas=0.25.3 in noxfile, minor changes --- noxfile.py | 15 +++++++++++++-- pandera/io.py | 10 ++++++---- tests/strategies/test_strategies.py | 9 +++++++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/noxfile.py b/noxfile.py index 580447a70..278eea8b8 100644 --- a/noxfile.py +++ b/noxfile.py @@ -32,6 +32,7 @@ SOURCE_PATHS = PACKAGE, "tests", "noxfile.py" REQUIREMENT_PATH = "requirements-dev.txt" +ALWAYS_USE_PIP = ["furo"] CI_RUN = os.environ.get("CI") == "true" if CI_RUN: @@ -153,7 +154,9 @@ def install(session: Session, *args: str): def install_from_requirements(session: Session, *packages: str) -> None: - """Install dependencies, respecting the version specified in requirements.""" + """ + Install dependencies, respecting the version specified in requirements. + """ for package in packages: try: specs = REQUIRES["all"][package] @@ -172,6 +175,7 @@ def install_extras( specs = [ spec if spec != "pandas" else f"pandas{pandas_version}" for spec in REQUIRES[extra].values() + if spec not in ALWAYS_USE_PIP ] if isinstance(session.virtualenv, nox.virtualenv.CondaEnv): print("using conda installer") @@ -179,6 +183,8 @@ def install_extras( else: print("using pip installer") session.install(*specs) + # always use pip for these packages + session.install(*ALWAYS_USE_PIP) session.install("-e", ".", "--no-deps") # install pandera @@ -272,7 +278,11 @@ def mypy(session: Session) -> None: def _invalid_python_pandas_versions(session: Session, pandas: str) -> bool: python_version = version.parse(cast(str, session.python)) - if pandas == "0.25.3" and python_version >= version.parse("3.9"): + if pandas == "0.25.3" and ( + python_version >= version.parse("3.9") + # this is just a bandaid until support for 0.25.3 is dropped + or python_version == version.parse("3.7") + ): print("Python 3.9 does not support pandas 0.25.3") return True return False @@ -328,6 +338,7 @@ def docs(session: Session, pandas: str) -> None: install_extras(session, pandas, extra="all") session.chdir("docs") + shutil.rmtree(os.path.join("_build"), ignore_errors=True) args = session.posargs or ["-W", "-E", "-b=doctest", "source", "_build"] session.run("sphinx-build", *args) diff --git a/pandera/io.py b/pandera/io.py index bff044cdf..05441a04d 100644 --- a/pandera/io.py +++ b/pandera/io.py @@ -50,13 +50,14 @@ def handle_stat_dtype(stat): def _serialize_dataframe_stats(dataframe_checks): """ - Serialize global dataframe check statistics into json/yaml-compatible format. + Serialize global dataframe check statistics into json/yaml-compatible + format. """ serialized_checks = {} for check_name, check_stats in dataframe_checks.items(): - # The case that `check_name` is not registered is handled in `parse_checks`, - # so we know that `check_name` exists. + # The case that `check_name` is not registered is handled in + # `parse_checks` so we know that `check_name` exists. # infer dtype of statistics and serialize them serialized_checks[check_name] = _serialize_check_stats(check_stats) @@ -305,7 +306,8 @@ def _format_checks(checks_dict): for check_name, check_kwargs in checks_dict.items(): if check_kwargs is None: warnings.warn( - f"Check {check_name} cannot be serialized. This check will be ignored" + f"Check {check_name} cannot be serialized. " + "This check will be ignored" ) else: args = ", ".join( diff --git a/tests/strategies/test_strategies.py b/tests/strategies/test_strategies.py index 13f33f797..715c595eb 100644 --- a/tests/strategies/test_strategies.py +++ b/tests/strategies/test_strategies.py @@ -86,6 +86,9 @@ def test_pandas_dtype_strategy(pdtype, data): @pytest.mark.parametrize("pdtype", NUMERIC_DTYPES) @hypothesis.given(st.data()) +@hypothesis.settings( + suppress_health_check=[hypothesis.HealthCheck.too_slow], +) def test_check_strategy_continuous(pdtype, data): """Test built-in check strategies can generate continuous data.""" value = data.draw( @@ -416,6 +419,9 @@ def test_series_example(): @hypothesis.given(st.data()) +@hypothesis.settings( + suppress_health_check=[hypothesis.HealthCheck.too_slow], +) def test_column_strategy(data): """Test Column schema strategy.""" column_schema = pa.Column(pa.Int, pa.Check.gt(0), name="column") @@ -803,6 +809,9 @@ class Schema(pa.SchemaModel): @hypothesis.given(st.data()) +@hypothesis.settings( + suppress_health_check=[hypothesis.HealthCheck.too_slow], +) def test_schema_model_strategy(schema_model, data): """Test that strategy can be created from a SchemaModel.""" strat = schema_model.strategy(size=10) From 81bf0e56ffbc49fadd1c8d7ade930a241bfa7e0e Mon Sep 17 00:00:00 2001 From: cosmicBboy Date: Thu, 29 Apr 2021 21:39:04 -0400 Subject: [PATCH 3/9] update pylint --- environment.yml | 2 +- requirements-dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 9aa7f2c4e..4bb77fd25 100644 --- a/environment.yml +++ b/environment.yml @@ -24,7 +24,7 @@ dependencies: - isort >= 5.7.0 - codecov - mypy - - pylint = 2.6.0 + - pylint = 2.8.1 - pytest - pytest-cov - pytest-xdist diff --git a/requirements-dev.txt b/requirements-dev.txt index dca83a883..7eb6c154c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -15,7 +15,7 @@ black >= 20.8b1 isort >= 5.7.0 codecov mypy -pylint == 2.6.0 +pylint == 2.8.1 pytest pytest-cov pytest-xdist From ba44c65725e22ad736317a4cd573dfd8ab2fbf7c Mon Sep 17 00:00:00 2001 From: cosmicBboy Date: Thu, 29 Apr 2021 21:44:51 -0400 Subject: [PATCH 4/9] update pylint --- .pre-commit-config.yaml | 2 +- environment.yml | 2 +- requirements-dev.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 97e2f5504..24dfe769e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -41,7 +41,7 @@ repos: args: ["--line-length=79"] - repo: https://github.com/pycqa/pylint - rev: pylint-2.8.1 + rev: pylint-2.7.2 hooks: - id: pylint args: ["--disable=import-error"] diff --git a/environment.yml b/environment.yml index 4bb77fd25..7a934fbfb 100644 --- a/environment.yml +++ b/environment.yml @@ -24,7 +24,7 @@ dependencies: - isort >= 5.7.0 - codecov - mypy - - pylint = 2.8.1 + - pylint = 2.7.2 - pytest - pytest-cov - pytest-xdist diff --git a/requirements-dev.txt b/requirements-dev.txt index 7eb6c154c..a0c89805b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -15,7 +15,7 @@ black >= 20.8b1 isort >= 5.7.0 codecov mypy -pylint == 2.8.1 +pylint == 2.7.2 pytest pytest-cov pytest-xdist From 2b40d17da21c3912286d2d0e969bea5133c4d78f Mon Sep 17 00:00:00 2001 From: cosmicBboy Date: Thu, 29 Apr 2021 21:58:18 -0400 Subject: [PATCH 5/9] update pylint --- .pre-commit-config.yaml | 2 +- environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 24dfe769e..f041fbf9c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -41,7 +41,7 @@ repos: args: ["--line-length=79"] - repo: https://github.com/pycqa/pylint - rev: pylint-2.7.2 + rev: pylint-2.6.0 hooks: - id: pylint args: ["--disable=import-error"] diff --git a/environment.yml b/environment.yml index 7a934fbfb..9aa7f2c4e 100644 --- a/environment.yml +++ b/environment.yml @@ -24,7 +24,7 @@ dependencies: - isort >= 5.7.0 - codecov - mypy - - pylint = 2.7.2 + - pylint = 2.6.0 - pytest - pytest-cov - pytest-xdist From cda454f37051c110a297dbe4337b06b5837d810b Mon Sep 17 00:00:00 2001 From: cosmicBboy Date: Thu, 29 Apr 2021 22:20:04 -0400 Subject: [PATCH 6/9] include typed-ast --- environment.yml | 1 + requirements-dev.txt | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 9aa7f2c4e..12a281cea 100644 --- a/environment.yml +++ b/environment.yml @@ -18,6 +18,7 @@ dependencies: - typing_extensions >= 3.7.4.3 # testing and dependencies + - typed-ast - black >= 20.8b1 # testing diff --git a/requirements-dev.txt b/requirements-dev.txt index a0c89805b..7ea704f66 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,11 +11,12 @@ wrapt pyyaml >=5.1 typing_inspect >= 0.6.0 typing_extensions >= 3.7.4.3 +typed-ast black >= 20.8b1 isort >= 5.7.0 codecov mypy -pylint == 2.7.2 +pylint == 2.6.0 pytest pytest-cov pytest-xdist From 556e6007048ea06b607c0b95037b628035afc26a Mon Sep 17 00:00:00 2001 From: cosmicBboy Date: Thu, 29 Apr 2021 22:39:55 -0400 Subject: [PATCH 7/9] update cache --- .github/workflows/ci-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 9b363e829..4e60e0a1f 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -17,7 +17,7 @@ env: DEFAULT_PYTHON: 3.8 CI: "true" # Increase this value to reset cache if environment.yml has not changed - CACHE_VERSION: 1 + CACHE_VERSION: 2 jobs: codestyle: From 0d09089fd9770cf732f32323d03f76fb97bf9c64 Mon Sep 17 00:00:00 2001 From: cosmicBboy Date: Fri, 30 Apr 2021 00:13:37 -0400 Subject: [PATCH 8/9] hack for mypy, typed-ast, python3.7 --- environment.yml | 1 - noxfile.py | 23 ++++++++++++++++++----- requirements-dev.txt | 1 - 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/environment.yml b/environment.yml index 12a281cea..9aa7f2c4e 100644 --- a/environment.yml +++ b/environment.yml @@ -18,7 +18,6 @@ dependencies: - typing_extensions >= 3.7.4.3 # testing and dependencies - - typed-ast - black >= 20.8b1 # testing diff --git a/noxfile.py b/noxfile.py index 278eea8b8..82823ff4a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -32,7 +32,7 @@ SOURCE_PATHS = PACKAGE, "tests", "noxfile.py" REQUIREMENT_PATH = "requirements-dev.txt" -ALWAYS_USE_PIP = ["furo"] +ALWAYS_USE_PIP = ["furo", "mypy"] CI_RUN = os.environ.get("CI") == "true" if CI_RUN: @@ -168,7 +168,10 @@ def install_from_requirements(session: Session, *packages: str) -> None: def install_extras( - session: Session, pandas: str = "latest", extra: str = "core" + session: Session, + pandas: str = "latest", + extra: str = "core", + force_pip=False, ) -> None: """Install dependencies.""" pandas_version = "" if pandas == "latest" else f"=={pandas}" @@ -177,14 +180,16 @@ def install_extras( for spec in REQUIRES[extra].values() if spec not in ALWAYS_USE_PIP ] - if isinstance(session.virtualenv, nox.virtualenv.CondaEnv): + if ( + isinstance(session.virtualenv, nox.virtualenv.CondaEnv) + and not force_pip + ): print("using conda installer") conda_install(session, *specs) else: print("using pip installer") session.install(*specs) # always use pip for these packages - session.install(*ALWAYS_USE_PIP) session.install("-e", ".", "--no-deps") # install pandera @@ -271,7 +276,15 @@ def lint(session: Session) -> None: @nox.session(python=PYTHON_VERSIONS) def mypy(session: Session) -> None: """Type-check using mypy.""" - install_extras(session, extra="all") + python_version = version.parse(cast(str, session.python)) + install_extras( + session, + extra="all", + # this is a hack until typed-ast conda package starts working again, + # basically this issue comes up: + # https://github.com/python/mypy/pull/2906 + force_pip=python_version == version.parse("3.7"), + ) args = session.posargs or SOURCE_PATHS session.run("mypy", "--follow-imports=silent", *args, silent=True) diff --git a/requirements-dev.txt b/requirements-dev.txt index 7ea704f66..dca83a883 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,7 +11,6 @@ wrapt pyyaml >=5.1 typing_inspect >= 0.6.0 typing_extensions >= 3.7.4.3 -typed-ast black >= 20.8b1 isort >= 5.7.0 codecov From f337495c442e8e15740d5159c952b84602416d0c Mon Sep 17 00:00:00 2001 From: cosmicBboy Date: Fri, 30 Apr 2021 00:22:57 -0400 Subject: [PATCH 9/9] hack typed ast --- noxfile.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 82823ff4a..02eac2c76 100644 --- a/noxfile.py +++ b/noxfile.py @@ -315,7 +315,16 @@ def tests(session: Session, pandas: str, extra: str) -> None: """Run the test suite.""" if _invalid_python_pandas_versions(session, pandas): return - install_extras(session, pandas, extra) + python_version = version.parse(cast(str, session.python)) + install_extras( + session, + pandas, + extra, + # this is a hack until typed-ast conda package starts working again, + # basically this issue comes up: + # https://github.com/python/mypy/pull/2906 + force_pip=python_version == version.parse("3.7"), + ) if session.posargs: args = session.posargs @@ -348,7 +357,16 @@ def docs(session: Session, pandas: str) -> None: """Build the documentation.""" if _invalid_python_pandas_versions(session, pandas): return - install_extras(session, pandas, extra="all") + python_version = version.parse(cast(str, session.python)) + install_extras( + session, + pandas, + extra="all", + # this is a hack until typed-ast conda package starts working again, + # basically this issue comes up: + # https://github.com/python/mypy/pull/2906 + force_pip=python_version == version.parse("3.7"), + ) session.chdir("docs") shutil.rmtree(os.path.join("_build"), ignore_errors=True)