diff --git a/pandera/strategies.py b/pandera/strategies.py
index 2c71d17fe..6549c8f60 100644
--- a/pandera/strategies.py
+++ b/pandera/strategies.py
@@ -990,7 +990,7 @@ def _dataframe_strategy(draw):
         }
 
         row_strategy = None
-        if checks:
+        if row_strategy_checks:
             row_strategy = st.fixed_dictionaries(
                 {
                     col_name: make_row_strategy(col, row_strategy_checks)
diff --git a/tests/core/test_schemas.py b/tests/core/test_schemas.py
index 38b8e3bbc..cc9f5671e 100644
--- a/tests/core/test_schemas.py
+++ b/tests/core/test_schemas.py
@@ -1248,6 +1248,88 @@ def test_lazy_dataframe_validation_nullable():
             )
 
 
+def test_lazy_dataframe_validation_with_checks():
+    """Test that all failure cases are reported for schemas with checks."""
+    schema = DataFrameSchema(
+        columns={
+            "analysis_path": Column(String),
+            "run_id": Column(String),
+            "sample_type": Column(String, Check.isin(["DNA", "RNA"])),
+            "sample_valid": Column(String, Check.isin(["Yes", "No"])),
+        },
+        strict=False,
+        coerce=True,
+    )
+
+    df = pd.DataFrame.from_dict(
+        {
+            "analysis_path": ["/", "/", "/", "/", "/"],
+            "run_id": ["1", "2", "3", "4", "5"],
+            "sample_type": ["DNA", "RNA", "DNA", "RNA", "RNA"],
+            "sample_valid": ["Yes", "YES", "YES", "NO", "NO"],
+        }
+    )
+
+    # pytest.raises fails the test if validation unexpectedly succeeds; a
+    # bare try/except would pass without running any assertion.
+    with pytest.raises(errors.SchemaErrors) as exc_info:
+        schema(df, lazy=True)
+
+    failure_case = exc_info.value.failure_cases.failure_case.tolist()
+    assert failure_case == ["YES", "YES", "NO", "NO"]
+
+
+def test_lazy_dataframe_validation_nullable_with_checks():
+    """
+    Test that checks in non-nullable column failure cases are correctly
+    processed during lazy validation.
+    """
+    schema = DataFrameSchema(
+        {
+            "id": Column(
+                String,
+                checks=Check.str_matches(r"^ID[\d]{3}$"),
+                name="id",
+                required=True,
+                allow_duplicates=False,
+            )
+        }
+    )
+    df = pd.DataFrame({"id": ["ID001", None, "XXX"]})
+    # One row per expected failure: the null at index 1, then the value
+    # at index 2 that does not match the ID pattern.
+    expected_failure_cases = pd.DataFrame.from_dict(
+        {
+            0: {
+                "schema_context": "Column",
+                "column": "id",
+                "check": "not_nullable",
+                "check_number": None,
+                "failure_case": None,
+                "index": 1,
+            },
+            1: {
+                "schema_context": "Column",
+                "column": "id",
+                "check": r"str_matches(re.compile('^ID[\\d]{3}$'))",
+                "check_number": 0,
+                "failure_case": "XXX",
+                "index": 2,
+            },
+        },
+        orient="index",
+    ).astype({"check_number": object})
+
+    # Assert outside the context manager so a missing exception cannot
+    # silently skip the comparison.
+    with pytest.raises(errors.SchemaErrors) as exc_info:
+        schema(df, lazy=True)
+
+    pd.testing.assert_frame_equal(
+        exc_info.value.failure_cases, expected_failure_cases
+    )
+
+
 @pytest.mark.parametrize(
     "schema_cls, data",
     [
diff --git a/tests/strategies/test_strategies.py b/tests/strategies/test_strategies.py
index af0b97876..6abf9dcba 100644
--- a/tests/strategies/test_strategies.py
+++ b/tests/strategies/test_strategies.py
@@ -794,35 +794,51 @@ def test_unsatisfiable_checks():
             schema.example(size=10)
 
 
-@pytest.fixture(scope="module")
-def schema_model():
-    """Schema model fixture."""
+class Schema(pa.SchemaModel):
+    """Schema model for strategy testing."""
 
-    class Schema(pa.SchemaModel):
-        """Schema model for strategy testing."""
+    col1: pa.typing.Series[int]
+    col2: pa.typing.Series[float]
+    col3: pa.typing.Series[str]
 
-        col1: pa.typing.Series[int]
-        col2: pa.typing.Series[float]
-        col3: pa.typing.Series[str]
 
-    return Schema
+@hypothesis.given(st.data())
+@hypothesis.settings(
+    suppress_health_check=[hypothesis.HealthCheck.too_slow],
+)
+def test_schema_model_strategy(data):
+    """Test that strategy can be created from a SchemaModel."""
+    strat = Schema.strategy(size=10)
+    sample_data = data.draw(strat)
+    Schema.validate(sample_data)
 
 
 @hypothesis.given(st.data())
 @hypothesis.settings(
     suppress_health_check=[hypothesis.HealthCheck.too_slow],
 )
-def test_schema_model_strategy(schema_model, data):
-    """Test that strategy can be created from a SchemaModel."""
-    strat = schema_model.strategy(size=10)
+def test_schema_model_strategy_df_check(data):
+    """Test that a schema with custom checks produces valid data."""
+
+    class SchemaWithDFCheck(Schema):
+        """Schema with a custom dataframe-level check with no strategy."""
+
+        # pylint:disable=no-self-use
+        @pa.dataframe_check
+        @classmethod
+        def non_empty(cls, df: pd.DataFrame) -> bool:
+            """Checks that dataframe is not empty."""
+            return not df.empty
+
+    strat = SchemaWithDFCheck.strategy(size=10)
     sample_data = data.draw(strat)
-    schema_model.validate(sample_data)
+    Schema.validate(sample_data)
 
 
-def test_schema_model_example(schema_model):
+def test_schema_model_example():
     """Test that examples can be drawn from a SchemaModel."""
-    sample_data = schema_model.example(size=10)
-    schema_model.validate(sample_data)
+    sample_data = Schema.example(size=10)
+    Schema.validate(sample_data)
 
 
 def test_schema_component_with_no_pdtype():