Skip to content

Commit

Permalink
bugfixes: lazy validation, strategies (#550)
Browse files Browse the repository at this point in the history
  • Loading branch information
cosmicBboy authored Jul 12, 2021
1 parent 8d57dd5 commit 8b222b6
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 17 deletions.
2 changes: 1 addition & 1 deletion pandera/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,7 +990,7 @@ def _dataframe_strategy(draw):
}

row_strategy = None
if checks:
if row_strategy_checks:
row_strategy = st.fixed_dictionaries(
{
col_name: make_row_strategy(col, row_strategy_checks)
Expand Down
75 changes: 75 additions & 0 deletions tests/core/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1248,6 +1248,81 @@ def test_lazy_dataframe_validation_nullable():
)


def test_lazy_dataframe_validation_with_checks():
    """Test that all failure cases are reported for schemas with checks.

    Every value in ``sample_valid`` except the first ("Yes") falls outside
    the allowed set ``["Yes", "No"]``, so lazy validation is expected to
    raise ``SchemaErrors`` whose failure cases list those four values in
    row order.
    """
    schema = DataFrameSchema(
        columns={
            "analysis_path": Column(String),
            "run_id": Column(String),
            "sample_type": Column(String, Check.isin(["DNA", "RNA"])),
            "sample_valid": Column(String, Check.isin(["Yes", "No"])),
        },
        strict=False,
        coerce=True,
    )

    df = pd.DataFrame.from_dict(
        {
            "analysis_path": ["/", "/", "/", "/", "/"],
            "run_id": ["1", "2", "3", "4", "5"],
            "sample_type": ["DNA", "RNA", "DNA", "RNA", "RNA"],
            "sample_valid": ["Yes", "YES", "YES", "NO", "NO"],
        }
    )

    # Use pytest.raises rather than try/except: with a bare try/except the
    # assertion below would be skipped (and the test would pass vacuously)
    # if validation raised nothing at all.
    with pytest.raises(errors.SchemaErrors) as exc_info:
        schema(df, lazy=True)

    failure_case = exc_info.value.failure_cases.failure_case.tolist()
    assert failure_case == ["YES", "YES", "NO", "NO"]


def test_lazy_dataframe_validation_nullable_with_checks():
    """
    Test that checks in non-nullable column failure cases are correctly
    processed during lazy validation.

    The ``id`` column holds one valid value, one ``None`` and one value that
    does not match the ``ID###`` pattern. The expected failure-case frame
    therefore has two rows: a ``not_nullable`` failure at index 1 and a
    ``str_matches`` failure for ``"XXX"`` at index 2.
    """
    schema = DataFrameSchema(
        {
            "id": Column(
                String,
                checks=Check.str_matches(r"^ID[\d]{3}$"),
                name="id",
                required=True,
                allow_duplicates=False,
            )
        }
    )
    df = pd.DataFrame({"id": ["ID001", None, "XXX"]})

    # Use pytest.raises rather than try/except: with a bare try/except the
    # frame comparison below would never run (and the test would pass
    # vacuously) if validation raised nothing at all.
    with pytest.raises(errors.SchemaErrors) as exc_info:
        schema(df, lazy=True)

    expected_failure_cases = pd.DataFrame.from_dict(
        {
            0: {
                "schema_context": "Column",
                "column": "id",
                "check": "not_nullable",
                "check_number": None,
                "failure_case": None,
                "index": 1,
            },
            1: {
                "schema_context": "Column",
                "column": "id",
                "check": r"str_matches(re.compile('^ID[\\d]{3}$'))",
                "check_number": 0,
                "failure_case": "XXX",
                "index": 2,
            },
        },
        orient="index",
        # Cast so the expected frame's check_number dtype is object before
        # the strict frame comparison below.
    ).astype({"check_number": object})
    pd.testing.assert_frame_equal(
        exc_info.value.failure_cases, expected_failure_cases
    )


@pytest.mark.parametrize(
"schema_cls, data",
[
Expand Down
48 changes: 32 additions & 16 deletions tests/strategies/test_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,35 +794,51 @@ def test_unsatisfiable_checks():
schema.example(size=10)


@pytest.fixture(scope="module")
def schema_model():
"""Schema model fixture."""
class Schema(pa.SchemaModel):
"""Schema model for strategy testing."""

class Schema(pa.SchemaModel):
"""Schema model for strategy testing."""
col1: pa.typing.Series[int]
col2: pa.typing.Series[float]
col3: pa.typing.Series[str]

col1: pa.typing.Series[int]
col2: pa.typing.Series[float]
col3: pa.typing.Series[str]

return Schema
@hypothesis.given(st.data())
@hypothesis.settings(
suppress_health_check=[hypothesis.HealthCheck.too_slow],
)
def test_schema_model_strategy(data):
"""Test that strategy can be created from a SchemaModel."""
strat = Schema.strategy(size=10)
sample_data = data.draw(strat)
Schema.validate(sample_data)


@hypothesis.given(st.data())
@hypothesis.settings(
suppress_health_check=[hypothesis.HealthCheck.too_slow],
)
def test_schema_model_strategy(schema_model, data):
"""Test that strategy can be created from a SchemaModel."""
strat = schema_model.strategy(size=10)
def test_schema_model_strategy_df_check(data):
"""Test that schema with custom checks produce valid data."""

class SchemaWithDFCheck(Schema):
"""Schema with a custom dataframe-level check with no strategy."""

# pylint:disable=no-self-use
@pa.dataframe_check
@classmethod
def non_empty(cls, df: pd.DataFrame) -> bool:
"""Checks that dataframe is not empty."""
return not df.empty

strat = SchemaWithDFCheck.strategy(size=10)
sample_data = data.draw(strat)
schema_model.validate(sample_data)
Schema.validate(sample_data)


def test_schema_model_example(schema_model):
def test_schema_model_example():
"""Test that examples can be drawn from a SchemaModel."""
sample_data = schema_model.example(size=10)
schema_model.validate(sample_data)
sample_data = Schema.example(size=10)
Schema.validate(sample_data)


def test_schema_component_with_no_pdtype():
Expand Down

0 comments on commit 8b222b6

Please sign in to comment.