diff --git a/pandera/io.py b/pandera/io.py index aabdd1d8d..ef3f3451d 100644 --- a/pandera/io.py +++ b/pandera/io.py @@ -64,8 +64,13 @@ def _serialize_component_stats(component_stats): serialized_checks[check_name] = _serialize_check_stats( check_stats, component_stats["pandas_dtype"] ) + + pandas_dtype = component_stats.get("pandas_dtype") + if pandas_dtype: + pandas_dtype = pandas_dtype.value + return { - "pandas_dtype": component_stats["pandas_dtype"].value, + "pandas_dtype": pandas_dtype, "nullable": component_stats["nullable"], "checks": serialized_checks, **{ @@ -134,9 +139,10 @@ def handle_stat_dtype(stat): def _deserialize_component_stats(serialized_component_stats): from pandera import Check # pylint: disable=import-outside-toplevel - pandas_dtype = PandasDtype.from_str_alias( - serialized_component_stats["pandas_dtype"] - ) + pandas_dtype = serialized_component_stats.get("pandas_dtype") + if pandas_dtype: + pandas_dtype = PandasDtype.from_str_alias(pandas_dtype) + checks = None if serialized_component_stats.get("checks") is not None: checks = [ @@ -333,8 +339,13 @@ def to_script(dataframe_schema, path_or_buf=None): columns = {} for colname, properties in statistics["columns"].items(): + pandas_dtype = properties.get("pandas_dtype") column_code = COLUMN_TEMPLATE.format( - pandas_dtype=f"PandasDtype.{properties['pandas_dtype'].name}", + pandas_dtype=( + None + if pandas_dtype is None + else f"PandasDtype.{properties['pandas_dtype'].name}" + ), checks=_format_checks(properties["checks"]), nullable=properties["nullable"], allow_duplicates=properties["allow_duplicates"], diff --git a/tests/io/test_io.py b/tests/io/test_io.py index 68a0e5561..848c9108b 100644 --- a/tests/io/test_io.py +++ b/tests/io/test_io.py @@ -99,6 +99,7 @@ def _create_schema(index="single"): regex=True, checks=[pa.Check.str_length(1, 3)], ), + "empty_column": pa.Column(), }, index=index, coerce=False, @@ -183,6 +184,14 @@ def _create_schema(index="single"): coerce: true required: false regex: true + empty_column: + pandas_dtype: null + nullable: false + checks: null + allow_duplicates: true + coerce: false + required: true + regex: false index: - pandas_dtype: int nullable: false