diff --git a/py-polars/polars/_utils/construction/dataframe.py b/py-polars/polars/_utils/construction/dataframe.py
index 54e340c36248..d41fd0da3529 100644
--- a/py-polars/polars/_utils/construction/dataframe.py
+++ b/py-polars/polars/_utils/construction/dataframe.py
@@ -1040,6 +1040,16 @@ def pandas_to_pydf(
     include_index: bool = False,
 ) -> PyDataFrame:
     """Construct a PyDataFrame from a pandas DataFrame."""
+    # pandas allows non-string and duplicate column labels (e.g. "0" and 0),
+    # so compare the stringified labels to catch collisions up front.
+    stringified_cols = {str(col) for col in data.columns}
+    if len(stringified_cols) < len(data.columns):
+        msg = (
+            "Polars dataframes must have unique string column names. "
+            "Please check your pandas dataframe for duplicates."
+        )
+        raise ValueError(msg)
+
     convert_index = include_index and not _pandas_has_default_index(data)
     if not convert_index and all(
         is_simple_numpy_backed_pandas_series(data[col]) for col in data.columns
diff --git a/py-polars/tests/unit/interop/test_from_pandas.py b/py-polars/tests/unit/interop/test_from_pandas.py
index bc9a42cbc90f..27b4f005e468 100644
--- a/py-polars/tests/unit/interop/test_from_pandas.py
+++ b/py-polars/tests/unit/interop/test_from_pandas.py
@@ -172,9 +172,16 @@ def test_from_pandas_include_indexes() -> None:
     assert df.to_dict(as_series=False) == data
 
 
+def test_duplicate_cols_diff_types() -> None:
+    """Labels that differ only by type ("0" vs 0) collide once stringified."""
+    df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["0", 0, "1", 1])
+    with pytest.raises(ValueError, match="Polars dataframes must have unique string"):
+        pl.from_pandas(df)
+
+
 def test_from_pandas_duplicated_columns() -> None:
     df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["a", "b", "c", "b"])
-    with pytest.raises(ValueError, match="duplicate column names found: "):
+    with pytest.raises(ValueError, match="Polars dataframes must have unique string"):
         pl.from_pandas(df)
 
 