Skip to content

Commit

Permalink
fix(python): Verify the integrity of pandas column names before implied string conversion (#17433)
Browse files Browse the repository at this point in the history
  • Loading branch information
tylerriccio33 authored Jul 6, 2024
1 parent 200c6a4 commit c256a02
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 1 deletion.
8 changes: 8 additions & 0 deletions py-polars/polars/_utils/construction/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,14 @@ def pandas_to_pydf(
include_index: bool = False,
) -> PyDataFrame:
"""Construct a PyDataFrame from a pandas DataFrame."""
stringified_cols = {str(col) for col in data.columns}
if len(stringified_cols) < len(data.columns):
msg = (
"Polars dataframes must have unique string column names."
"Please check your pandas dataframe for duplicates."
)
raise ValueError(msg)

convert_index = include_index and not _pandas_has_default_index(data)
if not convert_index and all(
is_simple_numpy_backed_pandas_series(data[col]) for col in data.columns
Expand Down
8 changes: 7 additions & 1 deletion py-polars/tests/unit/interop/test_from_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,15 @@ def test_from_pandas_include_indexes() -> None:
assert df.to_dict(as_series=False) == data


def test_duplicate_cols_diff_types() -> None:
    """Column labels that collide once stringified must be rejected."""
    # "0"/0 and "1"/1 are distinct pandas labels but identical after str().
    column_labels = ["0", 0, "1", 1]
    rows = [[1, 2, 3, 4], [5, 6, 7, 8]]
    pandas_frame = pd.DataFrame(rows, columns=column_labels)

    expected_message = "Polars dataframes must have unique string"
    with pytest.raises(ValueError, match=expected_message):
        pl.from_pandas(pandas_frame)


def test_from_pandas_duplicated_columns() -> None:
    """Converting a pandas frame with a repeated column label raises ValueError."""
    df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["a", "b", "c", "b"])
    # The stale `duplicate column names found: ` expectation was dropped:
    # stacking two `pytest.raises` blocks around one call can never pass,
    # because the inner block consumes the only ValueError that is raised.
    with pytest.raises(ValueError, match="Polars dataframes must have unique string"):
        pl.from_pandas(df)


Expand Down

0 comments on commit c256a02

Please sign in to comment.