Skip to content

Commit

Permalink
feat: show possible correct key(s) in .__getitem__ KeyError message (
Browse files Browse the repository at this point in the history
…#1097)

* feat: show possible correct key(s) in .__getitem__ KeyError message

* Keep one if statement
  • Loading branch information
arwas11 authored Oct 23, 2024
1 parent 6b3ceaa commit 32fab96
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 17 deletions.
16 changes: 15 additions & 1 deletion bigframes/core/groupby/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import bigframes_vendored.constants as constants
import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby
import jellyfish
import pandas as pd

from bigframes.core import log_adapter
Expand Down Expand Up @@ -91,8 +92,21 @@ def __getitem__(

bad_keys = [key for key in keys if key not in self._block.column_labels]

# Raise a KeyError message with the possible correct key(s)
if len(bad_keys) > 0:
raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
possible_key = []
for bad_key in bad_keys:
possible_key.append(
min(
self._block.column_labels,
key=lambda item: jellyfish.damerau_levenshtein_distance(
bad_key, item
),
)
)
raise KeyError(
f"Columns not found: {str(bad_keys)[1:-1]}. Did you mean {str(possible_key)[1:-1]}?"
)

columns = [
col_id for col_id, label in self._col_id_labels.items() if label in keys
Expand Down
37 changes: 21 additions & 16 deletions tests/system/small/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,24 +426,12 @@ def test_dataframe_groupby_getitem_error(
scalars_pandas_df_index,
):
col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
with pytest.raises(KeyError, match="\"Columns not found: 'not_in_group'\""):
(
scalars_df_index[col_names]
.groupby("string_col")["not_in_group"]
.min()
.to_pandas()
)


def test_dataframe_groupby_getitem_multiple_columns_error(
scalars_df_index,
scalars_pandas_df_index,
):
col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
with pytest.raises(KeyError, match="\"Columns not found: 'col1', 'col2'\""):
with pytest.raises(
KeyError, match=r"Columns not found: 'not_in_group'. Did you mean 'string_col'?"
):
(
scalars_df_index[col_names]
.groupby("string_col")["col1", "col2"]
.groupby("bool_col")["not_in_group"]
.min()
.to_pandas()
)
Expand All @@ -464,6 +452,23 @@ def test_dataframe_groupby_getitem_list(
pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)


def test_dataframe_groupby_getitem_list_error(
    scalars_df_index,
    scalars_pandas_df_index,
):
    """Check the KeyError raised when several selected columns are missing.

    Groups a four-column slice of ``scalars_df_index`` by ``string_col`` and
    selects two labels ("col1" and "float") that are not among those columns.
    The expected error message lists both missing labels and, for each one,
    a suggested existing label ("bool_col" and "float64_col" respectively).

    Raises:
        AssertionError: via ``pytest.raises`` if no ``KeyError`` is raised or
            its message does not match the expected text.
    """
    col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
    # `match` is treated as a regular expression and searched for in the
    # message, so the literal "." and "?" must be escaped. Unescaped, the
    # trailing "'?" would make the closing quote optional and the question
    # mark itself would never be checked.
    with pytest.raises(
        KeyError,
        match=r"Columns not found: 'col1', 'float'\. Did you mean 'bool_col', 'float64_col'\?",
    ):
        (
            scalars_df_index[col_names]
            .groupby("string_col")["col1", "float"]
            .min()
            .to_pandas()
        )


def test_dataframe_groupby_nonnumeric_with_mean():
df = pd.DataFrame(
{
Expand Down

0 comments on commit 32fab96

Please sign in to comment.