# ---- bigframes/core/indexers.py: imports added by this change ----
import warnings

import bigframes.dtypes
import bigframes.exceptions


# ---- bigframes/core/indexers.py ----
def _struct_accessor_check_and_warn(
    series: bigframes.series.Series, index: indexes.Index
):
    """Warn when a string-keyed lookup looks like a struct field access.

    ``_perform_loc_list_join`` calls this for Series inputs before it does
    the join. A warning is emitted only when all three conditions hold:

    * ``series`` holds struct-like values,
    * the lookup keys in ``index`` are string-like, and
    * the index of ``series`` itself is *not* string-like.

    In every other case the function returns silently.

    Args:
        series: The Series being indexed.
        index: Index built from the keys used for the lookup.
    """
    dtypes = bigframes.dtypes

    # Evaluated left to right with short-circuiting, so ``series.index`` is
    # only touched once the two cheaper dtype checks have passed.
    looks_like_field_access = (
        dtypes.is_struct_like(series.dtype)
        and dtypes.is_string_like(index.dtype)
        and not dtypes.is_string_like(series.index.dtype)
    )
    if not looks_like_field_access:
        return

    hint = (
        "Are you trying to access struct fields? If so, please use "
        "Series.struct.field(...) method instead."
    )
    warnings.warn(
        hint,
        category=bigframes.exceptions.BadIndexerKeyWarning,
        # Stack depth from series.__getitem__ to here
        stacklevel=7,
    )


# ---- bigframes/exceptions.py ----
class BadIndexerKeyWarning(Warning):
    """Warning category for an indexer key that is not used correctly."""


# ---- tests/system/small/core/__init__.py (new file: license header only) ----
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ---- tests/system/small/core/test_indexers.py (new file: header begins) ----
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings

import pyarrow as pa
import pytest

import bigframes.exceptions
import bigframes.pandas as bpd


def _project_struct_dtype():
    """Build the Arrow struct dtype with a string ``project`` and int64 ``version``."""
    fields = [("project", pa.string()), ("version", pa.int64())]
    return bpd.ArrowDtype(pa.struct(fields))


def _project_struct_rows():
    """Return the single struct row shared by the struct-valued series."""
    return [{"project": "pandas", "version": 1}]


@pytest.fixture(scope="module")
def string_indexed_struct_series(session):
    """One-row struct Series whose only index label is the string ``"a"``."""
    return bpd.Series(
        _project_struct_rows(),
        dtype=_project_struct_dtype(),
        index=["a"],
        session=session,
    )


@pytest.fixture(scope="module")
def number_series(session):
    """One-row Int64 Series created without an explicit index."""
    return bpd.Series([0], dtype=bpd.Int64Dtype, session=session)


@pytest.fixture(scope="module")
def string_indexed_number_series(session):
    """One-row Int64 Series whose only index label is the string ``"a"``."""
    return bpd.Series([0], dtype=bpd.Int64Dtype, index=["a"], session=session)


def test_non_string_indexed_struct_series_with_string_key_should_warn(session):
    """A struct Series built without an explicit index warns on a string key."""
    struct_series = bpd.Series(
        _project_struct_rows(),
        dtype=_project_struct_dtype(),
        session=session,
    )

    with pytest.warns(bigframes.exceptions.BadIndexerKeyWarning):
        struct_series["a"]


@pytest.mark.parametrize(
    "series",
    [
        "string_indexed_struct_series",
        "number_series",
        "string_indexed_number_series",
    ],
)
@pytest.mark.parametrize("key", [0, "a"])
def test_struct_series_indexers_should_not_warn(request, series, key):
    """None of the listed series/key combinations may emit BadIndexerKeyWarning."""
    target = request.getfixturevalue(series)

    with warnings.catch_warnings():
        # Promote the warning to an exception so that emitting it fails the test.
        warnings.simplefilter(
            "error", category=bigframes.exceptions.BadIndexerKeyWarning
        )
        target[key]