diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index aa9649f272..9e17dc2752 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -287,7 +287,6 @@ def reset_index(self, drop: bool = True) -> Block: A new Block because dropping index columns can break references from Index classes that point to this block. """ - block = self new_index_col_id = guid.generate_guid() expr = self._expr.promote_offsets(new_index_col_id) if drop: @@ -295,7 +294,7 @@ def reset_index(self, drop: bool = True) -> Block: # ordering expression as reset_index shouldn't change the row # order. expr = expr.drop_columns(self.index_columns) - block = Block( + return Block( expr, index_columns=[new_index_col_id], column_labels=self.column_labels, @@ -321,13 +320,12 @@ def reset_index(self, drop: bool = True) -> Block: # See: https://pandas.pydata.org/docs/reference/api/pandas.Index.insert.html column_labels_modified = column_labels_modified.insert(level, label) - block = Block( + return Block( expr, index_columns=[new_index_col_id], column_labels=column_labels_modified, index_labels=[None], ) - return block def set_index( self, diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index 6998d0e974..0a47c3a78e 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -294,7 +294,7 @@ def _loc_getitem_series_or_dataframe( keys_df = keys_df.set_index(temp_name, drop=True) return _perform_loc_list_join(series_or_dataframe, keys_df) elif isinstance(key, bigframes.core.indexes.Index): - block = key._data._get_block() + block = key._block block = block.select_columns(()) keys_df = bigframes.dataframe.DataFrame(block) return _perform_loc_list_join(series_or_dataframe, keys_df) diff --git a/bigframes/core/indexes/index.py b/bigframes/core/indexes/index.py index 8b3613d82c..78a4fc6f0b 100644 --- a/bigframes/core/indexes/index.py +++ b/bigframes/core/indexes/index.py @@ -17,8 +17,9 @@ from __future__ import annotations import typing -from 
typing import Mapping, Sequence, Tuple, Union +from typing import Hashable, Mapping, Optional, Sequence, Tuple, Union +import google.cloud.bigquery as bigquery import numpy as np import pandas @@ -33,16 +34,60 @@ import bigframes.core.utils as utils import bigframes.dtypes import bigframes.dtypes as bf_dtypes +import bigframes.formatting_helpers as formatter import bigframes.operations as ops import bigframes.operations.aggregations as agg_ops import third_party.bigframes_vendored.pandas.core.indexes.base as vendored_pandas_index +if typing.TYPE_CHECKING: + import bigframes.dataframe + import bigframes.series + class Index(vendored_pandas_index.Index): __doc__ = vendored_pandas_index.Index.__doc__ - def __init__(self, data: blocks.BlockHolder): - self._data = data + def __init__( + self, + data=None, + dtype=None, + *, + name=None, + ): + import bigframes.dataframe as df + import bigframes.series as series + + if isinstance(data, blocks.Block): + block = data.select_columns([]) + elif isinstance(data, df.DataFrame): + raise ValueError("Cannot construct index from dataframe.") + elif isinstance(data, series.Series) or isinstance(data, Index): + if isinstance(data, series.Series): + block = data._block + block = block.set_index( + col_ids=[data._value_column], + ) + elif isinstance(data, Index): + block = data._block + index = Index(data=block) + name = data.name if name is None else name + if name is not None: + index.name = name + if dtype is not None: + index = index.astype(dtype) + block = index._block + else: + pd_index = pandas.Index(data=data, dtype=dtype, name=name) + pd_df = pandas.DataFrame(index=pd_index) + block = df.DataFrame(pd_df)._block + self._query_job = None + self._block: blocks.Block = block + + @classmethod + def from_frame( + cls, frame: Union[bigframes.series.Series, bigframes.dataframe.DataFrame] + ) -> Index: + return FrameIndex(frame) @property def name(self) -> blocks.Label: @@ -55,15 +100,16 @@ def name(self, value: blocks.Label): 
@property def names(self) -> typing.Sequence[blocks.Label]: """Returns the names of the Index.""" - return self._data._get_block()._index_labels + return self._block._index_labels @names.setter def names(self, values: typing.Sequence[blocks.Label]): - return self._data._set_block(self._block.with_index_labels(values)) + new_block = self._block.with_index_labels(values) + self._block = new_block @property def nlevels(self) -> int: - return len(self._data._get_block().index_columns) + return len(self._block.index_columns) @property def values(self) -> np.ndarray: @@ -75,7 +121,7 @@ def ndim(self) -> int: @property def shape(self) -> typing.Tuple[int]: - return (self._data._get_block().shape[0],) + return (self._block.shape[0],) @property def dtype(self): @@ -107,9 +153,7 @@ def is_monotonic_increasing(self) -> bool: """ return typing.cast( bool, - self._data._get_block().is_monotonic_increasing( - self._data._get_block().index_columns - ), + self._block.is_monotonic_increasing(self._block.index_columns), ) @property @@ -122,9 +166,7 @@ def is_monotonic_decreasing(self) -> bool: """ return typing.cast( bool, - self._data._get_block().is_monotonic_decreasing( - self._data._get_block().index_columns - ), + self._block.is_monotonic_decreasing(self._block.index_columns), ) @property @@ -149,14 +191,65 @@ def has_duplicates(self) -> bool: duplicates_df = df.DataFrame(duplicates_block) return duplicates_df["is_duplicate"].any() - @property - def _block(self) -> blocks.Block: - return self._data._get_block() - @property def T(self) -> Index: return self.transpose() + @property + def query_job(self) -> Optional[bigquery.QueryJob]: + """BigQuery job metadata for the most recent query. + + Returns: + The most recent `QueryJob + <https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob>`_. + """ + if self._query_job is None: + self._query_job = self._block._compute_dry_run() + return self._query_job + + def __repr__(self) -> str: + # TODO(swast): Add a timeout here?
If the query is taking a long time, + # maybe we just print the job metadata that we have so far? + # TODO(swast): Avoid downloading the whole series by using job + # metadata, like we do with DataFrame. + opts = bigframes.options.display + max_results = opts.max_rows + if opts.repr_mode == "deferred": + return formatter.repr_query_job(self.query_job) + + pandas_df, _, query_job = self._block.retrieve_repr_request_results(max_results) + self._query_job = query_job + return repr(pandas_df.index) + + def copy(self, name: Optional[Hashable] = None): + copy_index = Index(self._block) + if name is not None: + copy_index.name = name + return copy_index + + def to_series( + self, index: Optional[Index] = None, name: Optional[Hashable] = None + ) -> bigframes.series.Series: + if self.nlevels != 1: + raise NotImplementedError( + f"Converting multi-index to series is not yet supported. {constants.FEEDBACK_LINK}" + ) + + import bigframes.series + + name = self.name if name is None else name + if index is None: + return bigframes.series.Series(data=self, index=self, name=name) + else: + return bigframes.series.Series(data=self, index=Index(index), name=name) + + def get_level_values(self, level) -> Index: + level_n = level if isinstance(level, int) else self.names.index(level) + block = self._block.drop_levels( + [self._block.index_columns[i] for i in range(self.nlevels) if i != level_n] + ) + return Index(block) + def _memory_usage(self) -> int: (n_rows,) = self.shape return sum( @@ -180,7 +273,7 @@ def sort_values(self, *, ascending: bool = True, na_position: str = "last"): order.OrderingColumnReference(column, direction=direction, na_last=na_last) for column in index_columns ] - return Index._from_block(self._block.order_by(ordering)) + return Index(self._block.order_by(ordering)) def astype( self, @@ -269,7 +362,7 @@ def rename(self, name: Union[str, Sequence[str]]) -> Index: names = [name] if isinstance(name, str) else list(name) if len(names) != self.nlevels: raise
ValueError("'name' must be same length as levels") - return Index._from_block(self._block.with_index_labels(names)) + return Index(self._block.with_index_labels(names)) def drop( self, @@ -291,17 +384,17 @@ def drop( ) block = block.filter(condition_id, keep_null=True) block = block.drop_columns([condition_id]) - return Index._from_block(block) + return Index(block) def dropna(self, how: str = "any") -> Index: if how not in ("any", "all"): raise ValueError("'how' must be one of 'any', 'all'") result = block_ops.dropna(self._block, self._block.index_columns, how=how) # type: ignore - return Index._from_block(result) + return Index(result) def drop_duplicates(self, *, keep: str = "first") -> Index: block = block_ops.drop_duplicates(self._block, self._block.index_columns, keep) - return Index._from_block(block) + return Index(block) def isin(self, values) -> Index: if not utils.is_list_like(values): @@ -330,7 +423,7 @@ def _apply_unary_expr( result_ids.append(result_id) block = block.set_index(result_ids, index_labels=self._block.index_labels) - return Index._from_block(block) + return Index(block) def _apply_aggregation(self, op: agg_ops.AggregateOp) -> typing.Any: if self.nlevels > 1: @@ -344,7 +437,7 @@ def __getitem__(self, key: int) -> typing.Any: result_pd_df, _ = self._block.slice(key, key + 1, 1).to_pandas() else: # special case, want [-1:] instead of [-1:0] result_pd_df, _ = self._block.slice(key).to_pandas() - if result_pd_df.empty: + if result_pd_df.index.empty: raise IndexError("single positional indexer is out-of-bounds") return result_pd_df.index[0] else: @@ -367,11 +460,36 @@ def to_numpy(self, dtype=None, **kwargs) -> np.ndarray: def __len__(self): return self.shape[0] - @classmethod - def _from_block(cls, block: blocks.Block) -> Index: - import bigframes.dataframe as df - return Index(df.DataFrame(block)) +# Index that mutates the originating dataframe/series +class FrameIndex(Index): + def __init__( + self, + series_or_dataframe: typing.Union[ + 
bigframes.series.Series, bigframes.dataframe.DataFrame + ], + ): + super().__init__(series_or_dataframe._block) + self._whole_frame = series_or_dataframe + + @property + def name(self) -> blocks.Label: + return self.names[0] + + @name.setter + def name(self, value: blocks.Label): + self.names = [value] + + @property + def names(self) -> typing.Sequence[blocks.Label]: + """Returns the names of the Index.""" + return self._block._index_labels + + @names.setter + def names(self, values: typing.Sequence[blocks.Label]): + new_block = self._whole_frame._get_block().with_index_labels(values) + self._whole_frame._set_block(new_block) + self._block = new_block class IndexValue: @@ -406,15 +524,6 @@ def dtypes( def session(self) -> core.Session: return self._expr.session - def __repr__(self) -> str: - """Converts an Index to a string.""" - # TODO(swast): Add a timeout here? If the query is taking a long time, - # maybe we just print the job metadata that we have so far? - # TODO(swast): Avoid downloading the whole index by using job - # metadata, like we do with DataFrame. - preview = self.to_pandas() - return repr(preview) - def to_pandas(self) -> pandas.Index: """Executes deferred operations and downloads the results.""" # Project down to only the index column. So the query can be cached to visualize other data. diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 9989831e1b..2a20a4aabb 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -146,10 +146,15 @@ def __init__( block = result_index._block if block: - if index: - raise NotImplementedError( - "DataFrame 'index' constructor parameter not supported " - f"when passing BigQuery-backed objects. 
{constants.FEEDBACK_LINK}" + if index is not None: + bf_index = indexes.Index(index) + idx_block = bf_index._block + idx_cols = idx_block.index_columns + join_idx, (_, r_mapping) = block.reset_index().index.join( + bf_index._block.reset_index().index, how="inner" + ) + block = join_idx._block.set_index( + [r_mapping[idx_col] for idx_col in idx_cols] ) if columns: block = block.select_columns(list(columns)) # type:ignore @@ -250,7 +255,7 @@ def _sql_names( def index( self, ) -> indexes.Index: - return indexes.Index(self) + return indexes.Index.from_frame(self) @index.setter def index(self, value): @@ -661,6 +666,14 @@ def _apply_binop( ): if isinstance(other, (float, int)): return self._apply_scalar_binop(other, op, reverse=reverse) + elif isinstance(other, indexes.Index): + return self._apply_series_binop( + other.to_series(index=self.index), + op, + axis=axis, + how=how, + reverse=reverse, + ) elif isinstance(other, bigframes.series.Series): return self._apply_series_binop( other, op, axis=axis, how=how, reverse=reverse @@ -1183,7 +1196,7 @@ def drop( return DataFrame(block) def _drop_by_index(self, index: indexes.Index) -> DataFrame: - block = index._data._get_block() + block = index._block block, ordering_col = block.promote_offsets() joined_index, (get_column_left, get_column_right) = self._block.index.join( block.index @@ -1319,9 +1332,7 @@ def _assign_single_item_listlike(self, k: str, v: Sequence) -> DataFrame: f"Length of values ({given_rows}) does not match length of index ({actual_rows})" ) - local_df = bigframes.dataframe.DataFrame( - {k: v}, session=self._get_block().expr.session - ) + local_df = DataFrame({k: v}, session=self._get_block().expr.session) # local_df is likely (but not guaranteed) to be cached locally # since the original list came from memory and so is probably < MAX_INLINE_DF_SIZE @@ -1622,7 +1633,7 @@ def _reindex_rows( raise ValueError("Original index must be unique to reindex") keep_original_names = False if isinstance(index, 
indexes.Index): - new_indexer = DataFrame(data=index._data._get_block())[[]] + new_indexer = DataFrame(data=index._block)[[]] else: if not isinstance(index, pandas.Index): keep_original_names = True diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index 077815a9d6..6829d3faab 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -21,6 +21,7 @@ import bigframes.constants as constants import bigframes.core.blocks as blocks import bigframes.core.expression as ex +import bigframes.core.indexes as indexes import bigframes.core.scalar as scalars import bigframes.dtypes import bigframes.operations as ops @@ -54,10 +55,34 @@ def __init__( if isinstance(data, blocks.Block): assert len(data.value_columns) == 1 assert len(data.column_labels) == 1 + assert index is None block = data elif isinstance(data, SeriesMethods): - block = data._get_block() + block = data._block + if index is not None: + # reindex + bf_index = indexes.Index(index) + idx_block = bf_index._block + idx_cols = idx_block.value_columns + block_idx, _ = idx_block.index.join(block.index, how="left") + block = block_idx._block.with_index_labels(bf_index.names) + + elif isinstance(data, indexes.Index): + if data.nlevels != 1: + raise NotImplementedError("Cannot interpret multi-index as Series.") + # Reset index to promote index columns to value columns, set default index + block = data._block.reset_index(drop=False) + if index is not None: + # Align by offset + bf_index = indexes.Index(index) + idx_block = bf_index._block.reset_index(drop=False) + idx_cols = idx_block.value_columns + block_idx, (l_mapping, _) = idx_block.index.join( + block.index, how="left" + ) + block = block_idx._block.set_index([l_mapping[col] for col in idx_cols]) + block = block.with_index_labels(bf_index.names) if block: if name: @@ -66,16 +91,10 @@ def __init__( f"BigQuery DataFrames only supports hashable series names. 
{constants.FEEDBACK_LINK}" ) block = block.with_column_labels([name]) - if index: - raise NotImplementedError( - f"Series 'index' constructor parameter not supported when passing BigQuery-backed objects. {constants.FEEDBACK_LINK}" - ) if dtype: block = block.multi_apply_unary_op( block.value_columns, ops.AsTypeOp(to_type=dtype) ) - self._block = block - else: import bigframes.pandas @@ -95,14 +114,15 @@ def __init__( if isinstance(dt, pd.ArrowDtype) ) ): - self._block = blocks.block_from_local(pd_dataframe) + block = blocks.block_from_local(pd_dataframe) elif session: - self._block = session.read_pandas(pd_dataframe)._get_block() + block = session.read_pandas(pd_dataframe)._get_block() else: # Uses default global session - self._block = bigframes.pandas.read_pandas(pd_dataframe)._get_block() + block = bigframes.pandas.read_pandas(pd_dataframe)._get_block() if pd_series.name is None: - self._block = self._block.with_column_labels([None]) + block = block.with_column_labels([None]) + self._block: blocks.Block = block @property def _value_column(self) -> str: diff --git a/bigframes/series.py b/bigframes/series.py index e049b41461..6a21727975 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -76,10 +76,6 @@ def dtype(self): def dtypes(self): return self._dtype - @property - def index(self) -> indexes.Index: - return indexes.Index(self) - @property def loc(self) -> bigframes.core.indexers.LocSeriesIndexer: return bigframes.core.indexers.LocSeriesIndexer(self) @@ -120,6 +116,10 @@ def empty(self) -> bool: def values(self) -> numpy.ndarray: return self.to_numpy() + @property + def index(self) -> indexes.Index: + return indexes.Index.from_frame(self) + @property def query_job(self) -> Optional[bigquery.QueryJob]: """BigQuery job metadata for the most recent query. 
@@ -978,7 +978,7 @@ def idxmax(self) -> blocks.Label: ] ) block = block.slice(0, 1) - return indexes.Index._from_block(block).to_pandas()[0] + return indexes.Index(block).to_pandas()[0] def idxmin(self) -> blocks.Label: block = self._block.order_by( @@ -991,7 +991,7 @@ def idxmin(self) -> blocks.Label: ] ) block = block.slice(0, 1) - return indexes.Index._from_block(block).to_pandas()[0] + return indexes.Index(block).to_pandas()[0] @property def is_monotonic_increasing(self) -> bool: @@ -1279,9 +1279,7 @@ def reindex(self, index=None, *, validate: typing.Optional[bool] = None): raise ValueError("Original index must be unique to reindex") keep_original_names = False if isinstance(index, indexes.Index): - new_indexer = bigframes.dataframe.DataFrame(data=index._data._get_block())[ - [] - ] + new_indexer = bigframes.dataframe.DataFrame(data=index._block)[[]] else: if not isinstance(index, pandas.Index): keep_original_names = True diff --git a/tests/system/small/test_index.py b/tests/system/small/test_index.py index f7fa0f0855..2961884ebf 100644 --- a/tests/system/small/test_index.py +++ b/tests/system/small/test_index.py @@ -16,9 +16,44 @@ import pandas as pd import pytest +import bigframes.pandas as bpd from tests.system.utils import assert_pandas_index_equal_ignore_index_type +def test_index_construct_from_list(): + bf_result = bpd.Index( + [3, 14, 159], dtype=pd.Int64Dtype(), name="my_index" + ).to_pandas() + pd_result = pd.Index([3, 14, 159], dtype=pd.Int64Dtype(), name="my_index") + pd.testing.assert_index_equal(bf_result, pd_result) + + +def test_index_construct_from_series(): + bf_result = bpd.Index( + bpd.Series([3, 14, 159], dtype=pd.Float64Dtype(), name="series_name"), + name="index_name", + dtype=pd.Int64Dtype(), + ).to_pandas() + pd_result = pd.Index( + pd.Series([3, 14, 159], dtype=pd.Float64Dtype(), name="series_name"), + name="index_name", + dtype=pd.Int64Dtype(), + ) + pd.testing.assert_index_equal(bf_result, pd_result) + + +def 
test_index_construct_from_index(): + bf_index_input = bpd.Index( + [3, 14, 159], dtype=pd.Float64Dtype(), name="series_name" + ) + bf_result = bpd.Index( + bf_index_input, dtype=pd.Int64Dtype(), name="index_name" + ).to_pandas() + pd_index_input = pd.Index([3, 14, 159], dtype=pd.Float64Dtype(), name="series_name") + pd_result = pd.Index(pd_index_input, dtype=pd.Int64Dtype(), name="index_name") + pd.testing.assert_index_equal(bf_result, pd_result) + + def test_get_index(scalars_df_index, scalars_pandas_df_index): index = scalars_df_index.index bf_result = index.to_pandas() @@ -240,6 +275,43 @@ def test_index_value_counts(scalars_df_index, scalars_pandas_df_index): pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) +@pytest.mark.parametrize( + ("level",), + [ + ("int64_too",), + ("rowindex_2",), + (1,), + ], +) +def test_index_get_level_values(scalars_df_index, scalars_pandas_df_index, level): + bf_result = ( + scalars_df_index.set_index(["int64_too", "rowindex_2"]) + .index.get_level_values(level) + .to_pandas() + ) + pd_result = scalars_pandas_df_index.set_index( + ["int64_too", "rowindex_2"] + ).index.get_level_values(level) + + pd.testing.assert_index_equal(bf_result, pd_result) + + +def test_index_to_series( + scalars_df_index, + scalars_pandas_df_index, +): + bf_result = ( + scalars_df_index.set_index(["int64_too"]) + .index.to_series(index=scalars_df_index["float64_col"], name="new_name") + .to_pandas() + ) + pd_result = scalars_pandas_df_index.set_index(["int64_too"]).index.to_series( + index=scalars_pandas_df_index["float64_col"], name="new_name" + ) + + pd.testing.assert_series_equal(bf_result, pd_result) + + @pytest.mark.parametrize( ("how",), [ diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 69b35d102c..5d8fb0b29c 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -42,6 +42,40 @@ def test_series_construct_copy(scalars_dfs): 
pd.testing.assert_series_equal(bf_result, pd_result) +def test_series_construct_copy_with_index(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = series.Series( + scalars_df["int64_col"], + name="test_series", + dtype="Float64", + index=scalars_df["int64_too"], + ).to_pandas() + pd_result = pd.Series( + scalars_pandas_df["int64_col"], + name="test_series", + dtype="Float64", + index=scalars_pandas_df["int64_too"], + ) + pd.testing.assert_series_equal(bf_result, pd_result) + + +def test_series_construct_copy_index(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = series.Series( + scalars_df.index, + name="test_series", + dtype="Float64", + index=scalars_df["int64_too"], + ).to_pandas() + pd_result = pd.Series( + scalars_pandas_df.index, + name="test_series", + dtype="Float64", + index=scalars_pandas_df["int64_too"], + ) + pd.testing.assert_series_equal(bf_result, pd_result) + + def test_series_construct_pandas(scalars_dfs): _, scalars_pandas_df = scalars_dfs bf_result = series.Series( diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 10cdbf8f7c..93fba9f3aa 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -4714,7 +4714,7 @@ def index(self): [3 rows x 3 columns] >>> df.index # doctest: +ELLIPSIS - <bigframes.core.indexes.index.Index object at ...> + Index([10, 20, 30], dtype='Int64') >>> df.index.values array([10, 20, 30], dtype=object) @@ -4731,7 +4731,10 @@ def index(self): [3 rows x 1 columns] >>> df1.index # doctest: +ELLIPSIS - <bigframes.core.indexes.index.Index object at ...> + MultiIndex([( 'Alice', 'Seattle'), + ( 'Bob', 'New York'), + ('Aritra', 'Kona')], + name='Name') >>> df1.index.values array([('Alice', 'Seattle'), ('Bob', 'New York'), ('Aritra', 'Kona')], dtype=object) diff --git a/third_party/bigframes_vendored/pandas/core/indexes/base.py b/third_party/bigframes_vendored/pandas/core/indexes/base.py index e8737341a3..3ad8729271 100644 ---
a/third_party/bigframes_vendored/pandas/core/indexes/base.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/base.py @@ -58,6 +58,23 @@ def T(self) -> Index: """Return the transpose, which is by definition self.""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def copy( + self, + name=None, + ) -> Index: + """ + Make a copy of this object. + + Name is set on the new object. + + Args: + name (Label, optional): + Set name for new object. + Returns: + Index: Index that refers to a new object which is a copy of this object. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def transpose(self) -> Index: """ Return the transpose, which is by definition self. @@ -81,6 +98,40 @@ def astype(self, dtype): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def get_level_values(self, level) -> Index: + """ + Return an Index of values for requested level. + + This is primarily useful to get an individual level of values from a + MultiIndex, but is provided on Index as well for compatibility. + + Args: + level (int or str): + It is either the integer position or the name of the level. + + Returns: + Index: Calling object, as there is only one level in the Index. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def to_series(self, index=None, name=None): + """ + Create a Series with both index and values equal to the index keys. + + Useful with map for returning an indexer based on an index. + + Args: + index (Index, optional): + Index of resulting Series. If None, defaults to original index. + name (str, optional): + Name of resulting Series. If None, defaults to name of original + index. + + Returns: + Series: The dtype will be based on the type of the Index values. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def isin(self, values): """ Return a boolean array where the index values are in `values`.
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 1aa4ffffbb..33f03572f1 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -69,7 +69,7 @@ def index(self): 30 35 Name: Age, dtype: Int64 >>> s.index # doctest: +ELLIPSIS - <bigframes.core.indexes.index.Index object at ...> + Index([10, 20, 30], dtype='Int64') >>> s.index.values array([10, 20, 30], dtype=object) @@ -84,7 +84,10 @@ def index(self): Aritra Kona 35 Name: Age, dtype: Int64 >>> s1.index # doctest: +ELLIPSIS - <bigframes.core.indexes.index.Index object at ...> + MultiIndex([( 'Alice', 'Seattle'), + ( 'Bob', 'New York'), + ('Aritra', 'Kona')], + name='Name') >>> s1.index.values array([('Alice', 'Seattle'), ('Bob', 'New York'), ('Aritra', 'Kona')], dtype=object)