Commit e5c5854
Merge branch 'main' into shuowei-add-sklearn-samples
shuoweil authored Dec 26, 2024
2 parents b0a0530 + ed47ef1
Showing 19 changed files with 461 additions and 98 deletions.
15 changes: 11 additions & 4 deletions bigframes/dtypes.py
@@ -406,12 +406,19 @@ def arrow_dtype_to_bigframes_dtype(arrow_dtype: pa.DataType) -> Dtype:
         return pd.ArrowDtype(arrow_dtype)
     if pa.types.is_struct(arrow_dtype):
         return pd.ArrowDtype(arrow_dtype)
+
+    # BigFrames doesn't distinguish between string and large_string because the
+    # largest string (2 GB) is already larger than the largest BigQuery row.
+    if pa.types.is_string(arrow_dtype) or pa.types.is_large_string(arrow_dtype):
+        return STRING_DTYPE
+
     if arrow_dtype == pa.null():
         return DEFAULT_DTYPE
-    else:
-        raise ValueError(
-            f"Unexpected Arrow data type {arrow_dtype}. {constants.FEEDBACK_LINK}"
-        )
+
+    # No other types matched.
+    raise ValueError(
+        f"Unexpected Arrow data type {arrow_dtype}. {constants.FEEDBACK_LINK}"
+    )


 _BIGFRAMES_TO_ARROW = {
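A minimal sketch (not part of this commit) of the behavior the new branch gives arrow_dtype_to_bigframes_dtype. It assumes the module-level names STRING_DTYPE and DEFAULT_DTYPE shown in the diff are importable from bigframes.dtypes:

    import pyarrow as pa

    from bigframes import dtypes

    # string and large_string now collapse to the same BigFrames string dtype.
    assert dtypes.arrow_dtype_to_bigframes_dtype(pa.string()) == dtypes.STRING_DTYPE
    assert dtypes.arrow_dtype_to_bigframes_dtype(pa.large_string()) == dtypes.STRING_DTYPE

    # pa.null() still maps to the default dtype; any Arrow type with no
    # matching branch still raises ValueError with the feedback link.
    assert dtypes.arrow_dtype_to_bigframes_dtype(pa.null()) == dtypes.DEFAULT_DTYPE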
39 changes: 34 additions & 5 deletions bigframes/pandas/io/api.py
@@ -30,6 +30,7 @@
     Union,
 )

+import bigframes_vendored.constants as constants
 import bigframes_vendored.pandas.io.gbq as vendored_pandas_gbq
 from google.cloud import bigquery
 import numpy
@@ -103,6 +104,7 @@ def read_csv(
         Literal["c", "python", "pyarrow", "python-fwf", "bigquery"]
     ] = None,
     encoding: Optional[str] = None,
+    write_engine: constants.WriteEngineType = "default",
     **kwargs,
 ) -> bigframes.dataframe.DataFrame:
     return global_session.with_default_session(
@@ -116,6 +118,7 @@
         dtype=dtype,
         engine=engine,
         encoding=encoding,
+        write_engine=write_engine,
         **kwargs,
     )

@@ -133,6 +136,7 @@ def read_json(
     encoding: Optional[str] = None,
     lines: bool = False,
     engine: Literal["ujson", "pyarrow", "bigquery"] = "ujson",
+    write_engine: constants.WriteEngineType = "default",
     **kwargs,
 ) -> bigframes.dataframe.DataFrame:
     return global_session.with_default_session(
@@ -143,6 +147,7 @@
         encoding=encoding,
         lines=lines,
         engine=engine,
+        write_engine=write_engine,
         **kwargs,
     )
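A usage sketch (illustrative, not from the diff): the new keyword-only write_engine parameter is threaded through to the session and controls how locally read data is written to BigQuery. The file path below is hypothetical, and the full set of accepted values is defined by bigframes_vendored.constants.WriteEngineType rather than shown here; in bigframes releases of this vintage they include "default", "bigquery_inline", "bigquery_load", and "bigquery_streaming", but only "default" is confirmed by the diff itself.

    import bigframes.pandas as bpd

    # Hypothetical path; any source read_csv already accepts works the same way.
    df = bpd.read_csv(
        "gs://my-bucket/data.csv",
        write_engine="default",  # value confirmed by the diff's default
    )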

@@ -243,24 +248,41 @@ def read_gbq_table(


 @typing.overload
-def read_pandas(pandas_dataframe: pandas.DataFrame) -> bigframes.dataframe.DataFrame:
+def read_pandas(
+    pandas_dataframe: pandas.DataFrame,
+    *,
+    write_engine: constants.WriteEngineType = "default",
+) -> bigframes.dataframe.DataFrame:
     ...


 @typing.overload
-def read_pandas(pandas_dataframe: pandas.Series) -> bigframes.series.Series:
+def read_pandas(
+    pandas_dataframe: pandas.Series,
+    *,
+    write_engine: constants.WriteEngineType = "default",
+) -> bigframes.series.Series:
     ...


 @typing.overload
-def read_pandas(pandas_dataframe: pandas.Index) -> bigframes.core.indexes.Index:
+def read_pandas(
+    pandas_dataframe: pandas.Index,
+    *,
+    write_engine: constants.WriteEngineType = "default",
+) -> bigframes.core.indexes.Index:
     ...


-def read_pandas(pandas_dataframe: Union[pandas.DataFrame, pandas.Series, pandas.Index]):
+def read_pandas(
+    pandas_dataframe: Union[pandas.DataFrame, pandas.Series, pandas.Index],
+    *,
+    write_engine: constants.WriteEngineType = "default",
+):
     return global_session.with_default_session(
         bigframes.session.Session.read_pandas,
         pandas_dataframe,
+        write_engine=write_engine,
     )
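A short usage sketch for the reworked read_pandas (illustrative, not part of the commit). Because of the bare * added to each overload, write_engine can only be passed by keyword:

    import pandas as pd

    import bigframes.pandas as bpd

    pdf = pd.DataFrame({"x": [1, 2, 3]})

    # Keyword-only: bpd.read_pandas(pdf, "default") would be a TypeError.
    bdf = bpd.read_pandas(pdf, write_engine="default")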


@@ -271,25 +293,32 @@ def read_pickle(
     filepath_or_buffer: FilePath | ReadPickleBuffer,
     compression: CompressionOptions = "infer",
     storage_options: StorageOptions = None,
+    *,
+    write_engine: constants.WriteEngineType = "default",
 ):
     return global_session.with_default_session(
         bigframes.session.Session.read_pickle,
         filepath_or_buffer=filepath_or_buffer,
         compression=compression,
         storage_options=storage_options,
+        write_engine=write_engine,
     )


 read_pickle.__doc__ = inspect.getdoc(bigframes.session.Session.read_pickle)


 def read_parquet(
-    path: str | IO["bytes"], *, engine: str = "auto"
+    path: str | IO["bytes"],
+    *,
+    engine: str = "auto",
+    write_engine: constants.WriteEngineType = "default",
 ) -> bigframes.dataframe.DataFrame:
     return global_session.with_default_session(
         bigframes.session.Session.read_parquet,
         path,
         engine=engine,
+        write_engine=write_engine,
     )
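The same pattern applies to the remaining readers, again as a hedged sketch: the paths are hypothetical (whether read_pickle accepts a gs:// URL depends on the installed fsspec/gcsfs support), and only "default" is confirmed as a write_engine value by this diff.

    import bigframes.pandas as bpd

    # Hypothetical sources; both functions now forward write_engine to the session.
    df1 = bpd.read_parquet("gs://my-bucket/data.parquet", write_engine="default")
    df2 = bpd.read_pickle("gs://my-bucket/data.pkl", write_engine="default")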

