-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Support bigframes.pandas.to_datetime for scalars, iterables and series. (#372)
* feat: Support pd.to_datetime for scalars, iterables and series.
* update test and docstring
* update types
* format update
* remove import.
* update docstring
* update arg conversion
* update examples
* update format
* update code examples, and working logic.
* docstring update.
* type update.
* format update.
* Update docstring format
* remove import
* remove empty line
* Remove extra code
* remove prints.
* Code logic updates.
* Add constants.
* Update comments
* Move datetime helpers to the end of file.
* Update helper
* update format
* String process logic updated.
* update import
* remove print
* update docstring
* update docstring
* update docstring
* update note
* update docstring
* Update code examples
- Loading branch information
1 parent
de1e0a4
commit ffb0d15
Showing
8 changed files
with
322 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Copyright 2024 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from bigframes.core.tools.datetimes import to_datetime | ||
|
||
# Names re-exported as the public API of this package.
__all__ = ["to_datetime"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# Copyright 2024 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from collections.abc import Mapping | ||
from datetime import datetime | ||
from typing import Optional, Union | ||
|
||
import pandas as pd | ||
|
||
import bigframes.constants as constants | ||
import bigframes.core.global_session as global_session | ||
import bigframes.dataframe | ||
import bigframes.operations as ops | ||
import bigframes.series | ||
import third_party.bigframes_vendored.pandas.core.tools.datetimes as vendored_pandas_datetimes | ||
|
||
|
||
def to_datetime(
    arg: Union[
        vendored_pandas_datetimes.local_scalars,
        vendored_pandas_datetimes.local_iterables,
        bigframes.series.Series,
        bigframes.dataframe.DataFrame,
    ],
    *,
    utc: bool = False,
    format: Optional[str] = None,
    unit: Optional[str] = None,
) -> Union[pd.Timestamp, datetime, bigframes.series.Series]:
    """Convert a scalar, a local iterable or a Series to datetime.

    Python scalars (``int``, ``float``, ``str``, ``datetime``) are converted
    locally by ``pandas.to_datetime``. Any other input that is not already a
    ``bigframes.series.Series`` is wrapped in a ``pandas.DataFrame``, loaded
    through the default session with ``Session.read_pandas`` and reduced to its
    single column. The resulting Series is converted by applying
    ``ops.ToDatetimeOp``.

    Args:
        arg: The value(s) to convert.
        utc: Forwarded to ``pandas.to_datetime`` for scalars and to
            ``ops.ToDatetimeOp`` for Series. Must be True for Series whose
            dtype is neither ``Int64`` nor ``Float64``.
        format: Format string, forwarded unchanged.
        unit: Unit of a numeric ``arg``, forwarded unchanged.

    Returns:
        The result of ``pandas.to_datetime`` for scalar input, otherwise the
        Series produced by applying ``ops.ToDatetimeOp``.

    Raises:
        NotImplementedError: If ``arg`` is a Mapping, a ``pandas.DataFrame`` or
            a ``bigframes.dataframe.DataFrame``, or if ``utc`` is False and the
            Series dtype is neither ``Int64`` nor ``Float64``.
        ValueError: If local, non-scalar input does not form exactly one
            column.
    """
    # Plain Python scalars never need BigQuery; defer to pandas directly.
    if isinstance(arg, (int, float, str, datetime)):
        return pd.to_datetime(
            arg,
            utc=utc,
            format=format,
            unit=unit,
        )

    if isinstance(arg, (Mapping, pd.DataFrame, bigframes.dataframe.DataFrame)):
        raise NotImplementedError(
            "Conversion of Mapping, pandas.DataFrame, or bigframes.dataframe.DataFrame "
            f"to datetime is not implemented. {constants.FEEDBACK_LINK}"
        )

    if not isinstance(arg, bigframes.series.Series):
        # This block ensures compatibility with local data formats, including
        # iterables and pandas.Series
        # TODO: Currently, data upload is performed using pandas DataFrames
        # combined with the `read_pandas` method due to the BigFrames DataFrame
        # constructor's limitations in handling various data types. Plan to update
        # the upload process to utilize the BigFrames DataFrame constructor directly
        # once it is enhanced for more related datatypes.
        local_frame = pd.DataFrame(arg)

        # Reject input that is not 1-dimensional while it is still local, so
        # invalid data is never handed to the session.
        if len(local_frame.columns) != 1:
            raise ValueError("Input must be 1-dimensional.")

        remote_frame = global_session.with_default_session(
            bigframes.session.Session.read_pandas, local_frame
        )
        arg = remote_frame[remote_frame.columns[0]]

    # Only numeric (epoch-style) input may be converted without utc=True.
    if not utc and arg.dtype not in ("Int64", "Float64"):  # type: ignore
        raise NotImplementedError(
            f"String and Timestamp requires utc=True. {constants.FEEDBACK_LINK}"
        )

    return arg._apply_unary_op(  # type: ignore
        ops.ToDatetimeOp(
            utc=utc,
            format=format,
            unit=unit,
        )
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
77 changes: 77 additions & 0 deletions
77
third_party/bigframes_vendored/pandas/core/tools/datetimes.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/tools/datetimes.py | ||
|
||
from datetime import datetime | ||
from typing import Iterable, Mapping, Union | ||
|
||
import pandas as pd | ||
|
||
from bigframes import constants, series | ||
|
||
local_scalars = Union[int, float, str, datetime] | ||
local_iterables = Union[Iterable, pd.Series, pd.DataFrame, Mapping] | ||
|
||
|
||
def to_datetime(
    arg,
    *,
    utc=False,
    format=None,
    unit=None,
) -> Union[pd.Timestamp, datetime, series.Series]:
    """
    This function converts a scalar, array-like or Series to a datetime object.

    .. note::
        BigQuery only supports precision up to microseconds (us). Therefore, when working
        with timestamps that have a finer granularity than microseconds, be aware that
        the additional precision will not be represented in BigQuery.

    .. note::
        The format strings for specifying datetime representations in BigQuery and pandas
        are not completely identical. Ensure that the format string provided is compatible
        with BigQuery.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

    Converting a Scalar to datetime:

        >>> scalar = 123456.789
        >>> bpd.to_datetime(scalar, unit = 's')
        Timestamp('1970-01-02 10:17:36.789000')

    Converting a List of Strings without Timezone Information:

        >>> list_str = ["01-31-2021 14:30", "02-28-2021 15:45"]
        >>> bpd.to_datetime(list_str, format="%m-%d-%Y %H:%M", utc=True)
        0    2021-01-31 14:30:00+00:00
        1    2021-02-28 15:45:00+00:00
        Name: 0, dtype: timestamp[us, tz=UTC][pyarrow]

    Converting a Series of Strings with Timezone Information:

        >>> series_str = bpd.Series(["01-31-2021 14:30+08:00", "02-28-2021 15:45+00:00"])
        >>> bpd.to_datetime(series_str, format="%m-%d-%Y %H:%M%Z", utc=True)
        0    2021-01-31 06:30:00+00:00
        1    2021-02-28 15:45:00+00:00
        dtype: timestamp[us, tz=UTC][pyarrow]

    Args:
        arg (int, float, str, datetime, list, tuple, 1-d array, Series):
            The object to convert to a datetime.
        utc (bool, default False):
            Control timezone-related parsing, localization and conversion. If True, the
            function always returns a timezone-aware UTC-localized timestamp or series.
            If False (default), inputs will not be coerced to UTC.
        format (str, default None):
            The strftime-style format string used to parse the time, e.g. "%d/%m/%Y".
        unit (str, default None):
            The unit of the arg (D, s, ms, us, ns) when arg is an integer or
            float number.

    Returns:
        Timestamp, datetime.datetime or bigframes.series.Series: Return type depends on input.
    """
    # Vendored signature/docstring holder only; always raises.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)