From a0d8daa5cad0530002416e23846e307cff8379d5 Mon Sep 17 00:00:00 2001 From: mfatihaktas Date: Tue, 26 Mar 2024 16:26:12 -0400 Subject: [PATCH] fix(datatypes): return pd.Timestamp or pd.Series[datetime64] for date.to_pandas() BREAKING CHANGE: The returned dtype of timestamp and date expressions in DataFrames and Series will be `datetime64` instead of object. --- docs/contribute/02_workflow.qmd | 8 ++++++ ibis/backends/oracle/converter.py | 8 +++--- ibis/backends/snowflake/converter.py | 8 +++--- ibis/backends/sqlite/tests/test_types.py | 2 ++ ibis/backends/tests/test_aggregation.py | 2 +- ibis/backends/tests/test_temporal.py | 33 ++++++++---------------- ibis/formats/pandas.py | 13 ++++++---- 7 files changed, 38 insertions(+), 36 deletions(-) diff --git a/docs/contribute/02_workflow.qmd b/docs/contribute/02_workflow.qmd index 02ed1f5df6dc..d945804a954f 100644 --- a/docs/contribute/02_workflow.qmd +++ b/docs/contribute/02_workflow.qmd @@ -274,3 +274,11 @@ you are going only up). ```bash $ colima delete ``` + +### `x86_64` or `amd64` based containers + +When starting containers based on `x86_64` / `amd64`, the architecture flag needs to be set in two places: +1. Add `platform: linux/amd64` for the service in `compose.yaml`. +2. Set the `--arch` flag when starting the VM: `colima start --arch x86_64`. + +For instance, this step is necessary for the `oracle` service in `compose.yaml`. Otherwise, the container will fail shortly after starting. 
diff --git a/ibis/backends/oracle/converter.py b/ibis/backends/oracle/converter.py index 7755cb595340..0b013847e047 100644 --- a/ibis/backends/oracle/converter.py +++ b/ibis/backends/oracle/converter.py @@ -1,6 +1,6 @@ from __future__ import annotations -import datetime +import pandas as pd from ibis.formats.pandas import PandasData @@ -8,12 +8,12 @@ class OraclePandasData(PandasData): @classmethod def convert_Timestamp_element(cls, dtype): - return datetime.datetime.fromisoformat + return pd.Timestamp.fromisoformat @classmethod def convert_Date_element(cls, dtype): - return datetime.date.fromisoformat + return pd.Timestamp.fromisoformat @classmethod def convert_Time_element(cls, dtype): - return datetime.time.fromisoformat + return pd.Timestamp.fromisoformat diff --git a/ibis/backends/snowflake/converter.py b/ibis/backends/snowflake/converter.py index a4f3443e23a2..ed4b4a272855 100644 --- a/ibis/backends/snowflake/converter.py +++ b/ibis/backends/snowflake/converter.py @@ -1,9 +1,9 @@ from __future__ import annotations -import datetime import json from typing import TYPE_CHECKING +import pandas as pd import pyarrow as pa from ibis.formats.pandas import PandasData @@ -52,15 +52,15 @@ def __arrow_ext_scalar_class__(self): class SnowflakePandasData(PandasData): @classmethod def convert_Timestamp_element(cls, dtype): - return datetime.datetime.fromisoformat + return pd.Timestamp.fromisoformat @classmethod def convert_Date_element(cls, dtype): - return datetime.date.fromisoformat + return pd.Timestamp.fromisoformat @classmethod def convert_Time_element(cls, dtype): - return datetime.time.fromisoformat + return pd.Timestamp.fromisoformat @classmethod def convert_JSON(cls, s, dtype, pandas_type): diff --git a/ibis/backends/sqlite/tests/test_types.py b/ibis/backends/sqlite/tests/test_types.py index fb6598391f41..224ce421ab56 100644 --- a/ibis/backends/sqlite/tests/test_types.py +++ b/ibis/backends/sqlite/tests/test_types.py @@ -92,6 +92,8 @@ def test_type_map(db): sol = 
pd.DataFrame( {"str_col": ["a"], "date_col": pd.Series([date(2022, 1, 1)], dtype="object")} ) + sol["date_col"] = sol["date_col"].astype(res["date_col"].dtype) + assert res.equals(sol) diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index c6c4c452d3fd..9f91899899d9 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -1253,7 +1253,7 @@ def test_string_quantile(alltypes, func): ) def test_date_quantile(alltypes): expr = alltypes.timestamp_col.date().quantile(0.5) - result = expr.execute() + result = expr.execute().to_pydatetime().date() assert result == date(2009, 12, 31) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index c8f49f17cb31..aec375a9368a 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -646,9 +646,7 @@ def convert_to_offset(x): "ignore", category=(UserWarning, pd.errors.PerformanceWarning) ) expected = ( - pd.to_datetime(df.date_string_col) - .add(offset) - .map(lambda ts: ts.normalize().date(), na_action="ignore") + pd.to_datetime(df.date_string_col).add(offset).astype("datetime64[s]") ) expected = backend.default_series_rename(expected) @@ -723,12 +721,7 @@ def convert_to_offset(x): ), param( lambda t, _: t.timestamp_col.date() + ibis.interval(days=4), - lambda t, _: ( - t.timestamp_col.dt.floor("d") - .add(pd.Timedelta(days=4)) - .dt.normalize() - .dt.date - ), + lambda t, _: t.timestamp_col.dt.floor("d").add(pd.Timedelta(days=4)), id="date-add-interval", marks=[ pytest.mark.notimpl( @@ -739,12 +732,7 @@ def convert_to_offset(x): ), param( lambda t, _: t.timestamp_col.date() - ibis.interval(days=14), - lambda t, _: ( - t.timestamp_col.dt.floor("d") - .sub(pd.Timedelta(days=14)) - .dt.normalize() - .dt.date - ), + lambda t, _: t.timestamp_col.dt.floor("d").sub(pd.Timedelta(days=14)), id="date-subtract-interval", marks=[ pytest.mark.notimpl( @@ -999,14 +987,15 @@ def 
test_interval_add_cast_column(backend, alltypes, df): delta = alltypes.bigint_col.cast("interval('D')") expr = alltypes.select("id", (timestamp_date + delta).name("tmp")) result = expr.execute().sort_values("id").reset_index().tmp + df = df.sort_values("id").reset_index(drop=True) expected = ( df["timestamp_col"] .dt.normalize() .add(df.bigint_col.astype("timedelta64[D]")) .rename("tmp") - .dt.date ) + backend.assert_series_equal(result, expected.astype(result.dtype)) @@ -2239,21 +2228,21 @@ def test_time_literal_sql(dialect, snapshot, micros): ) def test_date_scalar(con, value, func): expr = ibis.date(func(value)).name("tmp") - result = con.execute(expr) - assert not isinstance(result, datetime.datetime) - assert isinstance(result, datetime.date) - - assert result == datetime.date.fromisoformat(value) + assert isinstance(result, pd.Timestamp) + assert result == pd.Timestamp.fromisoformat(value) @pytest.mark.notyet( ["datafusion", "druid", "exasol"], raises=com.OperationNotDefinedError ) def test_simple_unix_date_offset(con): - d = ibis.date("2023-04-07") + s = "2023-04-07" + d = ibis.date(s) expr = d.epoch_days() result = con.execute(expr) delta = datetime.date(2023, 4, 7) - datetime.date(1970, 1, 1) assert result == delta.days + assert isinstance(result, pd.Timestamp) + assert result == pd.Timestamp.fromisoformat(s) diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index c9336526a382..86b2f353cbb7 100644 --- a/ibis/formats/pandas.py +++ b/ibis/formats/pandas.py @@ -222,17 +222,20 @@ def convert_Timestamp(cls, s, dtype, pandas_type): def convert_Date(cls, s, dtype, pandas_type): if isinstance(s.dtype, pd.DatetimeTZDtype): s = s.dt.tz_convert("UTC").dt.tz_localize(None) + try: - return s.astype(pandas_type).dt.date + return s.astype(pandas_type) except (ValueError, TypeError, pd._libs.tslibs.OutOfBoundsDatetime): def try_date(v): - if isinstance(v, datetime.datetime): - return v.date() + if isinstance(v, datetime.date): + return pd.Timestamp(v) elif 
isinstance(v, str): if v.endswith("Z"): - return datetime.datetime.fromisoformat(v[:-1]).date() - return datetime.date.fromisoformat(v) + datetime_obj = datetime.datetime.fromisoformat(v[:-1]) + else: + datetime_obj = datetime.datetime.fromisoformat(v) + return pd.Timestamp(datetime_obj) else: return v