Skip to content

Commit

Permalink
fix(datatypes): return pd.Timestamp or pd.Series[datetime64] for date…
Browse files Browse the repository at this point in the history
….to_pandas()

BREAKING CHANGE: The returned dtype of timestamp and date expressions in DataFrames and Series will be `datetime64` instead of object.
  • Loading branch information
mfatihaktas authored and cpcloud committed Dec 31, 2024
1 parent 56a66de commit a0d8daa
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 36 deletions.
8 changes: 8 additions & 0 deletions docs/contribute/02_workflow.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,11 @@ you are going only up).
```bash
$ colima delete
```

### `x86_64` or `amd64` based containers

When starting containers based on `x86_64` / `amd64`, the architecture needs to be set in two places:

1. Add `platform: linux/amd64` to the service definition in `compose.yaml`.
2. Pass the `--arch` flag when starting the VM: `colima start --arch x86_64`.

For instance, both settings are necessary for the `oracle` service in `compose.yaml`; otherwise, its container will fail shortly after starting.
8 changes: 4 additions & 4 deletions ibis/backends/oracle/converter.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from __future__ import annotations

import datetime
import pandas as pd

from ibis.formats.pandas import PandasData


class OraclePandasData(PandasData):
@classmethod
def convert_Timestamp_element(cls, dtype):
return datetime.datetime.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_Date_element(cls, dtype):
return datetime.date.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_Time_element(cls, dtype):
return datetime.time.fromisoformat
return pd.Timestamp.fromisoformat
8 changes: 4 additions & 4 deletions ibis/backends/snowflake/converter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

import datetime
import json
from typing import TYPE_CHECKING

import pandas as pd
import pyarrow as pa

from ibis.formats.pandas import PandasData
Expand Down Expand Up @@ -52,15 +52,15 @@ def __arrow_ext_scalar_class__(self):
class SnowflakePandasData(PandasData):
@classmethod
def convert_Timestamp_element(cls, dtype):
return datetime.datetime.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_Date_element(cls, dtype):
return datetime.date.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_Time_element(cls, dtype):
return datetime.time.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_JSON(cls, s, dtype, pandas_type):
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/sqlite/tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ def test_type_map(db):
sol = pd.DataFrame(
{"str_col": ["a"], "date_col": pd.Series([date(2022, 1, 1)], dtype="object")}
)
sol["date_col"] = sol["date_col"].astype(res["date_col"].dtype)

assert res.equals(sol)


Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1253,7 +1253,7 @@ def test_string_quantile(alltypes, func):
)
def test_date_quantile(alltypes):
expr = alltypes.timestamp_col.date().quantile(0.5)
result = expr.execute()
result = expr.execute().to_pydatetime().date()
assert result == date(2009, 12, 31)


Expand Down
33 changes: 11 additions & 22 deletions ibis/backends/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,9 +646,7 @@ def convert_to_offset(x):
"ignore", category=(UserWarning, pd.errors.PerformanceWarning)
)
expected = (
pd.to_datetime(df.date_string_col)
.add(offset)
.map(lambda ts: ts.normalize().date(), na_action="ignore")
pd.to_datetime(df.date_string_col).add(offset).astype("datetime64[s]")
)

expected = backend.default_series_rename(expected)
Expand Down Expand Up @@ -723,12 +721,7 @@ def convert_to_offset(x):
),
param(
lambda t, _: t.timestamp_col.date() + ibis.interval(days=4),
lambda t, _: (
t.timestamp_col.dt.floor("d")
.add(pd.Timedelta(days=4))
.dt.normalize()
.dt.date
),
lambda t, _: t.timestamp_col.dt.floor("d").add(pd.Timedelta(days=4)),
id="date-add-interval",
marks=[
pytest.mark.notimpl(
Expand All @@ -739,12 +732,7 @@ def convert_to_offset(x):
),
param(
lambda t, _: t.timestamp_col.date() - ibis.interval(days=14),
lambda t, _: (
t.timestamp_col.dt.floor("d")
.sub(pd.Timedelta(days=14))
.dt.normalize()
.dt.date
),
lambda t, _: t.timestamp_col.dt.floor("d").sub(pd.Timedelta(days=14)),
id="date-subtract-interval",
marks=[
pytest.mark.notimpl(
Expand Down Expand Up @@ -999,14 +987,15 @@ def test_interval_add_cast_column(backend, alltypes, df):
delta = alltypes.bigint_col.cast("interval('D')")
expr = alltypes.select("id", (timestamp_date + delta).name("tmp"))
result = expr.execute().sort_values("id").reset_index().tmp

df = df.sort_values("id").reset_index(drop=True)
expected = (
df["timestamp_col"]
.dt.normalize()
.add(df.bigint_col.astype("timedelta64[D]"))
.rename("tmp")
.dt.date
)

backend.assert_series_equal(result, expected.astype(result.dtype))


Expand Down Expand Up @@ -2239,21 +2228,21 @@ def test_time_literal_sql(dialect, snapshot, micros):
)
def test_date_scalar(con, value, func):
expr = ibis.date(func(value)).name("tmp")

result = con.execute(expr)

assert not isinstance(result, datetime.datetime)
assert isinstance(result, datetime.date)

assert result == datetime.date.fromisoformat(value)
assert isinstance(result, pd.Timestamp)
assert result == pd.Timestamp.fromisoformat(value)


@pytest.mark.notyet(
["datafusion", "druid", "exasol"], raises=com.OperationNotDefinedError
)
def test_simple_unix_date_offset(con):
d = ibis.date("2023-04-07")
s = "2023-04-07"
d = ibis.date(s)
expr = d.epoch_days()
result = con.execute(expr)
delta = datetime.date(2023, 4, 7) - datetime.date(1970, 1, 1)
assert result == delta.days
assert isinstance(result, pd.Timestamp)
assert result == pd.Timestamp.fromisoformat(s)
13 changes: 8 additions & 5 deletions ibis/formats/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,17 +222,20 @@ def convert_Timestamp(cls, s, dtype, pandas_type):
def convert_Date(cls, s, dtype, pandas_type):
if isinstance(s.dtype, pd.DatetimeTZDtype):
s = s.dt.tz_convert("UTC").dt.tz_localize(None)

try:
return s.astype(pandas_type).dt.date
return s.astype(pandas_type)
except (ValueError, TypeError, pd._libs.tslibs.OutOfBoundsDatetime):

def try_date(v):
if isinstance(v, datetime.datetime):
return v.date()
if isinstance(v, datetime.date):
return pd.Timestamp(v)
elif isinstance(v, str):
if v.endswith("Z"):
return datetime.datetime.fromisoformat(v[:-1]).date()
return datetime.date.fromisoformat(v)
datetime_obj = datetime.datetime.fromisoformat(v[:-1])
else:
datetime_obj = datetime.datetime.fromisoformat(v)
return pd.Timestamp(datetime_obj)
else:
return v

Expand Down

0 comments on commit a0d8daa

Please sign in to comment.