Skip to content

Commit

Permalink
Merge pull request #77 from martindurant/cleanups
Browse files Browse the repository at this point in the history
Some missing linkages
  • Loading branch information
martindurant authored Aug 15, 2024
2 parents e8c12a3 + 08b1d76 commit 321b6e4
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 13 deletions.
3 changes: 1 addition & 2 deletions src/akimbo/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def __init__(self, accessor) -> None:

# listed below https://arrow.apache.org/docs/python/generated/
# pyarrow.compute.ceil_temporal.html
cast = dec(pc.cast) # TODO: move to .ak
# Temporal rounding wrappers around the corresponding pyarrow.compute functions.
ceil_temporal = dec_t(pc.ceil_temporal)
floor_temporal = dec_t(pc.floor_temporal)
round_temporal = dec_t(pc.round_temporal)
# Misspelled name from earlier versions, kept as an alias so that existing
# callers of ``reound_temporal`` keep working.
reound_temporal = round_temporal
Expand Down Expand Up @@ -62,7 +61,7 @@ def __init__(self, accessor) -> None:
weeks_between = dec_t(pc.weeks_between)
years_between = dec_t(pc.years_between)

# TODO: strftime, strptime
strftime = dec_t(pc.strftime)

# TODO: timezone conversion

Expand Down
13 changes: 13 additions & 0 deletions src/akimbo/mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from typing import Callable, Iterable

import awkward as ak
import pyarrow.compute as pc

from akimbo.apply_tree import dec

methods = [
_ for _ in (dir(ak)) if not _.startswith(("_", "ak_")) and not _[0].isupper()
Expand Down Expand Up @@ -142,6 +145,16 @@ def __init__(self, obj, behavior=None):
self._obj = obj
self._behavior = behavior

# Calling the accessor returns a new ``Accessor`` wrapping the same underlying
# object (``self._obj``) with the supplied ``behavior``. Positional arguments
# and any other keyword arguments are accepted but not used here.
# NOTE(review): the result is constructed as ``Accessor`` by name rather than
# ``type(self)`` - confirm this is intended if this class is subclassed.
def __call__(self, *args, behavior=None, **kwargs):
return Accessor(self._obj, behavior=behavior)

# ``cast`` is exposed directly at this level, wrapping pyarrow.compute.cast
# via ``dec``.
cast = dec(pc.cast)

@property
def accessor(self):
# Needed because ``dec`` is used directly on this class (see ``cast``).
# Presumably ``dec``-wrapped functions expect to work on an object that
# has an ``.accessor`` attribute, as the sub-accessors (which are
# constructed with an accessor) do; here that is simply this object itself.
# TODO confirm against the implementation of ``dec``.
return self

@classmethod
def is_series(cls, data):
return isinstance(data, cls.series_type)
Expand Down
11 changes: 10 additions & 1 deletion src/akimbo/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from collections.abc import Callable

import awkward as ak
import pyarrow.compute as pc

from akimbo.apply_tree import dec
from akimbo.mixin import Accessor
Expand Down Expand Up @@ -50,6 +51,12 @@ def _decode(layout):
if not aname.startswith(("_", "akstr_")) and not aname[0].isupper()
]

# make sensible defaults for strptime
# Thin wrapper around ``pc.strptime`` that supplies defaults for ``format``
# ("%FT%T"), ``unit`` ("s") and ``error_is_null`` (True), so it can be called
# with no explicit arguments beyond the data. ``functools.wraps`` copies the
# metadata of ``pc.strptime`` onto the wrapper.
# NOTE(review): extra keyword arguments (``**kw``) are accepted but are NOT
# forwarded to ``pc.strptime`` - confirm that silently dropping them is
# deliberate.
strptime = functools.wraps(pc.strptime)(
lambda *args, format="%FT%T", unit="s", error_is_null=True, **kw:
pc.strptime(*args, format=format, unit=unit, error_is_null=error_is_null)
)


class StringAccessor:
def __init__(self, accessor):
Expand Down Expand Up @@ -92,8 +99,10 @@ def f(*args, **kwargs):

return f

strptime = dec(strptime, match=match_string)

def __dir__(self) -> list[str]:
return sorted(methods)
return sorted(methods + ["strptime"])


Accessor.register_accessor("str", StringAccessor)
24 changes: 17 additions & 7 deletions tests/test_dt.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import datetime
import os

import pytest

import akimbo.pandas # noqa

pd = pytest.importorskip("pandas")
WIN = os.name == "nt"


def test_cast():
s = pd.Series([[0, 1], [1, 0], [2]])
out = s.ak.dt.cast("timestamp[s]")
out = s.ak.cast("timestamp[s]")
assert str(out.dtype) == "list<item: timestamp[s]>[pyarrow]"
assert out.to_list() == [
[datetime.datetime(1970, 1, 1, 0, 0), datetime.datetime(1970, 1, 1, 0, 0, 1)],
Expand All @@ -20,7 +22,7 @@ def test_cast():

def test_unary_unit():
s = pd.Series([[0, 1], [1, 0], [2]])
ts = s.ak.dt.cast("timestamp[s]")
ts = s.ak.cast("timestamp[s]")
s2 = ts.ak.dt.second()
assert s.to_list() == s2.to_list()

Expand All @@ -35,8 +37,8 @@ def test_bad_type():
def test_binary():
s = pd.Series([[0, 1], [1, 0], [2]])
s2 = s.ak + 1
ts1 = s.ak.dt.cast("timestamp[s]")
ts2 = s2.ak.dt.cast("timestamp[s]")
ts1 = s.ak.cast("timestamp[s]")
ts2 = s2.ak.cast("timestamp[s]")

out = ts1.ak.dt.nanoseconds_between(ts2)
assert out.tolist() == [
Expand All @@ -50,8 +52,8 @@ def test_binary():
def test_binary_with_kwargs():
s = pd.Series([[0, 1], [1, 0], [2]])
s2 = s.ak + int(24 * 3600 * 7 * 2.5)
ts1 = s.ak.dt.cast("timestamp[s]")
ts2 = s2.ak.dt.cast("timestamp[s]")
ts1 = s.ak.cast("timestamp[s]")
ts2 = s2.ak.cast("timestamp[s]")

out = ts1.ak.dt.weeks_between(ts2, count_from_zero=False, week_start=2)
assert out.tolist() == [[2, 2], [2, 2], [2]]
Expand All @@ -64,8 +66,16 @@ def test_mixed_record():
s = pd.Series(data)

# explicit select of where to apply transform
ts = s.ak.dt.cast("timestamp[s]", where="a")
ts = s.ak.cast("timestamp[s]", where="a")

# implicit selection of timestamps
s2 = ts.ak.dt.second()
assert s2.to_list() == data


# Round-trip check: text -> timestamps (``str.strptime`` called with no
# arguments, i.e. relying on its defaults) -> text (``dt.strftime`` with the
# "%FT%T" format) must reproduce the input, with the ``None`` entry preserved.
# Skipped on Windows (``WIN``), for the reason given in the marker.
@pytest.mark.skipif(WIN, reason="arrow on windows needs a timezone database")
def test_text_conversion():
s = pd.Series([["2024-08-01T01:00:00", None, "2024-08-01T01:01:00"]])
s2 = s.ak.str.strptime()
s3 = s2.ak.dt.strftime("%FT%T")
assert s3.tolist() == [["2024-08-01T01:00:00", None, "2024-08-01T01:01:00"]]
6 changes: 3 additions & 3 deletions tests/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ def test_encode_decode():


def test_split():
s = pd.Series(["hello world", "oio", ""])
s = pd.Series(["hello world", "oio", pd.NA, ""])
s2 = s.ak.str.split_whitespace()
assert s2.tolist() == [["hello", "world"], ["oio"], [""]]
assert s2.tolist() == [["hello", "world"], ["oio"], pd.NA, [""]]
s2 = s.ak.str.split_pattern("i")
assert s2.tolist() == [["hello world"], ["o", "o"], [""]]
assert s2.tolist() == [["hello world"], ["o", "o"], pd.NA, [""]]

s = pd.Series([b"hello world", b"oio", b""])
s2 = s.ak.str.split_whitespace()
Expand Down

0 comments on commit 321b6e4

Please sign in to comment.