Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Series.between_time() #2129

Merged
merged 3 commits into from
Mar 31, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion databricks/koalas/missing/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ class MissingPandasLikeSeries(object):
asfreq = _unsupported_function("asfreq")
at_time = _unsupported_function("at_time")
autocorr = _unsupported_function("autocorr")
between_time = _unsupported_function("between_time")
combine = _unsupported_function("combine")
convert_dtypes = _unsupported_function("convert_dtypes")
cov = _unsupported_function("cov")
Expand Down
65 changes: 65 additions & 0 deletions databricks/koalas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"""
A wrapper class for Spark Column to behave similar to pandas Series.
"""
import datetime
import re
import inspect
import sys
Expand Down Expand Up @@ -5791,6 +5792,70 @@ def align(

return (left_ser.copy(), right.copy()) if copy else (left_ser, right)

def between_time(
    self,
    start_time: Union[datetime.time, str],
    end_time: Union[datetime.time, str],
    include_start: bool = True,
    include_end: bool = True,
    axis: Union[int, str] = 0,
) -> "Series":
    """
    Keep only the values whose timestamp falls inside a time-of-day window
    (e.g., 9:00-9:30 AM).

    When ``start_time`` is later than ``end_time`` the window wraps around,
    which yields the times that are *not* between the two times.

    Parameters
    ----------
    start_time : datetime.time or str
        Lower limit of the time-of-day filter.
    end_time : datetime.time or str
        Upper limit of the time-of-day filter.
    include_start : bool, default True
        Whether values exactly at ``start_time`` are kept in the result.
    include_end : bool, default True
        Whether values exactly at ``end_time`` are kept in the result.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Whether the time range is matched against the index or the columns.

    Returns
    -------
    Series
        The values of the original object that fall within the given
        time-of-day range.

    Raises
    ------
    TypeError
        If the index is not a :class:`DatetimeIndex`

    See Also
    --------
    at_time : Select values at a particular time of the day.
    last : Select final periods of time series based on a date offset.
    DatetimeIndex.indexer_between_time : Get just the index locations for
        values between particular times of the day.

    Examples
    --------
    >>> idx = pd.date_range('2018-04-09', periods=4, freq='1D20min')
    >>> kser = ks.Series([1, 2, 3, 4], index=idx)
    >>> kser
    2018-04-09 00:00:00    1
    2018-04-10 00:20:00    2
    2018-04-11 00:40:00    3
    2018-04-12 01:00:00    4
    dtype: int64

    >>> kser.between_time('0:15', '0:45')
    2018-04-10 00:20:00    2
    2018-04-11 00:40:00    3
    dtype: int64
    """
    # The filtering itself is delegated to the DataFrame implementation:
    # wrap this Series in a single-column frame, filter it, then unwrap.
    frame = self.to_frame()
    filtered = frame.between_time(start_time, end_time, include_start, include_end, axis)
    selected = first_series(filtered)
    # Re-apply this Series' own name to the unwrapped column.
    return selected.rename(self.name)

def _cum(self, func, skipna, part_cols=(), ascending=True):
# This is used to cummin, cummax, cumsum, etc.

Expand Down
23 changes: 23 additions & 0 deletions databricks/koalas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2882,3 +2882,26 @@ def test_pow_and_rpow(self):
self.assert_eq(pser ** np.nan, kser ** np.nan)
self.assert_eq(pser.rpow(np.nan), kser.rpow(np.nan))
self.assert_eq(1 ** pser, 1 ** kser)

def test_between_time(self):
    """Check Series.between_time against pandas for several index names."""
    idx = pd.date_range("2018-04-09", periods=4, freq="1D20min")
    pser = pd.Series([1, 2, 3, 4], index=idx)

    def compare_with_pandas():
        # Rebuild the Koalas series from the current pandas state, then
        # compare both results after sorting by index.
        kser = ks.from_pandas(pser)
        expected = pser.between_time("0:15", "0:45").sort_index()
        actual = kser.between_time("0:15", "0:45").sort_index()
        self.assert_eq(expected, actual)

    # First with the index name left as created by ``pd.date_range``.
    compare_with_pandas()

    # Then with explicit index names, including the literal name "index".
    for index_name in ("ts", "index"):
        pser.index.name = index_name
        compare_with_pandas()
1 change: 1 addition & 0 deletions docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ Time series-related
Series.shift
Series.first_valid_index
Series.last_valid_index
Series.between_time

Spark-related
-------------
Expand Down