From 89f67d303a37f31cf3d6eac42a4a604979755d42 Mon Sep 17 00:00:00 2001
From: Xinrong Meng
Date: Wed, 31 Mar 2021 10:53:39 -0700
Subject: [PATCH 1/3] Function and test

---
 databricks/koalas/series.py            | 65 ++++++++++++++++++++++++++
 databricks/koalas/tests/test_series.py | 23 +++++++++
 2 files changed, 88 insertions(+)

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
index d424d2973..3986f0c61 100644
--- a/databricks/koalas/series.py
+++ b/databricks/koalas/series.py
@@ -17,6 +17,7 @@
 """
 A wrapper class for Spark Column to behave similar to pandas Series.
 """
+import datetime
 import re
 import inspect
 import sys
@@ -5791,6 +5792,70 @@ def align(
 
         return (left_ser.copy(), right.copy()) if copy else (left_ser, right)
 
+    def between_time(
+        self,
+        start_time: Union[datetime.time, str],
+        end_time: Union[datetime.time, str],
+        include_start: bool = True,
+        include_end: bool = True,
+        axis: Union[int, str] = 0,
+    ) -> "Series":
+        """
+        Select values between particular times of the day (e.g., 9:00-9:30 AM).
+
+        By setting ``start_time`` to be later than ``end_time``,
+        you can get the times that are *not* between the two times.
+
+        Parameters
+        ----------
+        start_time : datetime.time or str
+            Initial time as a time filter limit.
+        end_time : datetime.time or str
+            End time as a time filter limit.
+        include_start : bool, default True
+            Whether the start time needs to be included in the result.
+        include_end : bool, default True
+            Whether the end time needs to be included in the result.
+        axis : {0 or 'index', 1 or 'columns'}, default 0
+            Determine range time on index or columns value.
+
+        Returns
+        -------
+        DataFrame
+            Data from the original object filtered to the specified dates range.
+
+        Raises
+        ------
+        TypeError
+            If the index is not a :class:`DatetimeIndex`
+
+        See Also
+        --------
+        at_time : Select values at a particular time of the day.
+        last : Select final periods of time series based on a date offset.
+        DatetimeIndex.indexer_between_time : Get just the index locations for
+            values between particular times of the day.
+
+        Examples
+        --------
+        >>> idx = pd.date_range('2018-04-09', periods=4, freq='1D20min')
+        >>> kser = ks.Series([1, 2, 3, 4], index=idx)
+        >>> kser
+        2018-04-09 00:00:00    1
+        2018-04-10 00:20:00    2
+        2018-04-11 00:40:00    3
+        2018-04-12 01:00:00    4
+        dtype: int64
+
+        >>> kser.between_time('0:15', '0:45')
+        2018-04-10 00:20:00    2
+        2018-04-11 00:40:00    3
+        dtype: int64
+        """
+        return first_series(
+            self.to_frame().between_time(start_time, end_time, include_start, include_end, axis)
+        ).rename(self.name)
+
     def _cum(self, func, skipna, part_cols=(), ascending=True):
         # This is used to cummin, cummax, cumsum, etc.
 
diff --git a/databricks/koalas/tests/test_series.py b/databricks/koalas/tests/test_series.py
index 1830217a6..0348f9a5d 100644
--- a/databricks/koalas/tests/test_series.py
+++ b/databricks/koalas/tests/test_series.py
@@ -2882,3 +2882,26 @@ def test_pow_and_rpow(self):
         self.assert_eq(pser ** np.nan, kser ** np.nan)
         self.assert_eq(pser.rpow(np.nan), kser.rpow(np.nan))
         self.assert_eq(1 ** pser, 1 ** kser)
+
+    def test_between_time(self):
+        idx = pd.date_range("2018-04-09", periods=4, freq="1D20min")
+        pser = pd.Series([1, 2, 3, 4], index=idx)
+        kser = ks.from_pandas(pser)
+        self.assert_eq(
+            pser.between_time("0:15", "0:45").sort_index(),
+            kser.between_time("0:15", "0:45").sort_index(),
+        )
+
+        pser.index.name = "ts"
+        kser = ks.from_pandas(pser)
+        self.assert_eq(
+            pser.between_time("0:15", "0:45").sort_index(),
+            kser.between_time("0:15", "0:45").sort_index(),
+        )
+
+        pser.index.name = "index"
+        kser = ks.from_pandas(pser)
+        self.assert_eq(
+            pser.between_time("0:15", "0:45").sort_index(),
+            kser.between_time("0:15", "0:45").sort_index(),
+        )

From 25c305cf682f1d592828aa386a30fe2bdf778509 Mon Sep 17 00:00:00 2001
From: Xinrong Meng
Date: Wed, 31 Mar 2021 10:55:50 -0700
Subject: [PATCH 2/3] Missing and doc

---
 databricks/koalas/missing/series.py | 1 -
 docs/source/reference/series.rst    | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/databricks/koalas/missing/series.py b/databricks/koalas/missing/series.py
index 489b345ad..1df263cc8 100644
--- a/databricks/koalas/missing/series.py
+++ b/databricks/koalas/missing/series.py
@@ -38,7 +38,6 @@ class MissingPandasLikeSeries(object):
     asfreq = _unsupported_function("asfreq")
     at_time = _unsupported_function("at_time")
     autocorr = _unsupported_function("autocorr")
-    between_time = _unsupported_function("between_time")
     combine = _unsupported_function("combine")
     convert_dtypes = _unsupported_function("convert_dtypes")
     cov = _unsupported_function("cov")
diff --git a/docs/source/reference/series.rst b/docs/source/reference/series.rst
index 6ee65776e..6941a0e9a 100644
--- a/docs/source/reference/series.rst
+++ b/docs/source/reference/series.rst
@@ -237,6 +237,7 @@ Time series-related
    Series.shift
    Series.first_valid_index
    Series.last_valid_index
+   Series.between_time
 
 Spark-related
 -------------

From 8964e4bb7820522b0d66b03a981dd5ba2f48e801 Mon Sep 17 00:00:00 2001
From: Xinrong Meng
Date: Wed, 31 Mar 2021 11:16:45 -0700
Subject: [PATCH 3/3] Update return type

---
 databricks/koalas/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
index 3986f0c61..289d70946 100644
--- a/databricks/koalas/series.py
+++ b/databricks/koalas/series.py
@@ -5821,7 +5821,7 @@ def between_time(
 
         Returns
         -------
-        DataFrame
+        Series
             Data from the original object filtered to the specified dates range.
 
         Raises