diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py
index cba9443005f2f..4b03c60fc223a 100644
--- a/pandas/tests/series/methods/test_interpolate.py
+++ b/pandas/tests/series/methods/test_interpolate.py
@@ -26,8 +26,7 @@
         "from_derivatives",
         "pchip",
         "akima",
-        "cubicspline",
-    ]
+        "cubicspline"]
 )
 def nontemporal_method(request):
     """Fixture that returns an (method name, required kwargs) pair.
@@ -56,8 +55,7 @@ def nontemporal_method(request):
         "from_derivatives",
         "pchip",
         "akima",
-        "cubicspline",
-    ]
+        "cubicspline"]
 )
 def interp_methods_ind(request):
     """Fixture that returns a (method name, required kwargs) pair to
@@ -104,10 +102,7 @@ def test_interpolate_cubicspline(self):
         ser = Series([10, 11, 12, 13])
 
-        expected = Series(
-            [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
-            index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
-        )
+        expected = Series([11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]))
         # interpolate at new_index
         new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
             float
         )
@@ -134,10 +129,7 @@ def test_interpolate_akima(self):
 
         ser = Series([10, 11, 12, 13])
         # interpolate at new_index where `der` is zero
-        expected = Series(
-            [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
-            index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
-        )
+        expected = Series([11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]))
         new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
             float
         )
@@ -145,10 +137,7 @@ def test_interpolate_akima(self):
         interp_s = ser.reindex(new_index).interpolate(method="akima")
         tm.assert_series_equal(interp_s[1:3], expected)
         # interpolate at new_index where `der` is a non-zero int
-        expected = Series(
-            [11.0, 1.0, 1.0, 1.0, 12.0, 1.0, 1.0, 1.0, 13.0],
-            index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
-        )
+        expected = Series([11.0, 1.0, 1.0, 1.0, 12.0, 1.0, 1.0, 1.0, 13.0], index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]))
         new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
             float
         )
         interp_s = ser.reindex(new_index).interpolate(method="akima", der=1)
         tm.assert_series_equal(interp_s[1:3], expected)
@@ -159,10 +148,7 @@ def test_interpolate_piecewise_polynomial(self):
         ser = Series([10, 11, 12, 13])
 
-        expected = Series(
-            [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
-            index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
-        )
+        expected = Series([11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]))
         # interpolate at new_index
         new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
             float
         )
@@ -174,10 +160,7 @@ def test_interpolate_from_derivatives(self):
         ser = Series([10, 11, 12, 13])
 
-        expected = Series(
-            [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
-            index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
-        )
+        expected = Series([11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]))
         # interpolate at new_index
         new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
             float
         )
@@ -187,13 +170,7 @@
     @pytest.mark.parametrize(
         "kwargs",
-        [
-            {},
-            pytest.param(
-                {"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy
-            ),
-        ],
-    )
+        [{}, pytest.param({"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy)])
     def test_interpolate_corners(self, kwargs):
         s = Series([np.nan, np.nan])
         tm.assert_series_equal(s.interpolate(**kwargs), s)
@@ -233,15 +210,7 @@ def test_interpolate_non_ts(self):
         with pytest.raises(ValueError, match=msg):
             s.interpolate(method="time")
 
-    @pytest.mark.parametrize(
-        "kwargs",
-        [
-            {},
-            pytest.param(
-                {"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy
-            ),
-        ],
-    )
+    @pytest.mark.parametrize("kwargs", [{}, pytest.param({"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy)])
     def test_nan_interpolate(self, kwargs):
         s = Series([0, 1, np.nan, 3])
         result = s.interpolate(**kwargs)
@@ -431,17 +400,7 @@ def test_interp_limit_area(self):
 
     @pytest.mark.parametrize(
         "method, limit_direction, expected",
-        [
-            ("pad", "backward", "forward"),
-            ("ffill", "backward", "forward"),
-            ("backfill", "forward", "backward"),
-            ("bfill", "forward", "backward"),
-            ("pad", "both", "forward"),
-            ("ffill", "both", "forward"),
-            ("backfill", "both", "backward"),
-            ("bfill", "both", "backward"),
-        ],
-    )
+        [("pad", "backward", "forward"), ("ffill", "backward", "forward"), ("backfill", "forward", "backward"), ("bfill", "forward", "backward"), ("pad", "both", "forward"), ("ffill", "both", "forward"), ("backfill", "both", "backward"), ("bfill", "both", "backward")])
     def test_interp_limit_direction_raises(self, method, limit_direction, expected):
         # https://github.com/pandas-dev/pandas/pull/34746
         s = Series([1, 2, 3])
@@ -540,14 +499,9 @@ def test_interp_nonmono_raise(self):
     @td.skip_if_no_scipy
     @pytest.mark.parametrize("method", ["nearest", "pad"])
     def test_interp_datetime64(self, method, tz_naive_fixture):
-        df = Series(
-            [1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture)
-        )
+        df = Series([1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture))
         result = df.interpolate(method=method)
-        expected = Series(
-            [1.0, 1.0, 3.0],
-            index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture),
-        )
+        expected = Series([1.0, 1.0, 3.0], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture))
         tm.assert_series_equal(result, expected)
 
     def test_interp_pad_datetime64tz_values(self):
@@ -658,14 +612,7 @@ def test_series_interpolate_intraday(self):
 
         tm.assert_numpy_array_equal(result.values, exp.values)
 
-    @pytest.mark.parametrize(
-        "ind",
-        [
-            ["a", "b", "c", "d"],
-            pd.period_range(start="2019-01-01", periods=4),
-            pd.interval_range(start=0, end=4),
-        ],
-    )
+    @pytest.mark.parametrize("ind", [["a", "b", "c", "d"], pd.period_range(start="2019-01-01", periods=4), pd.interval_range(start=0, end=4)])
     def test_interp_non_timedelta_index(self, interp_methods_ind, ind):
         # gh 21662
         df = pd.DataFrame([0, 1, np.nan, 3], index=ind)
@@ -711,10 +658,7 @@ def test_interpolate_timedelta_index(self, interp_methods_ind):
                 "This interpolation method is not supported for Timedelta Index yet."
             )
 
-    @pytest.mark.parametrize(
-        "ascending, expected_values",
-        [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])],
-    )
+    @pytest.mark.parametrize("ascending, expected_values", [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])])
     def test_interpolate_unsorted_index(self, ascending, expected_values):
         # GH 21037
         ts = pd.Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1])
diff --git a/pandas/tests/series/methods/test_unstack.py b/pandas/tests/series/methods/test_unstack.py
index d651315d64561..7a524fb20c6ab 100644
--- a/pandas/tests/series/methods/test_unstack.py
+++ b/pandas/tests/series/methods/test_unstack.py
@@ -7,34 +7,21 @@
 
 def test_unstack():
-    index = MultiIndex(
-        levels=[["bar", "foo"], ["one", "three", "two"]],
-        codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
-    )
+    index = MultiIndex(levels=[["bar", "foo"], ["one", "three", "two"]], codes=[[1, 1, 0, 0], [0, 1, 0, 2]])
 
     s = Series(np.arange(4.0), index=index)
     unstacked = s.unstack()
 
-    expected = DataFrame(
-        [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
-        index=["bar", "foo"],
-        columns=["one", "three", "two"],
-    )
+    expected = DataFrame([[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]], index=["bar", "foo"], columns=["one", "three", "two"])
     tm.assert_frame_equal(unstacked, expected)
 
     unstacked = s.unstack(level=0)
     tm.assert_frame_equal(unstacked, expected.T)
 
-    index = MultiIndex(
-        levels=[["bar"], ["one", "two", "three"], [0, 1]],
-        codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
-    )
+    index = MultiIndex(levels=[["bar"], ["one", "two", "three"], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]])
 
     s = Series(np.random.randn(6), index=index)
-    exp_index = MultiIndex(
-        levels=[["one", "two", "three"], [0, 1]],
-        codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
-    )
+    exp_index = MultiIndex(levels=[["one", "two", "three"], [0, 1]], codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]])
     expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
     unstacked = s.unstack(0).sort_index()
     tm.assert_frame_equal(unstacked, expected)
@@ -48,18 +35,9 @@ def test_unstack():
     )
     tm.assert_frame_equal(left, right)
 
-    idx = pd.MultiIndex.from_arrays(
-        [
-            ["cat", "cat", "cat", "dog", "dog"],
-            ["a", "a", "b", "a", "b"],
-            [1, 2, 1, 1, np.nan],
-        ]
-    )
+    idx = pd.MultiIndex.from_arrays([["cat", "cat", "cat", "dog", "dog"], ["a", "a", "b", "a", "b"], [1, 2, 1, 1, np.nan]])
     ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
-    right = DataFrame(
-        [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
-        columns=["cat", "dog"],
-    )
+    right = DataFrame([[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]], columns=["cat", "dog"])
     tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
     right.index = pd.MultiIndex.from_tuples(tpls)
     tm.assert_frame_equal(ts.unstack(level=0), right)
@@ -73,11 +51,7 @@ def test_unstack_tuplename_in_multiindex():
     ser = pd.Series(1, index=idx)
     result = ser.unstack(("A", "a"))
 
-    expected = pd.DataFrame(
-        [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
-        columns=pd.MultiIndex.from_tuples([("a",), ("b",), ("c",)], names=[("A", "a")]),
-        index=pd.Index([1, 2, 3], name=("B", "b")),
-    )
+    expected = pd.DataFrame([[1, 1, 1], [1, 1, 1], [1, 1, 1]], columns=pd.MultiIndex.from_tuples([("a",), ("b",), ("c",)], names=[("A", "a")]), index=pd.Index([1, 2, 3], name=("B", "b")))
     tm.assert_frame_equal(result, expected)
 
 
@@ -96,12 +70,8 @@ def test_unstack_tuplename_in_multiindex():
             (("A", "a"), "B"),
             [[1, 1, 1, 1], [1, 1, 1, 1]],
             pd.Index([3, 4], name="C"),
-            pd.MultiIndex.from_tuples(
-                [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"]
-            ),
-        ),
-    ],
-)
+            pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"]))]
+)
 def test_unstack_mixed_type_name_in_multiindex(
     unstack_idx, expected_values, expected_index, expected_columns
 ):
@@ -127,9 +99,5 @@ def test_unstack_multi_index_categorical_values():
 
     dti = ser.index.levels[0]
     c = pd.Categorical(["foo"] * len(dti))
-    expected = DataFrame(
-        {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
-        columns=pd.Index(list("ABCD"), name="minor"),
-        index=dti.rename("major"),
-    )
+    expected = DataFrame({"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()}, columns=pd.Index(list("ABCD"), name="minor"), index=dti.rename("major"))
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py
index e070b86717503..4101015e6e13b 100644
--- a/pandas/tests/series/test_cumulative.py
+++ b/pandas/tests/series/test_cumulative.py
@@ -38,10 +38,7 @@ def test_cumprod(self, datetime_series):
         _check_accum_op("cumprod", datetime_series)
 
     def test_cummin(self, datetime_series):
-        tm.assert_numpy_array_equal(
-            datetime_series.cummin().values,
-            np.minimum.accumulate(np.array(datetime_series)),
-        )
+        tm.assert_numpy_array_equal(datetime_series.cummin().values, np.minimum.accumulate(np.array(datetime_series)))
         ts = datetime_series.copy()
         ts[::2] = np.NaN
         result = ts.cummin()[1::2]
@@ -51,10 +48,7 @@ def test_cummin(self, datetime_series):
         tm.assert_series_equal(result, expected)
 
     def test_cummax(self, datetime_series):
-        tm.assert_numpy_array_equal(
-            datetime_series.cummax().values,
-            np.maximum.accumulate(np.array(datetime_series)),
-        )
+        tm.assert_numpy_array_equal(datetime_series.cummax().values, np.maximum.accumulate(np.array(datetime_series)))
         ts = datetime_series.copy()
         ts[::2] = np.NaN
         result = ts.cummax()[1::2]
@@ -148,12 +142,7 @@ def test_cummethods_bool(self):
         b = ~a
         c = pd.Series([False] * len(b))
         d = ~c
-        methods = {
-            "cumsum": np.cumsum,
-            "cumprod": np.cumprod,
-            "cummin": np.minimum.accumulate,
-            "cummax": np.maximum.accumulate,
-        }
+        methods = {"cumsum": np.cumsum, "cumprod": np.cumprod, "cummin": np.minimum.accumulate, "cummax": np.maximum.accumulate}
         args = product((a, b, c, d), methods)
         for s, method in args:
             expected = pd.Series(methods[method](s.values))
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index a2c2ae22a0b62..5b4c0fc1a83da 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -12,25 +12,11 @@
 from pandas.compat.numpy import np_array_datetime64_compat
 import pandas.util._test_decorators as td
 
-from pandas.core.dtypes.common import (
-    is_bool_dtype,
-    is_complex_dtype,
-    is_float_dtype,
-    is_integer_dtype,
-    is_object_dtype,
-)
+from pandas.core.dtypes.common import (is_bool_dtype, is_complex_dtype, is_float_dtype, is_integer_dtype, is_object_dtype)
 from pandas.core.dtypes.dtypes import CategoricalDtype as CDT
 
 import pandas as pd
-from pandas import (
-    Categorical,
-    CategoricalIndex,
-    DatetimeIndex,
-    Index,
-    IntervalIndex,
-    Series,
-    Timestamp,
-)
+from pandas import (Categorical, CategoricalIndex, DatetimeIndex, Index, IntervalIndex, Series, Timestamp)
 import pandas._testing as tm
 import pandas.core.algorithms as algos
 from pandas.core.arrays import DatetimeArray
@@ -179,8 +165,7 @@ def test_factorize_nan(self):
                 [(1, 1), (1, 2), (0, 0), (1, 2, 3)],
             ),
             ([(1, 1), (1, 2), (0, 0), (1, 2)], [0, 1, 2, 1], [(1, 1), (1, 2), (0, 0)]),
-        ],
-    )
+        ])
     def test_factorize_tuple_list(self, data, expected_codes, expected_uniques):
         # GH9454
         codes, uniques = pd.factorize(data)
@@ -275,13 +260,9 @@ def test_deprecate_order(self):
             algos.factorize(data)
 
     @pytest.mark.parametrize(
-        "data",
-        [
-            np.array([0, 1, 0], dtype="u8"),
-            np.array([-(2 ** 63), 1, -(2 ** 63)], dtype="i8"),
-            np.array(["__nan__", "foo", "__nan__"], dtype="object"),
-        ],
-    )
+        "data", [
+            np.array([0, 1, 0], dtype="u8"), np.array([-(2 ** 63), 1, -(2 ** 63)], dtype="i8"), np.array(["__nan__", "foo", "__nan__"], dtype="object"),
+        ])
     def test_parametrized_factorize_na_value_default(self, data):
         # arrays that include the NA default for that type, but isn't used.
         codes, uniques = algos.factorize(data)
@@ -290,9 +271,7 @@ def test_parametrized_factorize_na_value_default(self, data):
         tm.assert_numpy_array_equal(codes, expected_codes)
         tm.assert_numpy_array_equal(uniques, expected_uniques)
 
-    @pytest.mark.parametrize(
-        "data, na_value",
-        [
+    @pytest.mark.parametrize("data, na_value", [
             (np.array([0, 1, 0, 2], dtype="u8"), 0),
             (np.array([1, 0, 1, 2], dtype="u8"), 1),
             (np.array([-(2 ** 63), 1, -(2 ** 63), 0], dtype="i8"), -(2 ** 63)),
@@ -300,8 +279,7 @@ def test_parametrized_factorize_na_value_default(self, data):
             (np.array(["a", "", "a", "b"], dtype=object), "a"),
             (np.array([(), ("a", 1), (), ("a", 2)], dtype=object), ()),
             (np.array([("a", 1), (), ("a", 1), ("a", 2)], dtype=object), ("a", 1)),
-        ],
-    )
+        ])
     def test_parametrized_factorize_na_value(self, data, na_value):
         codes, uniques = algos.factorize_array(data, na_value=na_value)
         expected_uniques = data[[1, 3]]
@@ -323,8 +301,7 @@ def test_parametrized_factorize_na_value(self, data, na_value):
                 pd.array([2, 1], dtype="Int64"),
             ),
         ],
-        ids=["numpy_array", "extension_array"],
-    )
+        ids=["numpy_array", "extension_array"])
     def test_factorize_na_sentinel(self, sort, na_sentinel, data, uniques):
         codes, uniques = algos.factorize(data, sort=sort, na_sentinel=na_sentinel)
         if sort:
@@ -352,8 +329,7 @@ def test_factorize_na_sentinel(self, sort, na_sentinel, data, uniques):
                 np.array([0, 2, 1, 0], dtype=np.dtype("intp")),
                 np.array(["a", "b", np.nan], dtype=object),
             ),
-        ],
-    )
+        ])
     def test_object_factorize_na_sentinel_none(
         self, data, expected_codes, expected_uniques
     ):
@@ -375,8 +351,7 @@ def test_object_factorize_na_sentinel_none(
                 np.array([0, 2, 0, 1], dtype=np.dtype("intp")),
                 np.array([1, 2, np.nan], dtype=np.float64),
             ),
-        ],
-    )
+        ])
     def test_int_factorize_na_sentinel_none(
         self, data, expected_codes, expected_uniques
     ):
@@ -450,21 +425,9 @@ def test_dtype_preservation(self, any_numpy_dtype):
 
     def test_datetime64_dtype_array_returned(self):
         # GH 9431
-        expected = np_array_datetime64_compat(
-            [
-                "2015-01-03T00:00:00.000000000+0000",
-                "2015-01-01T00:00:00.000000000+0000",
-            ],
-            dtype="M8[ns]",
-        )
+        expected = np_array_datetime64_compat(["2015-01-03T00:00:00.000000000+0000", "2015-01-01T00:00:00.000000000+0000"], dtype="M8[ns]")
 
-        dt_index = pd.to_datetime(
-            [
-                "2015-01-03T00:00:00.000000000",
-                "2015-01-01T00:00:00.000000000",
-                "2015-01-01T00:00:00.000000000",
-            ]
-        )
+        dt_index = pd.to_datetime(["2015-01-03T00:00:00.000000000", "2015-01-01T00:00:00.000000000", "2015-01-01T00:00:00.000000000"])
         result = algos.unique(dt_index)
         tm.assert_numpy_array_equal(result, expected)
         assert result.dtype == expected.dtype
@@ -566,38 +529,17 @@ def test_categorical(self):
 
     def test_datetime64tz_aware(self):
         # GH 15939
-        result = Series(
-            Index(
-                [
-                    Timestamp("20160101", tz="US/Eastern"),
-                    Timestamp("20160101", tz="US/Eastern"),
-                ]
-            )
-        ).unique()
-        expected = DatetimeArray._from_sequence(
-            np.array([Timestamp("2016-01-01 00:00:00-0500", tz="US/Eastern")])
-        )
+        result = Series(Index([Timestamp("20160101", tz="US/Eastern"), Timestamp("20160101", tz="US/Eastern")])).unique()
+        expected = DatetimeArray._from_sequence(np.array([Timestamp("2016-01-01 00:00:00-0500", tz="US/Eastern")]))
         tm.assert_extension_array_equal(result, expected)
 
-        result = Index(
-            [
-                Timestamp("20160101", tz="US/Eastern"),
-                Timestamp("20160101", tz="US/Eastern"),
-            ]
-        ).unique()
-        expected = DatetimeIndex(
-            ["2016-01-01 00:00:00"], dtype="datetime64[ns, US/Eastern]", freq=None
-        )
+        result = Index([Timestamp("20160101", tz="US/Eastern"), Timestamp("20160101", tz="US/Eastern")]).unique()
+        expected = DatetimeIndex(["2016-01-01 00:00:00"], dtype="datetime64[ns, US/Eastern]", freq=None)
         tm.assert_index_equal(result, expected)
 
         result = pd.unique(
             Series(
-                Index(
-                    [
-                        Timestamp("20160101", tz="US/Eastern"),
-                        Timestamp("20160101", tz="US/Eastern"),
-                    ]
-                )
+                Index([Timestamp("20160101", tz="US/Eastern"), Timestamp("20160101", tz="US/Eastern")])
             )
         )
         expected = DatetimeArray._from_sequence(
@@ -606,12 +548,7 @@ def test_datetime64tz_aware(self):
         tm.assert_extension_array_equal(result, expected)
 
         result = pd.unique(
-            Index(
-                [
-                    Timestamp("20160101", tz="US/Eastern"),
-                    Timestamp("20160101", tz="US/Eastern"),
-                ]
-            )
+            Index([Timestamp("20160101", tz="US/Eastern"), Timestamp("20160101", tz="US/Eastern")])
         )
         expected = DatetimeIndex(
             ["2016-01-01 00:00:00"], dtype="datetime64[ns, US/Eastern]", freq=None
         )
@@ -634,10 +571,7 @@ def test_order_of_appearance(self):
 
         result = pd.unique(
             Index(
-                [
-                    Timestamp("20160101", tz="US/Eastern"),
-                    Timestamp("20160101", tz="US/Eastern"),
-                ]
+                [Timestamp("20160101", tz="US/Eastern"), Timestamp("20160101", tz="US/Eastern")]
             )
         )
         expected = DatetimeIndex(
@@ -653,13 +587,7 @@ def test_order_of_appearance(self):
         expected = Categorical(list("abc"))
         tm.assert_categorical_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        "arg ,expected",
-        [
-            (("1", "1", "2"), np.array(["1", "2"], dtype=object)),
-            (("foo",), np.array(["foo"], dtype=object)),
-        ],
-    )
+    @pytest.mark.parametrize("arg ,expected", [(("1", "1", "2"), np.array(["1", "2"], dtype=object)), (("foo",), np.array(["foo"], dtype=object))])
     def test_tuple_with_strings(self, arg, expected):
         # see GH 17108
        result = pd.unique(arg)
@@ -1018,21 +946,11 @@ def test_value_counts_nat(self):
     def test_value_counts_datetime_outofbounds(self):
         # GH 13663
         s = Series(
-            [
-                datetime(3000, 1, 1),
-                datetime(5000, 1, 1),
-                datetime(5000, 1, 1),
-                datetime(6000, 1, 1),
-                datetime(3000, 1, 1),
-                datetime(3000, 1, 1),
-            ]
+            [datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1), datetime(3000, 1, 1), datetime(3000, 1, 1)]
         )
 
         res = s.value_counts()
 
-        exp_index = Index(
-            [datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)],
-            dtype=object,
-        )
+        exp_index = Index([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], dtype=object)
         exp = Series([3, 2, 1], index=exp_index)
         tm.assert_series_equal(res, exp)
@@ -1058,83 +976,42 @@ def test_categorical_nans(self):
         s = Series(Categorical(list("aaaaabbbcc")))  # 4,3,2,1 (nan)
         s.iloc[1] = np.nan
         result = s.value_counts()
-        expected = Series(
-            [4, 3, 2],
-            index=CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"]),
-        )
+        expected = Series([4, 3, 2], index=CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"]))
         tm.assert_series_equal(result, expected, check_index_type=True)
         result = s.value_counts(dropna=False)
         expected = Series([4, 3, 2, 1], index=CategoricalIndex(["a", "b", "c", np.nan]))
         tm.assert_series_equal(result, expected, check_index_type=True)
 
         # out of order
-        s = Series(
-            Categorical(list("aaaaabbbcc"), ordered=True, categories=["b", "a", "c"])
-        )
+        s = Series(Categorical(list("aaaaabbbcc"), ordered=True, categories=["b", "a", "c"]))
         s.iloc[1] = np.nan
         result = s.value_counts()
-        expected = Series(
-            [4, 3, 2],
-            index=CategoricalIndex(
-                ["a", "b", "c"], categories=["b", "a", "c"], ordered=True
-            ),
-        )
+        expected = Series([4, 3, 2], index=CategoricalIndex(["a", "b", "c"], categories=["b", "a", "c"], ordered=True))
         tm.assert_series_equal(result, expected, check_index_type=True)
 
         result = s.value_counts(dropna=False)
-        expected = Series(
-            [4, 3, 2, 1],
-            index=CategoricalIndex(
-                ["a", "b", "c", np.nan], categories=["b", "a", "c"], ordered=True
-            ),
-        )
+        expected = Series([4, 3, 2, 1], index=CategoricalIndex(["a", "b", "c", np.nan], categories=["b", "a", "c"], ordered=True))
         tm.assert_series_equal(result, expected, check_index_type=True)
 
     def test_categorical_zeroes(self):
         # keep the `d` category with 0
         s = Series(Categorical(list("bbbaac"), categories=list("abcd"), ordered=True))
         result = s.value_counts()
-        expected = Series(
-            [3, 2, 1, 0],
-            index=Categorical(
-                ["b", "a", "c", "d"], categories=list("abcd"), ordered=True
-            ),
-        )
+        expected = Series([3, 2, 1, 0], index=Categorical(["b", "a", "c", "d"], categories=list("abcd"), ordered=True))
         tm.assert_series_equal(result, expected, check_index_type=True)
 
     def test_dropna(self):
         # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328
-        tm.assert_series_equal(
-            Series([True, True, False]).value_counts(dropna=True),
-            Series([2, 1], index=[True, False]),
-        )
-        tm.assert_series_equal(
-            Series([True, True, False]).value_counts(dropna=False),
-            Series([2, 1], index=[True, False]),
-        )
+        tm.assert_series_equal(Series([True, True, False]).value_counts(dropna=True), Series([2, 1], index=[True, False]))
+        tm.assert_series_equal(Series([True, True, False]).value_counts(dropna=False), Series([2, 1], index=[True, False]))
 
-        tm.assert_series_equal(
-            Series([True, True, False, None]).value_counts(dropna=True),
-            Series([2, 1], index=[True, False]),
-        )
-        tm.assert_series_equal(
-            Series([True, True, False, None]).value_counts(dropna=False),
-            Series([2, 1, 1], index=[True, False, np.nan]),
-        )
-        tm.assert_series_equal(
-            Series([10.3, 5.0, 5.0]).value_counts(dropna=True),
-            Series([2, 1], index=[5.0, 10.3]),
-        )
-        tm.assert_series_equal(
-            Series([10.3, 5.0, 5.0]).value_counts(dropna=False),
-            Series([2, 1], index=[5.0, 10.3]),
-        )
+        tm.assert_series_equal(Series([True, True, False, None]).value_counts(dropna=True), Series([2, 1], index=[True, False]))
+        tm.assert_series_equal(Series([True, True, False, None]).value_counts(dropna=False), Series([2, 1, 1], index=[True, False, np.nan]))
+        tm.assert_series_equal(Series([10.3, 5.0, 5.0]).value_counts(dropna=True), Series([2, 1], index=[5.0, 10.3]))
+        tm.assert_series_equal(Series([10.3, 5.0, 5.0]).value_counts(dropna=False), Series([2, 1], index=[5.0, 10.3]))
 
-        tm.assert_series_equal(
-            Series([10.3, 5.0, 5.0, None]).value_counts(dropna=True),
-            Series([2, 1], index=[5.0, 10.3]),
-        )
+        tm.assert_series_equal(Series([10.3, 5.0, 5.0, None]).value_counts(dropna=True), Series([2, 1], index=[5.0, 10.3]))
 
     # 32-bit linux has a different ordering
     if IS64:
@@ -1219,20 +1096,7 @@ def test_duplicated_with_nas(self):
         [
             np.array([1, 2, 1, 5, 3, 2, 4, 1, 5, 6]),
             np.array([1.1, 2.2, 1.1, np.nan, 3.3, 2.2, 4.4, 1.1, np.nan, 6.6]),
-            np.array(
-                [
-                    1 + 1j,
-                    2 + 2j,
-                    1 + 1j,
-                    5 + 5j,
-                    3 + 3j,
-                    2 + 2j,
-                    4 + 4j,
-                    1 + 1j,
-                    5 + 5j,
-                    6 + 6j,
-                ]
-            ),
+            np.array([1 + 1j, 2 + 2j, 1 + 1j, 5 + 5j, 3 + 3j, 2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j]),
             np.array(["a", "b", "a", "e", "c", "b", "d", "a", "e", "f"], dtype=object),
             np.array(
                 [1, 2 ** 63, 1, 3 ** 5, 10, 2 ** 63, 39, 1, 3 ** 5, 7], dtype=np.uint64
             ),
@@ -1281,38 +1145,10 @@
 
     def test_datetime_likes(self):
 
-        dt = [
-            "2011-01-01",
-            "2011-01-02",
-            "2011-01-01",
-            "NaT",
-            "2011-01-03",
-            "2011-01-02",
-            "2011-01-04",
-            "2011-01-01",
-            "NaT",
-            "2011-01-06",
-        ]
-        td = [
-            "1 days",
-            "2 days",
-            "1 days",
-            "NaT",
-            "3 days",
-            "2 days",
-            "4 days",
-            "1 days",
-            "NaT",
-            "6 days",
-        ]
+        dt = ["2011-01-01", "2011-01-02", "2011-01-01", "NaT", "2011-01-03", "2011-01-02", "2011-01-04", "2011-01-01", "NaT", "2011-01-06"]
+        td = ["1 days", "2 days", "1 days", "NaT", "3 days", "2 days", "4 days", "1 days", "NaT", "6 days"]
 
-        cases = [
-            np.array([Timestamp(d) for d in dt]),
-            np.array([Timestamp(d, tz="US/Eastern") for d in dt]),
-            np.array([pd.Period(d, freq="D") for d in dt]),
-            np.array([np.datetime64(d) for d in dt]),
-            np.array([pd.Timedelta(d) for d in td]),
-        ]
+        cases = [np.array([Timestamp(d) for d in dt]), np.array([Timestamp(d, tz="US/Eastern") for d in dt]), np.array([pd.Period(d, freq="D") for d in dt]), np.array([np.datetime64(d) for d in dt]), np.array([pd.Timedelta(d) for d in td])]
 
         exp_first = np.array(
             [False, False, True, False, False, True, False, True, True, False]
@@ -1333,11 +1169,7 @@ def test_datetime_likes(self):
             tm.assert_numpy_array_equal(res_false, exp_false)
 
             # index
-            for idx in [
-                Index(case),
-                Index(case, dtype="category"),
-                Index(case, dtype=object),
-            ]:
+            for idx in [Index(case), Index(case, dtype="category"), Index(case, dtype=object)]:
                 res_first = idx.duplicated(keep="first")
                 tm.assert_numpy_array_equal(res_first, exp_first)
@@ -1348,11 +1180,7 @@ def test_datetime_likes(self):
             tm.assert_numpy_array_equal(res_false, exp_false)
 
             # series
-            for s in [
-                Series(case),
-                Series(case, dtype="category"),
-                Series(case, dtype=object),
-            ]:
+            for s in [Series(case), Series(case, dtype="category"), Series(case, dtype=object)]:
                 res_first = s.duplicated(keep="first")
                 tm.assert_series_equal(res_first, Series(exp_first))
@@ -1373,16 +1201,9 @@ def test_unique_index(self):
     @pytest.mark.parametrize(
         "arr, unique",
         [
-            (
-                [(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)],
-                [(0, 0), (0, 1), (1, 0), (1, 1)],
-            ),
-            (
-                [("b", "c"), ("a", "b"), ("a", "b"), ("b", "c")],
-                [("b", "c"), ("a", "b")],
-            ),
-            ([("a", 1), ("b", 2), ("a", 3), ("a", 1)], [("a", 1), ("b", 2), ("a", 3)]),
-        ],
+            ([(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)], [(0, 0), (0, 1), (1, 0), (1, 1)]),
+            ([("b", "c"), ("a", "b"), ("a", "b"), ("b", "c")], [("b", "c"), ("a", "b")]), ([("a", 1), ("b", 2), ("a", 3), ("a", 1)], [("a", 1), ("b", 2), ("a", 3)])
+        ]
     )
     def test_unique_tuples(self, arr, unique):
         # https://github.com/pandas-dev/pandas/issues/16519
@@ -1572,13 +1393,7 @@ def test_get_unique(self):
     @pytest.mark.parametrize("nvals", [0, 10])  # resizing to 0 is special case
     @pytest.mark.parametrize(
         "htable, uniques, dtype, safely_resizes",
-        [
-            (ht.PyObjectHashTable, ht.ObjectVector, "object", False),
-            (ht.StringHashTable, ht.ObjectVector, "object", True),
-            (ht.Float64HashTable, ht.Float64Vector, "float64", False),
-            (ht.Int64HashTable, ht.Int64Vector, "int64", False),
-            (ht.UInt64HashTable, ht.UInt64Vector, "uint64", False),
-        ],
+        [(ht.PyObjectHashTable, ht.ObjectVector, "object", False), (ht.StringHashTable, ht.ObjectVector, "object", True), (ht.Float64HashTable, ht.Float64Vector, "float64", False), (ht.Int64HashTable, ht.Int64Vector, "int64", False), (ht.UInt64HashTable, ht.UInt64Vector, "uint64", False)]
     )
     def test_vector_resize(
         self, writable, htable, uniques, dtype, safely_resizes, nvals
     ):
@@ -1616,13 +1431,7 @@ def test_vector_resize(
     @pytest.mark.parametrize(
         "htable, tm_dtype",
         [
-            (ht.PyObjectHashTable, "String"),
-            (ht.StringHashTable, "String"),
-            (ht.Float64HashTable, "Float"),
-            (ht.Int64HashTable, "Int"),
-            (ht.UInt64HashTable, "UInt"),
-        ],
-    )
+            (ht.PyObjectHashTable, "String"), (ht.StringHashTable, "String"), (ht.Float64HashTable, "Float"), (ht.Int64HashTable, "Int"), (ht.UInt64HashTable, "UInt")])
     def test_hashtable_unique(self, htable, tm_dtype, writable):
         # output of maker has guaranteed unique elements
         maker = getattr(tm, "make" + tm_dtype + "Index")
@@ -1655,14 +1464,7 @@ def test_hashtable_unique(self, htable, tm_dtype, writable):
 
     @pytest.mark.parametrize(
         "htable, tm_dtype",
-        [
-            (ht.PyObjectHashTable, "String"),
-            (ht.StringHashTable, "String"),
-            (ht.Float64HashTable, "Float"),
-            (ht.Int64HashTable, "Int"),
-            (ht.UInt64HashTable, "UInt"),
-        ],
-    )
+        [(ht.PyObjectHashTable, "String"), (ht.StringHashTable, "String"), (ht.Float64HashTable, "Float"), (ht.Int64HashTable, "Int"), (ht.UInt64HashTable, "UInt")])
     def test_hashtable_factorize(self, htable, tm_dtype, writable):
         # output of maker has guaranteed unique elements
         maker = getattr(tm, "make" + tm_dtype + "Index")
@@ -1694,15 +1496,7 @@ def test_hashtable_factorize(self, htable, tm_dtype, writable):
         tm.assert_numpy_array_equal(result_reconstruct, expected_reconstruct)
 
     @pytest.mark.parametrize(
-        "hashtable",
-        [
-            ht.PyObjectHashTable,
-            ht.StringHashTable,
-            ht.Float64HashTable,
-            ht.Int64HashTable,
-            ht.UInt64HashTable,
-        ],
-    )
+        "hashtable", [ht.PyObjectHashTable, ht.StringHashTable, ht.Float64HashTable, ht.Int64HashTable, ht.UInt64HashTable])
     def test_hashtable_large_sizehint(self, hashtable):
         # GH 22729
         size_hint = np.iinfo(np.uint32).max + 1
@@ -1772,11 +1566,7 @@ def test_too_many_ndims(self):
 
     @pytest.mark.single
     @pytest.mark.high_memory
-    @pytest.mark.parametrize(
-        "values",
-        [np.arange(2 ** 24 + 1), np.arange(2 ** 25 + 2).reshape(2 ** 24 + 1, 2)],
-        ids=["1d", "2d"],
-    )
+    @pytest.mark.parametrize("values", [np.arange(2 ** 24 + 1), np.arange(2 ** 25 + 2).reshape(2 ** 24 + 1, 2)], ids=["1d", "2d"])
     def test_pct_max_many_rows(self, values):
         # GH 18271
        result = algos.rank(values, pct=True).max()
@@ -1982,10 +1772,7 @@ def test_is_lexsorted():
                 0,
                 0,
                 0,
-                0,
-            ],
-            dtype="int64",
-        ),
+                0], dtype="int64"),
         np.array(
             [
                 30,