From 9b8c6394255a9d2f507c5d54b9b20f908a17707f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 26 Sep 2023 23:13:13 +0200 Subject: [PATCH 01/11] add to indices_dict entry mixed-int-string --- pandas/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 62f22921f0482..4e93f23f347fe 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -632,6 +632,7 @@ def _create_mi_with_dt64tz_level(): "empty": Index([]), "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(), + "mixed-int-string": Index([0, "a", 1, "b", 2, "c"]), "multi": _create_multiindex(), "repeats": Index([0, 0, 1, 1, 2, 2]), "nullable_int": Index(np.arange(100), dtype="Int64"), From 3645428766a8a8ddca0f362e43d80ba34179b9b5 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 30 Sep 2023 19:21:15 +0200 Subject: [PATCH 02/11] fix tests in test_old_base.py, test_numpy_compat.py --- pandas/core/nanops.py | 4 +++- pandas/tests/indexes/test_old_base.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e60c42a20a9af..ed4fd28f17f5e 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1083,7 +1083,9 @@ def reduction( skipna: bool = True, mask: npt.NDArray[np.bool_] | None = None, ): - if values.size == 0: + if values.size == 0 or not all( + isinstance(elem, type(values[0])) for elem in values[1:] + ): return _na_for_min_count(values, axis) values, mask = _get_values( diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 79dc423f12a85..e535a72c4f9b2 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -331,11 +331,19 @@ def test_argsort(self, index): if isinstance(index, CategoricalIndex): pytest.skip(f"{type(self).__name__} separately tested") + if not all( + isinstance(elem, 
type(index.values[0])) for elem in index.values[1:] + ): + pytest.skip("'<' not supported between instances of 'str' and 'int'") result = index.argsort() expected = np.array(index).argsort() tm.assert_numpy_array_equal(result, expected, check_dtype=False) def test_numpy_argsort(self, index): + if not all( + isinstance(elem, type(index.values[0])) for elem in index.values[1:] + ): + pytest.skip("'<' not supported between instances of 'str' and 'int'") result = np.argsort(index) expected = index.argsort() tm.assert_numpy_array_equal(result, expected) From 6b0e2c28c9d2c2280c0c8e427ecd597fb202cfaa Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 9 Oct 2023 18:40:42 +0200 Subject: [PATCH 03/11] add except TypeError to def nargsort, fix tests --- pandas/core/sorting.py | 8 ++- pandas/tests/base/test_misc.py | 5 ++ pandas/tests/base/test_value_counts.py | 25 ++++++-- pandas/tests/indexes/multi/test_setops.py | 5 +- pandas/tests/indexes/test_common.py | 34 +++++++---- pandas/tests/indexes/test_setops.py | 70 ++++++++++++++++------- pandas/tests/test_algos.py | 5 ++ 7 files changed, 113 insertions(+), 39 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index d96fc02e16d0d..a49fcdfecc989 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -436,7 +436,13 @@ def nargsort( if not ascending: non_nans = non_nans[::-1] non_nan_idx = non_nan_idx[::-1] - indexer = non_nan_idx[non_nans.argsort(kind=kind)] + # GH#54072 + # argsort does not support mixed int/string Index + try: + indexer = non_nan_idx[non_nans.argsort(kind=kind)] + except TypeError as err: + msg = "'<' not supported between " + raise TypeError(msg) from err if not ascending: indexer = indexer[::-1] # Finally, place the NaNs at the end or the beginning according to diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 3ca53c4010449..d859ab2676ddb 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -151,6 
+151,11 @@ def test_searchsorted(request, index_or_series_obj): # comparison semantics https://github.com/numpy/numpy/issues/15981 mark = pytest.mark.xfail(reason="complex objects are not comparable") request.node.add_marker(mark) + elif not all( + isinstance(elem, type(index_or_series_obj.values[0])) + for elem in index_or_series_obj.values[1:] + ): + pytest.skip("'>' not supported between instances of 'str' and 'int'") max_obj = max(obj, default=0) index = np.searchsorted(obj, max_obj) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 3cdfb7fe41e92..02c9aa70074fa 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -46,12 +46,20 @@ def test_value_counts(index_or_series_obj): # i.e IntegerDtype expected = expected.astype("Int64") - # TODO(GH#32514): Order of entries with the same count is inconsistent - # on CI (gh-32449) - if obj.duplicated().any(): - result = result.sort_index() - expected = expected.sort_index() - tm.assert_series_equal(result, expected) + if not all( + isinstance(elem, type(index_or_series_obj.values[0])) + for elem in index_or_series_obj.values[1:] + ): + msg = "'<' not supported between " + with pytest.raises(TypeError, match=msg): + result.sort_index() + else: + # TODO(GH#32514): Order of entries with the same count is inconsistent + # on CI (gh-32449) + if obj.duplicated().any(): + result = result.sort_index() + expected = expected.sort_index() + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("null_obj", [np.nan, None]) @@ -66,6 +74,11 @@ def test_value_counts_null(null_obj, index_or_series_obj): pytest.skip("Test doesn't make sense on empty data") elif isinstance(orig, MultiIndex): pytest.skip(f"MultiIndex can't hold '{null_obj}'") + elif not all( + isinstance(elem, type(index_or_series_obj.values[0])) + for elem in index_or_series_obj.values[1:] + ): + pytest.skip("'<' not supported between instances of 'str' and 'int'") 
values = obj._values values[0:2] = null_obj diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index c951403fb2654..505f30b149e62 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -631,7 +631,10 @@ def test_union_duplicates(index, request): values = index.unique().values.tolist() mi1 = MultiIndex.from_arrays([values, [1] * len(values)]) mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)]) - result = mi2.union(mi1) + if not all(isinstance(elem, type(values[0])) for elem in values[1:]): + pytest.skip("'<' not supported between instances of 'str' and 'int'") + else: + result = mi2.union(mi1) expected = mi2.sort_values() tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 6245a129afedc..d7e6e0008cbe3 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -448,8 +448,14 @@ def test_hasnans_isnans(self, index_flat): @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") @pytest.mark.parametrize("na_position", [None, "middle"]) def test_sort_values_invalid_na_position(index_with_missing, na_position): - with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): - index_with_missing.sort_values(na_position=na_position) + if not all( + isinstance(sub, type(index_with_missing.values[0])) + for sub in index_with_missing.values[1:] + ): + pytest.skip("'<' not supported between instances of 'str' and 'int'") + else: + with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): + index_with_missing.sort_values(na_position=na_position) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") @@ -467,17 +473,25 @@ def test_sort_values_with_missing(index_with_missing, na_position, request): missing_count = np.sum(index_with_missing.isna()) 
not_na_vals = index_with_missing[index_with_missing.notna()].values - sorted_values = np.sort(not_na_vals) - if na_position == "first": - sorted_values = np.concatenate([[None] * missing_count, sorted_values]) + if not all(isinstance(sub, type(not_na_vals[0])) for sub in not_na_vals[1:]): + with pytest.raises( + TypeError, match="'<' not supported between instances of 'int' and 'str'" + ): + np.sort(not_na_vals) else: - sorted_values = np.concatenate([sorted_values, [None] * missing_count]) + sorted_values = np.sort(not_na_vals) + if na_position == "first": + sorted_values = np.concatenate([[None] * missing_count, sorted_values]) + else: + sorted_values = np.concatenate([sorted_values, [None] * missing_count]) - # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray - expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype) + # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray + expected = type(index_with_missing)( + sorted_values, dtype=index_with_missing.dtype + ) - result = index_with_missing.sort_values(na_position=na_position) - tm.assert_index_equal(result, expected) + result = index_with_missing.sort_values(na_position=na_position) + tm.assert_index_equal(result, expected) def test_ndarray_compat_properties(index): diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index d6304774b87c4..98738f50d8ac2 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -33,9 +33,13 @@ def test_union_same_types(index): # Union with a non-unique, non-monotonic index raises error # Only needed for bool index factory - idx1 = index.sort_values() - idx2 = index.sort_values() - assert idx1.union(idx2).dtype == idx1.dtype + if not all(isinstance(elem, type(index.values[0])) for elem in index.values[1:]): + with pytest.raises(TypeError, match="'<' not supported between "): + index.sort_values() + else: + idx1 = index.sort_values() + idx2 = 
index.sort_values() + assert idx1.union(idx2).dtype == idx1.dtype def test_union_different_types(index_flat, index_flat2, request): @@ -97,19 +101,25 @@ def test_union_different_types(index_flat, index_flat2, request): # Union with a non-unique, non-monotonic index raises error # This applies to the boolean index - idx1 = idx1.sort_values() - idx2 = idx2.sort_values() - - with tm.assert_produces_warning(warn, match=msg): - res1 = idx1.union(idx2) - res2 = idx2.union(idx1) - - if any_uint64 and (idx1_signed or idx2_signed): - assert res1.dtype == np.dtype("O") - assert res2.dtype == np.dtype("O") + if not all( + isinstance(elem, type(idx1.values[0])) for elem in idx1.values[1:] + ) or not all(isinstance(elem, type(idx2.values[0])) for elem in idx2.values[1:]): + with pytest.raises(TypeError, match="'<' not supported between "): + idx1.sort_values() + idx2.sort_values() else: - assert res1.dtype == common_dtype - assert res2.dtype == common_dtype + idx1 = idx1.sort_values() + idx2 = idx2.sort_values() + with tm.assert_produces_warning(warn, match=msg): + res1 = idx1.union(idx2) + res2 = idx2.union(idx1) + + if any_uint64 and (idx1_signed or idx2_signed): + assert res1.dtype == np.dtype("O") + assert res2.dtype == np.dtype("O") + else: + assert res1.dtype == common_dtype + assert res2.dtype == common_dtype @pytest.mark.parametrize( @@ -369,9 +379,18 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name): # test copy.union(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - union = first.union(second).sort_values() - expected = index.set_names(expected_name).sort_values() - tm.assert_index_equal(union, expected) + if not all( + isinstance(elem, type(second.values[0])) for elem in second.values[1:] + ): + with pytest.raises( + TypeError, + match="'<' not supported between ", + ): + first.union(second).sort_values() + else: + union = first.union(second).sort_values() + expected = 
index.set_names(expected_name).sort_values() + tm.assert_index_equal(union, expected) @pytest.mark.parametrize( "fname, sname, expected_name", @@ -436,9 +455,18 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name): # test copy.intersection(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - intersect = first.intersection(second).sort_values() - expected = index[1:].set_names(expected_name).sort_values() - tm.assert_index_equal(intersect, expected) + if not all( + isinstance(elem, type(index.values[0])) for elem in index.values[1:] + ): + with pytest.raises( + TypeError, + match="'<' not supported between ", + ): + first.intersection(second).sort_values() + else: + intersect = first.intersection(second).sort_values() + expected = index[1:].set_names(expected_name).sort_values() + tm.assert_index_equal(intersect, expected) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_intersection_name_retention_with_nameless(self, index): diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 661290fb00d13..ad30375286afd 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -70,6 +70,11 @@ def test_factorize(self, index_or_series_obj, sort): expected_uniques = expected_uniques.astype(object) if sort: + if not all( + isinstance(elem, type(expected_uniques.values[0])) + for elem in expected_uniques.values[1:] + ): + pytest.skip("'<' not supported between instances of 'str' and 'int'") expected_uniques = expected_uniques.sort_values() # construct an integer ndarray so that From 533b4a2b99d45acfe8221ef75315571b2c1fb6a6 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 10 Oct 2023 01:09:48 +0200 Subject: [PATCH 04/11] fix tests --- pandas/core/nanops.py | 4 +- pandas/tests/base/test_misc.py | 12 ++++-- pandas/tests/base/test_value_counts.py | 12 +++--- pandas/tests/indexes/multi/test_setops.py | 2 +- 
pandas/tests/indexes/test_common.py | 12 +++--- pandas/tests/indexes/test_numpy_compat.py | 6 +++ pandas/tests/indexes/test_old_base.py | 46 +++++++++++++++-------- pandas/tests/indexes/test_setops.py | 27 +++++++++---- pandas/tests/test_algos.py | 7 ++-- 9 files changed, 82 insertions(+), 46 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index ed4fd28f17f5e..e60c42a20a9af 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1083,9 +1083,7 @@ def reduction( skipna: bool = True, mask: npt.NDArray[np.bool_] | None = None, ): - if values.size == 0 or not all( - isinstance(elem, type(values[0])) for elem in values[1:] - ): + if values.size == 0: return _na_for_min_count(values, axis) values, mask = _get_values( diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index d859ab2676ddb..334e5a760cecc 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -151,11 +151,15 @@ def test_searchsorted(request, index_or_series_obj): # comparison semantics https://github.com/numpy/numpy/issues/15981 mark = pytest.mark.xfail(reason="complex objects are not comparable") request.node.add_marker(mark) - elif not all( - isinstance(elem, type(index_or_series_obj.values[0])) - for elem in index_or_series_obj.values[1:] + elif any(isinstance(elem, int) for elem in obj.values[:]) and any( + isinstance(elem, str) for elem in obj.values[:] ): - pytest.skip("'>' not supported between instances of 'str' and 'int'") + with pytest.raises( + TypeError, match="'>' not supported between instances of 'str' and 'int'" + ): + max_obj = max(obj, default=0) + index = np.searchsorted(obj, max_obj) + return max_obj = max(obj, default=0) index = np.searchsorted(obj, max_obj) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 02c9aa70074fa..f2175ad6b4f6d 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -46,9 +46,10 @@ 
def test_value_counts(index_or_series_obj): # i.e IntegerDtype expected = expected.astype("Int64") - if not all( - isinstance(elem, type(index_or_series_obj.values[0])) - for elem in index_or_series_obj.values[1:] + if ( + len(obj) > 0 + and isinstance(obj.values[0], int) + and isinstance(obj.values[1], str) ): msg = "'<' not supported between " with pytest.raises(TypeError, match=msg): @@ -74,9 +75,8 @@ def test_value_counts_null(null_obj, index_or_series_obj): pytest.skip("Test doesn't make sense on empty data") elif isinstance(orig, MultiIndex): pytest.skip(f"MultiIndex can't hold '{null_obj}'") - elif not all( - isinstance(elem, type(index_or_series_obj.values[0])) - for elem in index_or_series_obj.values[1:] + elif any(isinstance(elem, int) for elem in orig.values[:]) and any( + isinstance(elem, str) for elem in orig.values[:] ): pytest.skip("'<' not supported between instances of 'str' and 'int'") diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 505f30b149e62..7a635d8ba4c01 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -631,7 +631,7 @@ def test_union_duplicates(index, request): values = index.unique().values.tolist() mi1 = MultiIndex.from_arrays([values, [1] * len(values)]) mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)]) - if not all(isinstance(elem, type(values[0])) for elem in values[1:]): + if isinstance(index.values[0], int) and isinstance(index.values[1], str): pytest.skip("'<' not supported between instances of 'str' and 'int'") else: result = mi2.union(mi1) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index d7e6e0008cbe3..28f4cff4d1fad 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -448,11 +448,11 @@ def test_hasnans_isnans(self, index_flat): @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is 
deprecated:FutureWarning") @pytest.mark.parametrize("na_position", [None, "middle"]) def test_sort_values_invalid_na_position(index_with_missing, na_position): - if not all( - isinstance(sub, type(index_with_missing.values[0])) - for sub in index_with_missing.values[1:] + if any(isinstance(elem, int) for elem in index_with_missing.values[:]) and any( + isinstance(elem, str) for elem in index_with_missing.values[:] ): - pytest.skip("'<' not supported between instances of 'str' and 'int'") + with pytest.raises(TypeError, match="'<' not supported between "): + index_with_missing.sort_values(na_position=na_position) else: with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): index_with_missing.sort_values(na_position=na_position) @@ -473,7 +473,9 @@ def test_sort_values_with_missing(index_with_missing, na_position, request): missing_count = np.sum(index_with_missing.isna()) not_na_vals = index_with_missing[index_with_missing.notna()].values - if not all(isinstance(sub, type(not_na_vals[0])) for sub in not_na_vals[1:]): + if any(isinstance(elem, int) for elem in index_with_missing.values[:]) and any( + isinstance(elem, str) for elem in index_with_missing.values[:] + ): with pytest.raises( TypeError, match="'<' not supported between instances of 'int' and 'str'" ): diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index ace78d77350cb..dcfc1fe916b35 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -160,6 +160,12 @@ def test_numpy_ufuncs_reductions(index, func, request): with pytest.raises(TypeError, match="is not ordered for"): func.reduce(index) return + elif isinstance(index.values[0], int) and isinstance(index.values[1], str): + with pytest.raises( + TypeError, match=".* not supported between instances of 'int' and 'str'" + ): + func.reduce(index) + return else: result = func.reduce(index) diff --git a/pandas/tests/indexes/test_old_base.py 
b/pandas/tests/indexes/test_old_base.py index e535a72c4f9b2..f68f7f99d626c 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -331,26 +331,40 @@ def test_argsort(self, index): if isinstance(index, CategoricalIndex): pytest.skip(f"{type(self).__name__} separately tested") - if not all( - isinstance(elem, type(index.values[0])) for elem in index.values[1:] + if ( + len(index.values) > 0 + and isinstance(index.values[0], int) + and isinstance(index.values[1], str) ): - pytest.skip("'<' not supported between instances of 'str' and 'int'") - result = index.argsort() - expected = np.array(index).argsort() - tm.assert_numpy_array_equal(result, expected, check_dtype=False) + with pytest.raises( + TypeError, + match="'<' not supported between instances of 'str' and 'int'", + ): + index.argsort() + else: + result = index.argsort() + expected = np.array(index).argsort() + tm.assert_numpy_array_equal(result, expected, check_dtype=False) def test_numpy_argsort(self, index): - if not all( - isinstance(elem, type(index.values[0])) for elem in index.values[1:] + if ( + len(index.values) > 0 + and isinstance(index.values[0], int) + and isinstance(index.values[1], str) ): - pytest.skip("'<' not supported between instances of 'str' and 'int'") - result = np.argsort(index) - expected = index.argsort() - tm.assert_numpy_array_equal(result, expected) - - result = np.argsort(index, kind="mergesort") - expected = index.argsort(kind="mergesort") - tm.assert_numpy_array_equal(result, expected) + with pytest.raises( + TypeError, + match="'<' not supported between instances of 'str' and 'int'", + ): + np.argsort(index) + else: + result = np.argsort(index) + expected = index.argsort() + tm.assert_numpy_array_equal(result, expected) + + result = np.argsort(index, kind="mergesort") + expected = index.argsort(kind="mergesort") + tm.assert_numpy_array_equal(result, expected) # these are the only two types that perform # pandas compatibility input 
validation - the diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 98738f50d8ac2..9a1ce944fc820 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -33,7 +33,11 @@ def test_union_same_types(index): # Union with a non-unique, non-monotonic index raises error # Only needed for bool index factory - if not all(isinstance(elem, type(index.values[0])) for elem in index.values[1:]): + if ( + len(index.values) > 0 + and isinstance(index.values[0], int) + and isinstance(index.values[1], str) + ): with pytest.raises(TypeError, match="'<' not supported between "): index.sort_values() else: @@ -101,9 +105,14 @@ def test_union_different_types(index_flat, index_flat2, request): # Union with a non-unique, non-monotonic index raises error # This applies to the boolean index - if not all( - isinstance(elem, type(idx1.values[0])) for elem in idx1.values[1:] - ) or not all(isinstance(elem, type(idx2.values[0])) for elem in idx2.values[1:]): + if ( + len(idx1.values) > 0 + and isinstance(idx1.values[0], int) + and isinstance(idx1.values[1], str) + or len(idx2.values) > 0 + and isinstance(idx2.values[0], int) + and isinstance(idx2.values[1], str) + ): with pytest.raises(TypeError, match="'<' not supported between "): idx1.sort_values() idx2.sort_values() @@ -379,8 +388,8 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name): # test copy.union(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - if not all( - isinstance(elem, type(second.values[0])) for elem in second.values[1:] + if any(isinstance(elem, int) for elem in second.values[:]) and any( + isinstance(elem, str) for elem in second.values[:] ): with pytest.raises( TypeError, @@ -455,8 +464,10 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name): # test copy.intersection(subset) - need sort for unicode and string first = 
index.copy().set_names(fname) second = index[1:].set_names(sname) - if not all( - isinstance(elem, type(index.values[0])) for elem in index.values[1:] + if ( + len(index.values) > 0 + and isinstance(index.values[0], int) + and isinstance(index.values[1], str) ): with pytest.raises( TypeError, diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index ad30375286afd..02ccf3f4cf226 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -70,9 +70,10 @@ def test_factorize(self, index_or_series_obj, sort): expected_uniques = expected_uniques.astype(object) if sort: - if not all( - isinstance(elem, type(expected_uniques.values[0])) - for elem in expected_uniques.values[1:] + if ( + len(expected_uniques.values) > 0 + and isinstance(expected_uniques.values[0], int) + and isinstance(expected_uniques.values[1], str) ): pytest.skip("'<' not supported between instances of 'str' and 'int'") expected_uniques = expected_uniques.sort_values() From 4ad1bc42cadf6b8bd8753e117b3eb7b42441996f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 13 Oct 2023 12:34:42 +0200 Subject: [PATCH 05/11] correct the error msg, add tm.assert_mixed_int_string_entry, correct tests --- pandas/_testing/__init__.py | 2 ++ pandas/_testing/asserters.py | 17 +++++++++ pandas/core/sorting.py | 2 +- pandas/tests/base/test_misc.py | 5 ++- pandas/tests/base/test_value_counts.py | 14 +++----- pandas/tests/indexes/multi/test_setops.py | 3 +- pandas/tests/indexes/test_common.py | 14 ++++---- pandas/tests/indexes/test_numpy_compat.py | 3 +- pandas/tests/indexes/test_old_base.py | 14 +++----- pandas/tests/indexes/test_setops.py | 42 +++++++++-------------- pandas/tests/test_algos.py | 7 ++-- 11 files changed, 61 insertions(+), 62 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 73835252c0329..c6309b9af6793 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -76,6 +76,7 @@ assert_is_sorted, 
assert_is_valid_plot_return_object, assert_metadata_equivalent, + assert_mixed_int_string_entry, assert_numpy_array_equal, assert_period_array_equal, assert_series_equal, @@ -1096,6 +1097,7 @@ def shares_memory(left, right) -> bool: "assert_produces_warning", "assert_series_equal", "assert_sp_array_equal", + "assert_mixed_int_string_entry", "assert_timedelta_array_equal", "at", "BOOL_DTYPES", diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index d4e7e196dc2d4..1f3a803bbba96 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1364,3 +1364,20 @@ def assert_metadata_equivalent( assert val is None else: assert val == getattr(right, attr, None) + + +def assert_mixed_int_string_entry(arr) -> bool: + """ + Check that arr is mixed-int-string entry. + """ + # we might have an entry [0, "a", 1, "b", 2, "c"] with duplicates + # or with None or without the first element + if len(arr) < 3: + return False + else: + if isinstance(arr[0], int): + return isinstance(arr[1], str) + elif isinstance(arr[0], str): + return isinstance(arr[1], int) + else: + return isinstance(arr[2], int) and isinstance(arr[3], str) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 17ef800c5b0ba..bd7442bcd5d3a 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -441,7 +441,7 @@ def nargsort( try: indexer = non_nan_idx[non_nans.argsort(kind=kind)] except TypeError as err: - msg = "'<' not supported between " + msg = "'<' not supported between instances of 'int' and 'str'" raise TypeError(msg) from err if not ascending: indexer = indexer[::-1] diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 334e5a760cecc..410103543ea78 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -151,9 +151,8 @@ def test_searchsorted(request, index_or_series_obj): # comparison semantics https://github.com/numpy/numpy/issues/15981 mark = pytest.mark.xfail(reason="complex objects are 
not comparable") request.node.add_marker(mark) - elif any(isinstance(elem, int) for elem in obj.values[:]) and any( - isinstance(elem, str) for elem in obj.values[:] - ): + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(obj.values): with pytest.raises( TypeError, match="'>' not supported between instances of 'str' and 'int'" ): diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index f2175ad6b4f6d..8c21441ccd023 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -46,12 +46,9 @@ def test_value_counts(index_or_series_obj): # i.e IntegerDtype expected = expected.astype("Int64") - if ( - len(obj) > 0 - and isinstance(obj.values[0], int) - and isinstance(obj.values[1], str) - ): - msg = "'<' not supported between " + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(obj.values): + msg = "'<' not supported between instances of 'int' and 'str'" with pytest.raises(TypeError, match=msg): result.sort_index() else: @@ -75,9 +72,8 @@ def test_value_counts_null(null_obj, index_or_series_obj): pytest.skip("Test doesn't make sense on empty data") elif isinstance(orig, MultiIndex): pytest.skip(f"MultiIndex can't hold '{null_obj}'") - elif any(isinstance(elem, int) for elem in orig.values[:]) and any( - isinstance(elem, str) for elem in orig.values[:] - ): + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(orig.values): pytest.skip("'<' not supported between instances of 'str' and 'int'") values = obj._values diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 7a635d8ba4c01..d837881937d38 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -631,7 +631,8 @@ def test_union_duplicates(index, request): values = index.unique().values.tolist() mi1 = 
MultiIndex.from_arrays([values, [1] * len(values)]) mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)]) - if isinstance(index.values[0], int) and isinstance(index.values[1], str): + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(index.values): pytest.skip("'<' not supported between instances of 'str' and 'int'") else: result = mi2.union(mi1) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 28f4cff4d1fad..006ce936dfe75 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -448,10 +448,11 @@ def test_hasnans_isnans(self, index_flat): @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") @pytest.mark.parametrize("na_position", [None, "middle"]) def test_sort_values_invalid_na_position(index_with_missing, na_position): - if any(isinstance(elem, int) for elem in index_with_missing.values[:]) and any( - isinstance(elem, str) for elem in index_with_missing.values[:] - ): - with pytest.raises(TypeError, match="'<' not supported between "): + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(index_with_missing.values): + with pytest.raises( + TypeError, match="'<' not supported between instances of 'int' and 'str'" + ): index_with_missing.sort_values(na_position=na_position) else: with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): @@ -473,9 +474,8 @@ def test_sort_values_with_missing(index_with_missing, na_position, request): missing_count = np.sum(index_with_missing.isna()) not_na_vals = index_with_missing[index_with_missing.notna()].values - if any(isinstance(elem, int) for elem in index_with_missing.values[:]) and any( - isinstance(elem, str) for elem in index_with_missing.values[:] - ): + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(index_with_missing.values): with 
pytest.raises( TypeError, match="'<' not supported between instances of 'int' and 'str'" ): diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index dcfc1fe916b35..9874dfdf0561a 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -160,7 +160,8 @@ def test_numpy_ufuncs_reductions(index, func, request): with pytest.raises(TypeError, match="is not ordered for"): func.reduce(index) return - elif isinstance(index.values[0], int) and isinstance(index.values[1], str): + # This check is written for the mixed-int-string entry + elif tm.assert_mixed_int_string_entry(index.values): with pytest.raises( TypeError, match=".* not supported between instances of 'int' and 'str'" ): diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index f68f7f99d626c..8ce950b724c5e 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -331,11 +331,8 @@ def test_argsort(self, index): if isinstance(index, CategoricalIndex): pytest.skip(f"{type(self).__name__} separately tested") - if ( - len(index.values) > 0 - and isinstance(index.values[0], int) - and isinstance(index.values[1], str) - ): + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(index.values): with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'", @@ -347,11 +344,8 @@ def test_argsort(self, index): tm.assert_numpy_array_equal(result, expected, check_dtype=False) def test_numpy_argsort(self, index): - if ( - len(index.values) > 0 - and isinstance(index.values[0], int) - and isinstance(index.values[1], str) - ): + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(index.values): with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'", diff --git a/pandas/tests/indexes/test_setops.py 
b/pandas/tests/indexes/test_setops.py index 9a1ce944fc820..fe107e9050c79 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -33,12 +33,11 @@ def test_union_same_types(index): # Union with a non-unique, non-monotonic index raises error # Only needed for bool index factory - if ( - len(index.values) > 0 - and isinstance(index.values[0], int) - and isinstance(index.values[1], str) - ): - with pytest.raises(TypeError, match="'<' not supported between "): + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(index.values): + with pytest.raises( + TypeError, match="'<' not supported between instances of 'int' and 'str'" + ): index.sort_values() else: idx1 = index.sort_values() @@ -105,15 +104,13 @@ def test_union_different_types(index_flat, index_flat2, request): # Union with a non-unique, non-monotonic index raises error # This applies to the boolean index - if ( - len(idx1.values) > 0 - and isinstance(idx1.values[0], int) - and isinstance(idx1.values[1], str) - or len(idx2.values) > 0 - and isinstance(idx2.values[0], int) - and isinstance(idx2.values[1], str) - ): - with pytest.raises(TypeError, match="'<' not supported between "): + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry( + idx1.values + ) or tm.assert_mixed_int_string_entry(idx2.values): + with pytest.raises( + TypeError, match="'<' not supported between instances of 'int' and 'str'" + ): idx1.sort_values() idx2.sort_values() else: @@ -388,12 +385,11 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name): # test copy.union(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - if any(isinstance(elem, int) for elem in second.values[:]) and any( - isinstance(elem, str) for elem in second.values[:] - ): + # This check is written for the mixed-int-string entry + if 
tm.assert_mixed_int_string_entry(second.values): with pytest.raises( TypeError, - match="'<' not supported between ", + match="'<' not supported between instances of 'int' and 'str'", ): first.union(second).sort_values() else: @@ -464,14 +460,10 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name): # test copy.intersection(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - if ( - len(index.values) > 0 - and isinstance(index.values[0], int) - and isinstance(index.values[1], str) - ): + if tm.assert_mixed_int_string_entry(index.values): with pytest.raises( TypeError, - match="'<' not supported between ", + match="'<' not supported between instances of 'int' and 'str'", ): first.intersection(second).sort_values() else: diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 02ccf3f4cf226..64fe5c94b744a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -70,11 +70,8 @@ def test_factorize(self, index_or_series_obj, sort): expected_uniques = expected_uniques.astype(object) if sort: - if ( - len(expected_uniques.values) > 0 - and isinstance(expected_uniques.values[0], int) - and isinstance(expected_uniques.values[1], str) - ): + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(expected_uniques.values): pytest.skip("'<' not supported between instances of 'str' and 'int'") expected_uniques = expected_uniques.sort_values() From 2d67a27863762b142560330059d492a606056b11 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 27 Nov 2023 20:20:52 +0100 Subject: [PATCH 06/11] fix tests --- pandas/tests/indexes/test_setops.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index b17a7b10413c0..daeeaa8eaadbe 100644 --- a/pandas/tests/indexes/test_setops.py +++ 
b/pandas/tests/indexes/test_setops.py @@ -270,7 +270,15 @@ def test_union_base(self, index): everything = index union = first.union(second) - tm.assert_index_equal(union.sort_values(), everything.sort_values()) + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(index.values): + with pytest.raises( + TypeError, + match="'<' not supported between instances of 'int' and 'str'", + ): + tm.assert_index_equal(union.sort_values(), everything.sort_values()) + else: + tm.assert_index_equal(union.sort_values(), everything.sort_values()) if isinstance(index.dtype, DatetimeTZDtype): # The second.values below will drop tz, so the rest of this test @@ -335,7 +343,15 @@ def test_symmetric_difference(self, index): second = index[:-1] answer = index[[0, -1]] result = first.symmetric_difference(second) - tm.assert_index_equal(result.sort_values(), answer.sort_values()) + # This check is written for the mixed-int-string entry + if tm.assert_mixed_int_string_entry(index.values): + with pytest.raises( + TypeError, + match="'<' not supported between instances of 'int' and 'str'", + ): + tm.assert_index_equal(result.sort_values(), answer.sort_values()) + else: + tm.assert_index_equal(result.sort_values(), answer.sort_values()) # GH#10149 cases = [second.to_numpy(), second.to_series(), second.to_list()] From adf49292aa60d1db85f98b065967425feac91e7b Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 11 Jan 2024 01:13:05 +0100 Subject: [PATCH 07/11] correct def nargsort, correect docs and rename assert_mixed_int_string_entry --- pandas/_testing/__init__.py | 4 ++-- pandas/_testing/asserters.py | 5 +++-- pandas/core/sorting.py | 8 +------- pandas/tests/base/test_misc.py | 2 +- pandas/tests/base/test_value_counts.py | 4 ++-- pandas/tests/indexes/multi/test_setops.py | 2 +- pandas/tests/indexes/test_common.py | 4 ++-- pandas/tests/indexes/test_numpy_compat.py | 2 +- pandas/tests/indexes/test_old_base.py | 4 ++-- 
pandas/tests/indexes/test_setops.py | 24 +++++++++++------------ pandas/tests/test_algos.py | 2 +- 11 files changed, 28 insertions(+), 33 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index f86d00a520b33..1c6cfd1a0b46e 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -61,7 +61,7 @@ assert_is_sorted, assert_is_valid_plot_return_object, assert_metadata_equivalent, - assert_mixed_int_string_entry, + is_mixed_int_string_entry, assert_numpy_array_equal, assert_period_array_equal, assert_series_equal, @@ -577,7 +577,7 @@ def shares_memory(left, right) -> bool: "assert_produces_warning", "assert_series_equal", "assert_sp_array_equal", - "assert_mixed_int_string_entry", + "is_mixed_int_string_entry", "assert_timedelta_array_equal", "assert_cow_warning", "at", diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 5313ae100d283..acb8550146d60 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1382,9 +1382,10 @@ def assert_metadata_equivalent( assert val == getattr(right, attr, None) -def assert_mixed_int_string_entry(arr) -> bool: +def is_mixed_int_string_entry(arr) -> bool: """ - Check that arr is mixed-int-string entry. + Check that fixtures `index`, `index_flat` and `index_with_missing` + contain arr mixed int-string array. 
""" # we might have an entry [0, "a", 1, "b", 2, "c"] with duplicates # or with None or without the first element diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index bc9eec70ec431..a431842218b3b 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -436,13 +436,7 @@ def nargsort( if not ascending: non_nans = non_nans[::-1] non_nan_idx = non_nan_idx[::-1] - # GH#54072 - # argsort does not support mixed int/string Index - try: - indexer = non_nan_idx[non_nans.argsort(kind=kind)] - except TypeError as err: - msg = "'<' not supported between instances of 'int' and 'str'" - raise TypeError(msg) from err + indexer = non_nan_idx[non_nans.argsort(kind=kind)] if not ascending: indexer = indexer[::-1] # Finally, place the NaNs at the end or the beginning according to diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 3c9e30ace9432..2e340d062e9ec 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -158,7 +158,7 @@ def test_searchsorted(request, index_or_series_obj): request.applymarker(mark) # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(obj.values): + if tm.is_mixed_int_string_entry(obj.values): with pytest.raises( TypeError, match="'>' not supported between instances of 'str' and 'int'" ): diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 27487aab1c221..b69f9dcdbde25 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -48,7 +48,7 @@ def test_value_counts(index_or_series_obj): expected = expected.astype("Int64") # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(obj.values): + if tm.is_mixed_int_string_entry(obj.values): msg = "'<' not supported between instances of 'int' and 'str'" with pytest.raises(TypeError, match=msg): result.sort_index() @@ -74,7 +74,7 @@ def test_value_counts_null(null_obj, 
index_or_series_obj): elif isinstance(orig, MultiIndex): pytest.skip(f"MultiIndex can't hold '{null_obj}'") # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(orig.values): + if tm.is_mixed_int_string_entry(orig.values): pytest.skip("'<' not supported between instances of 'str' and 'int'") values = obj._values diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 93d8084c7fd74..fe285bc12ad0b 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -634,7 +634,7 @@ def test_union_duplicates(index, request): mi1 = MultiIndex.from_arrays([values, [1] * len(values)]) mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)]) # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(index.values): + if tm.is_mixed_int_string_entry(index.values): pytest.skip("'<' not supported between instances of 'str' and 'int'") else: result = mi2.union(mi1) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 3d94b9a6754dd..84ca4b73906c4 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -449,7 +449,7 @@ def test_hasnans_isnans(self, index_flat): @pytest.mark.parametrize("na_position", [None, "middle"]) def test_sort_values_invalid_na_position(index_with_missing, na_position): # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(index_with_missing.values): + if tm.is_mixed_int_string_entry(index_with_missing.values): with pytest.raises( TypeError, match="'<' not supported between instances of 'int' and 'str'" ): @@ -475,7 +475,7 @@ def test_sort_values_with_missing(index_with_missing, na_position, request): missing_count = np.sum(index_with_missing.isna()) not_na_vals = index_with_missing[index_with_missing.notna()].values # This check is written for the mixed-int-string 
entry - if tm.assert_mixed_int_string_entry(index_with_missing.values): + if tm.is_mixed_int_string_entry(index_with_missing.values): with pytest.raises( TypeError, match="'<' not supported between instances of 'int' and 'str'" ): diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 9874dfdf0561a..f579dad905067 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -161,7 +161,7 @@ def test_numpy_ufuncs_reductions(index, func, request): func.reduce(index) return # This check is written for the mixed-int-string entry - elif tm.assert_mixed_int_string_entry(index.values): + elif tm.is_mixed_int_string_entry(index.values): with pytest.raises( TypeError, match=".* not supported between instances of 'int' and 'str'" ): diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 20f8625bab808..8015f44306c0a 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -337,7 +337,7 @@ def test_argsort(self, index): pytest.skip(f"{type(self).__name__} separately tested") # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(index.values): + if tm.is_mixed_int_string_entry(index.values): with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'", @@ -350,7 +350,7 @@ def test_argsort(self, index): def test_numpy_argsort(self, index): # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(index.values): + if tm.is_mixed_int_string_entry(index.values): with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'", diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index f9c53c0c9811e..6a4989b5a5311 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -60,9 +60,9 @@ def 
test_union_same_types(index): # Union with a non-unique, non-monotonic index raises error # Only needed for bool index factory # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(index.values): + if tm.is_mixed_int_string_entry(index.values): with pytest.raises( - TypeError, match="'<' not supported between instances of 'int' and 'str'" + TypeError, match="'<' not supported between instances of 'str' and 'int'" ): index.sort_values() else: @@ -131,11 +131,11 @@ def test_union_different_types(index_flat, index_flat2, request): # Union with a non-unique, non-monotonic index raises error # This applies to the boolean index # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry( + if tm.is_mixed_int_string_entry( idx1.values - ) or tm.assert_mixed_int_string_entry(idx2.values): + ) or tm.is_mixed_int_string_entry(idx2.values): with pytest.raises( - TypeError, match="'<' not supported between instances of 'int' and 'str'" + TypeError, match="'<' not supported between instances of 'str' and 'int'" ): idx1.sort_values() idx2.sort_values() @@ -268,10 +268,10 @@ def test_union_base(self, index): union = first.union(second) # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(index.values): + if tm.is_mixed_int_string_entry(index.values): with pytest.raises( TypeError, - match="'<' not supported between instances of 'int' and 'str'", + match="'<' not supported between instances of 'str' and 'int'", ): tm.assert_index_equal(union.sort_values(), everything.sort_values()) else: @@ -341,10 +341,10 @@ def test_symmetric_difference(self, index): answer = index[[0, -1]] result = first.symmetric_difference(second) # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(index.values): + if tm.is_mixed_int_string_entry(index.values): with pytest.raises( TypeError, - match="'<' not supported between instances of 'int' and 'str'", + 
match="'<' not supported between instances of 'str' and 'int'", ): tm.assert_index_equal(result.sort_values(), answer.sort_values()) else: @@ -428,10 +428,10 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name): first = index.copy().set_names(fname) second = index[1:].set_names(sname) # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(second.values): + if tm.is_mixed_int_string_entry(second.values): with pytest.raises( TypeError, - match="'<' not supported between instances of 'int' and 'str'", + match="'<' not supported between instances of 'str' and 'int'", ): first.union(second).sort_values() else: @@ -504,7 +504,7 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name): # test copy.intersection(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - if tm.assert_mixed_int_string_entry(index.values): + if tm.is_mixed_int_string_entry(index.values): with pytest.raises( TypeError, match="'<' not supported between instances of 'int' and 'str'", diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 0a5f814be7cc3..e8bf098da14df 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -84,7 +84,7 @@ def test_factorize(self, index_or_series_obj, sort): if sort: # This check is written for the mixed-int-string entry - if tm.assert_mixed_int_string_entry(expected_uniques.values): + if tm.is_mixed_int_string_entry(expected_uniques.values): pytest.skip("'<' not supported between instances of 'str' and 'int'") expected_uniques = expected_uniques.sort_values() From 5eef4fc1d360cc4d50183ce98c3702a2c3d8bf8d Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 11 Jan 2024 10:39:52 +0100 Subject: [PATCH 08/11] add import _testing --- pandas/tests/base/test_misc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 
b9d9a60fdeb61..a185fcd0b45d7 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -17,6 +17,7 @@ Index, Series, ) +import pandas._testing as tm def test_isnull_notnull_docstrings(): From 8a487e36b3e332feed8552c02c56ce59d5bd0443 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 11 Jan 2024 10:44:32 +0100 Subject: [PATCH 09/11] fix pre-commit errors --- pandas/_testing/__init__.py | 2 +- pandas/tests/indexes/test_setops.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 1c6cfd1a0b46e..ff046cf9e3b23 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -61,12 +61,12 @@ assert_is_sorted, assert_is_valid_plot_return_object, assert_metadata_equivalent, - is_mixed_int_string_entry, assert_numpy_array_equal, assert_period_array_equal, assert_series_equal, assert_sp_array_equal, assert_timedelta_array_equal, + is_mixed_int_string_entry, raise_assert_detail, ) from pandas._testing.compat import ( diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 3959fc8761634..2755a184f38ab 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -136,9 +136,9 @@ def test_union_different_types(index_flat, index_flat2, request): # Union with a non-unique, non-monotonic index raises error # This applies to the boolean index # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry( - idx1.values - ) or tm.is_mixed_int_string_entry(idx2.values): + if tm.is_mixed_int_string_entry(idx1.values) or tm.is_mixed_int_string_entry( + idx2.values + ): with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'" ): From e7edbbe7437ad58dd7e2d73531c4c3845fe493aa Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 3 Feb 2024 00:36:14 +0100 Subject: [PATCH 10/11] fix pre-commit error --- 
pandas/_testing/asserters.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 81b9a0d9b1550..6191749fccfde 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1443,10 +1443,9 @@ def is_mixed_int_string_entry(arr) -> bool: # or with None or without the first element if len(arr) < 3: return False + elif isinstance(arr[0], int): + return isinstance(arr[1], str) + elif isinstance(arr[0], str): + return isinstance(arr[1], int) else: - if isinstance(arr[0], int): - return isinstance(arr[1], str) - elif isinstance(arr[0], str): - return isinstance(arr[1], int) - else: - return isinstance(arr[2], int) and isinstance(arr[3], str) + return isinstance(arr[2], int) and isinstance(arr[3], str) From 82ad988ab0132cdcc8aade9478d74152fbf365ae Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 16 Mar 2024 01:12:20 +0100 Subject: [PATCH 11/11] replace is_mixed_int_string_entry with request.node.callspec.id, correct tests --- pandas/_testing/__init__.py | 2 -- pandas/_testing/asserters.py | 17 --------- pandas/tests/base/test_misc.py | 10 ++---- pandas/tests/base/test_value_counts.py | 8 ++--- pandas/tests/indexes/multi/test_setops.py | 6 ++-- pandas/tests/indexes/test_common.py | 7 ++-- pandas/tests/indexes/test_numpy_compat.py | 5 ++- pandas/tests/indexes/test_old_base.py | 8 ++--- pandas/tests/indexes/test_setops.py | 43 ++++++++++++++++------- pandas/tests/test_algos.py | 4 +-- 10 files changed, 53 insertions(+), 57 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 4168798e00924..12395b42bba19 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -65,7 +65,6 @@ assert_series_equal, assert_sp_array_equal, assert_timedelta_array_equal, - is_mixed_int_string_entry, raise_assert_detail, ) from pandas._testing.compat import ( @@ -573,7 +572,6 @@ def shares_memory(left, right) -> bool: 
"assert_produces_warning", "assert_series_equal", "assert_sp_array_equal", - "is_mixed_int_string_entry", "assert_timedelta_array_equal", "at", "BOOL_DTYPES", diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 481935eed09d0..3aacd3099c334 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1433,20 +1433,3 @@ def assert_metadata_equivalent( assert val is None else: assert val == getattr(right, attr, None) - - -def is_mixed_int_string_entry(arr) -> bool: - """ - Check that fixtures `index`, `index_flat` and `index_with_missing` - contain arr mixed int-string array. - """ - # we might have an entry [0, "a", 1, "b", 2, "c"] with duplicates - # or with None or without the first element - if len(arr) < 3: - return False - elif isinstance(arr[0], int): - return isinstance(arr[1], str) - elif isinstance(arr[0], str): - return isinstance(arr[1], int) - else: - return isinstance(arr[2], int) and isinstance(arr[3], str) diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index a185fcd0b45d7..d0d7759a09a9c 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -17,7 +17,6 @@ Index, Series, ) -import pandas._testing as tm def test_isnull_notnull_docstrings(): @@ -162,13 +161,8 @@ def test_searchsorted(request, index_or_series_obj): request.applymarker(mark) # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(obj.values): - with pytest.raises( - TypeError, match="'>' not supported between instances of 'str' and 'int'" - ): - max_obj = max(obj, default=0) - index = np.searchsorted(obj, max_obj) - return + if request.node.callspec.id == "mixed-int-string": + pytest.skip("'>' not supported between instances of 'str' and 'int'") max_obj = max(obj, default=0) index = np.searchsorted(obj, max_obj) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index a42059fa66de4..0948cd2063a3d 100644 --- 
a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -21,7 +21,7 @@ @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") -def test_value_counts(index_or_series_obj): +def test_value_counts(index_or_series_obj, request): obj = index_or_series_obj obj = np.repeat(obj, range(1, len(obj) + 1)) result = obj.value_counts() @@ -48,7 +48,7 @@ def test_value_counts(index_or_series_obj): expected = expected.astype("Int64") # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(obj.values): + if request.node.callspec.id == "mixed-int-string": msg = "'<' not supported between instances of 'int' and 'str'" with pytest.raises(TypeError, match=msg): result.sort_index() @@ -63,7 +63,7 @@ def test_value_counts(index_or_series_obj): @pytest.mark.parametrize("null_obj", [np.nan, None]) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") -def test_value_counts_null(null_obj, index_or_series_obj): +def test_value_counts_null(null_obj, index_or_series_obj, request): orig = index_or_series_obj obj = orig.copy() @@ -74,7 +74,7 @@ def test_value_counts_null(null_obj, index_or_series_obj): elif isinstance(orig, MultiIndex): pytest.skip(f"MultiIndex can't hold '{null_obj}'") # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(orig.values): + if request.node.callspec.id in ["mixed-int-string-nan", "mixed-int-string-None"]: pytest.skip("'<' not supported between instances of 'str' and 'int'") values = obj._values diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 4c9ad79a520b8..6fb2daab3e3f8 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -634,10 +634,10 @@ def test_union_duplicates(index, request): mi1 = MultiIndex.from_arrays([values, [1] * len(values)]) mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * 
(len(values) + 1)]) # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(index.values): + if request.node.callspec.id == "mixed-int-string": pytest.skip("'<' not supported between instances of 'str' and 'int'") - else: - result = mi2.union(mi1) + + result = mi2.union(mi1) expected = mi2.sort_values() tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 52fdbe6ce6bae..3fd71332c2bcd 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -446,9 +446,9 @@ def test_hasnans_isnans(self, index_flat): @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") @pytest.mark.parametrize("na_position", [None, "middle"]) -def test_sort_values_invalid_na_position(index_with_missing, na_position): +def test_sort_values_invalid_na_position(index_with_missing, na_position, request): # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(index_with_missing.values): + if request.node.callspec.id in ["mixed-int-string-None", "mixed-int-string-middle"]: with pytest.raises( TypeError, match="'<' not supported between instances of 'int' and 'str'" ): @@ -474,7 +474,8 @@ def test_sort_values_with_missing(index_with_missing, na_position, request): missing_count = np.sum(index_with_missing.isna()) not_na_vals = index_with_missing[index_with_missing.notna()].values # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(index_with_missing.values): + + if request.node.callspec.id in ["mixed-int-string-first", "mixed-int-string-last"]: with pytest.raises( TypeError, match="'<' not supported between instances of 'int' and 'str'" ): diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index f579dad905067..e4e8797a82136 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ 
b/pandas/tests/indexes/test_numpy_compat.py @@ -161,7 +161,10 @@ def test_numpy_ufuncs_reductions(index, func, request): func.reduce(index) return # This check is written for the mixed-int-string entry - elif tm.is_mixed_int_string_entry(index.values): + elif request.node.callspec.id in [ + "mixed-int-string-maximum", + "mixed-int-string-minimum", + ]: with pytest.raises( TypeError, match=".* not supported between instances of 'int' and 'str'" ): diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 5d8e82163f1a0..191e5b700f03b 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -331,12 +331,12 @@ def test_memory_usage(self, index): if index.inferred_type == "object": assert result3 > result2 - def test_argsort(self, index): + def test_argsort(self, index, request): if isinstance(index, CategoricalIndex): pytest.skip(f"{type(self).__name__} separately tested") # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(index.values): + if request.node.callspec.id == "mixed-int-string": with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'", @@ -347,9 +347,9 @@ def test_argsort(self, index): expected = np.array(index).argsort() tm.assert_numpy_array_equal(result, expected, check_dtype=False) - def test_numpy_argsort(self, index): + def test_numpy_argsort(self, index, request): # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(index.values): + if request.node.callspec.id == "mixed-int-string": with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'", diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 78edc103c632f..02a084026aac4 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -62,11 +62,11 @@ def index_flat2(index_flat): return index_flat -def 
test_union_same_types(index): +def test_union_same_types(index, request): # Union with a non-unique, non-monotonic index raises error # Only needed for bool index factory # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(index.values): + if request.node.callspec.id == "mixed-int-string": with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'" ): @@ -137,9 +137,7 @@ def test_union_different_types(index_flat, index_flat2, request): # Union with a non-unique, non-monotonic index raises error # This applies to the boolean index # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(idx1.values) or tm.is_mixed_int_string_entry( - idx2.values - ): + if request.node.callspec.id == "mixed-int-string": with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'" ): @@ -266,7 +264,7 @@ def test_intersection_base(self, index): "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning" ) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") - def test_union_base(self, index): + def test_union_base(self, index, request): index = index.unique() first = index[3:] second = index[:5] @@ -274,7 +272,7 @@ def test_union_base(self, index): union = first.union(second) # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(index.values): + if request.node.callspec.id == "mixed-int-string": with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'", @@ -332,7 +330,7 @@ def test_difference_base(self, sort, index): @pytest.mark.filterwarnings( "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning" ) - def test_symmetric_difference(self, index): + def test_symmetric_difference(self, index, request): if isinstance(index, CategoricalIndex): pytest.skip(f"Not relevant for {type(index).__name__}") if len(index) < 2: @@ -347,7 
+345,7 @@ def test_symmetric_difference(self, index): answer = index[[0, -1]] result = first.symmetric_difference(second) # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(index.values): + if request.node.callspec.id == "mixed-int-string": with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'", @@ -424,7 +422,7 @@ def test_corner_union(self, index_flat, fname, sname, expected_name): (None, None, None), ], ) - def test_union_unequal(self, index_flat, fname, sname, expected_name): + def test_union_unequal(self, index_flat, fname, sname, expected_name, request): if not index_flat.is_unique: index = index_flat.unique() else: @@ -434,7 +432,16 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name): first = index.copy().set_names(fname) second = index[1:].set_names(sname) # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(second.values): + if request.node.callspec.id in [ + "-".join(["mixed-int-string", t]) + for t in [ + "A-A-A", + "A-B-None", + "A-None-None", + "None-B-None", + "None-None-None", + ] + ]: with pytest.raises( TypeError, match="'<' not supported between instances of 'str' and 'int'", @@ -501,7 +508,7 @@ def test_corner_intersect(self, index_flat, fname, sname, expected_name): (None, None, None), ], ) - def test_intersect_unequal(self, index_flat, fname, sname, expected_name): + def test_intersect_unequal(self, index_flat, fname, sname, expected_name, request): if not index_flat.is_unique: index = index_flat.unique() else: @@ -510,7 +517,17 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name): # test copy.intersection(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - if tm.is_mixed_int_string_entry(index.values): + # This check is written for the mixed-int-string entry + if request.node.callspec.id in [ + 
"-".join(["mixed-int-string", t]) + for t in [ + "A-A-A", + "A-B-None", + "A-None-None", + "None-B-None", + "None-None-None", + ] + ]: with pytest.raises( TypeError, match="'<' not supported between instances of 'int' and 'str'", diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index cd1efc450e0af..dbfd80f783f9b 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -66,7 +66,7 @@ def test_factorize_complex(self): expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object) tm.assert_numpy_array_equal(uniques, expected_uniques) - def test_factorize(self, index_or_series_obj, sort): + def test_factorize(self, index_or_series_obj, sort, request): obj = index_or_series_obj result_codes, result_uniques = obj.factorize(sort=sort) @@ -86,7 +86,7 @@ def test_factorize(self, index_or_series_obj, sort): if sort: # This check is written for the mixed-int-string entry - if tm.is_mixed_int_string_entry(expected_uniques.values): + if request.node.callspec.id == "mixed-int-string-True": pytest.skip("'<' not supported between instances of 'str' and 'int'") expected_uniques = expected_uniques.sort_values()