pandas-dev · natmokval · Sep 26, 2023 · Sep 30, 2023 · Oct 9, 2023 · Oct 9, 2023
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -632,6 +632,7 @@ def _create_mi_with_dt64tz_level():
     "empty": Index([]),
     "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
     "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(),
+    "mixed-int-string": Index([0, "a", 1, "b", 2, "c"]),
     "multi": _create_multiindex(),
     "repeats": Index([0, 0, 1, 1, 2, 2]),
     "nullable_int": Index(np.arange(100), dtype="Int64"),

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
@@ -436,7 +436,13 @@ def nargsort(
     if not ascending:
         non_nans = non_nans[::-1]
         non_nan_idx = non_nan_idx[::-1]
-    indexer = non_nan_idx[non_nans.argsort(kind=kind)]
+    # GH#54072
+    # argsort does not support mixed int/string Index
+    try:
+        indexer = non_nan_idx[non_nans.argsort(kind=kind)]
+    except TypeError as err:
+        msg = "'<' not supported between "
+        raise TypeError(msg) from err
     if not ascending:
         indexer = indexer[::-1]
     # Finally, place the NaNs at the end or the beginning according to

diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py
@@ -151,6 +151,15 @@ def test_searchsorted(request, index_or_series_obj):
         #  comparison semantics https://github.com/numpy/numpy/issues/15981
         mark = pytest.mark.xfail(reason="complex objects are not comparable")
         request.node.add_marker(mark)
+    elif any(isinstance(elem, int) for elem in obj.values[:]) and any(
+        isinstance(elem, str) for elem in obj.values[:]
+    ):
+        with pytest.raises(
+            TypeError, match="'>' not supported between instances of 'str' and 'int'"
+        ):
+            max_obj = max(obj, default=0)
+            index = np.searchsorted(obj, max_obj)
+        return
 
     max_obj = max(obj, default=0)
     index = np.searchsorted(obj, max_obj)

diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
@@ -46,12 +46,21 @@ def test_value_counts(index_or_series_obj):
             # i.e IntegerDtype
             expected = expected.astype("Int64")
 
-    # TODO(GH#32514): Order of entries with the same count is inconsistent
-    #  on CI (gh-32449)
-    if obj.duplicated().any():
-        result = result.sort_index()
-        expected = expected.sort_index()
-    tm.assert_series_equal(result, expected)
+    if (
+        len(obj) > 0
+        and isinstance(obj.values[0], int)
+        and isinstance(obj.values[1], str)
+    ):
+        msg = "'<' not supported between "
+        with pytest.raises(TypeError, match=msg):
+            result.sort_index()
+    else:
+        # TODO(GH#32514): Order of entries with the same count is inconsistent
+        #  on CI (gh-32449)
+        if obj.duplicated().any():
+            result = result.sort_index()
+            expected = expected.sort_index()
+        tm.assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize("null_obj", [np.nan, None])
@@ -66,6 +75,10 @@ def test_value_counts_null(null_obj, index_or_series_obj):
         pytest.skip("Test doesn't make sense on empty data")
     elif isinstance(orig, MultiIndex):
         pytest.skip(f"MultiIndex can't hold '{null_obj}'")
+    elif any(isinstance(elem, int) for elem in orig.values[:]) and any(
+        isinstance(elem, str) for elem in orig.values[:]
+    ):
+        pytest.skip("'<' not supported between instances of 'str' and 'int'")
 
     values = obj._values
     values[0:2] = null_obj

diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py
@@ -631,7 +631,10 @@ def test_union_duplicates(index, request):
     values = index.unique().values.tolist()
     mi1 = MultiIndex.from_arrays([values, [1] * len(values)])
     mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)])
-    result = mi2.union(mi1)
+    if isinstance(index.values[0], int) and isinstance(index.values[1], str):
+        pytest.skip("'<' not supported between instances of 'str' and 'int'")
+    else:
+        result = mi2.union(mi1)
     expected = mi2.sort_values()
     tm.assert_index_equal(result, expected)
 

diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
@@ -448,8 +448,14 @@ def test_hasnans_isnans(self, index_flat):
 @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
 @pytest.mark.parametrize("na_position", [None, "middle"])
 def test_sort_values_invalid_na_position(index_with_missing, na_position):
-    with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
-        index_with_missing.sort_values(na_position=na_position)
+    if any(isinstance(elem, int) for elem in index_with_missing.values[:]) and any(
+        isinstance(elem, str) for elem in index_with_missing.values[:]
+    ):
+        with pytest.raises(TypeError, match="'<' not supported between "):
+            index_with_missing.sort_values(na_position=na_position)
+    else:
+        with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
+            index_with_missing.sort_values(na_position=na_position)
 
 
 @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@@ -467,17 +473,27 @@ def test_sort_values_with_missing(index_with_missing, na_position, request):
 
     missing_count = np.sum(index_with_missing.isna())
     not_na_vals = index_with_missing[index_with_missing.notna()].values
-    sorted_values = np.sort(not_na_vals)
-    if na_position == "first":
-        sorted_values = np.concatenate([[None] * missing_count, sorted_values])
+    if any(isinstance(elem, int) for elem in index_with_missing.values[:]) and any(
+        isinstance(elem, str) for elem in index_with_missing.values[:]
+    ):
+        with pytest.raises(
+            TypeError, match="'<' not supported between instances of 'int' and 'str'"
+        ):
+            np.sort(not_na_vals)
     else:
-        sorted_values = np.concatenate([sorted_values, [None] * missing_count])
+        sorted_values = np.sort(not_na_vals)
+        if na_position == "first":
+            sorted_values = np.concatenate([[None] * missing_count, sorted_values])
+        else:
+            sorted_values = np.concatenate([sorted_values, [None] * missing_count])
 
-    # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
-    expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype)
+        # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
+        expected = type(index_with_missing)(
+            sorted_values, dtype=index_with_missing.dtype
+        )
 
-    result = index_with_missing.sort_values(na_position=na_position)
-    tm.assert_index_equal(result, expected)
+        result = index_with_missing.sort_values(na_position=na_position)
+        tm.assert_index_equal(result, expected)
 
 
 def test_ndarray_compat_properties(index):

diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py
@@ -160,6 +160,12 @@ def test_numpy_ufuncs_reductions(index, func, request):
         with pytest.raises(TypeError, match="is not ordered for"):
             func.reduce(index)
         return
+    elif isinstance(index.values[0], int) and isinstance(index.values[1], str):
+        with pytest.raises(
+            TypeError, match=".* not supported between instances of 'int' and 'str'"
+        ):
+            func.reduce(index)
+        return
     else:
         result = func.reduce(index)
 

diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py
@@ -331,18 +331,40 @@ def test_argsort(self, index):
         if isinstance(index, CategoricalIndex):
             pytest.skip(f"{type(self).__name__} separately tested")
 
-        result = index.argsort()
-        expected = np.array(index).argsort()
-        tm.assert_numpy_array_equal(result, expected, check_dtype=False)
+        if (
+            len(index.values) > 0
+            and isinstance(index.values[0], int)
+            and isinstance(index.values[1], str)
+        ):
+            with pytest.raises(
+                TypeError,
+                match="'<' not supported between instances of 'str' and 'int'",
+            ):
+                index.argsort()
+        else:
+            result = index.argsort()
+            expected = np.array(index).argsort()
+            tm.assert_numpy_array_equal(result, expected, check_dtype=False)
 
     def test_numpy_argsort(self, index):
-        result = np.argsort(index)
-        expected = index.argsort()
-        tm.assert_numpy_array_equal(result, expected)
+        if (
+            len(index.values) > 0
+            and isinstance(index.values[0], int)
+            and isinstance(index.values[1], str)
+        ):
+            with pytest.raises(
+                TypeError,
+                match="'<' not supported between instances of 'str' and 'int'",
+            ):
+                np.argsort(index)
+        else:
+            result = np.argsort(index)
+            expected = index.argsort()
+            tm.assert_numpy_array_equal(result, expected)
 
-        result = np.argsort(index, kind="mergesort")
-        expected = index.argsort(kind="mergesort")
-        tm.assert_numpy_array_equal(result, expected)
+            result = np.argsort(index, kind="mergesort")
+            expected = index.argsort(kind="mergesort")
+            tm.assert_numpy_array_equal(result, expected)
 
         # these are the only two types that perform
         # pandas compatibility input validation - the

diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
@@ -33,9 +33,17 @@
 def test_union_same_types(index):
     # Union with a non-unique, non-monotonic index raises error
     # Only needed for bool index factory
-    idx1 = index.sort_values()
-    idx2 = index.sort_values()
-    assert idx1.union(idx2).dtype == idx1.dtype
+    if (
+        len(index.values) > 0
+        and isinstance(index.values[0], int)
+        and isinstance(index.values[1], str)
+    ):
+        with pytest.raises(TypeError, match="'<' not supported between "):
+            index.sort_values()
+    else:
+        idx1 = index.sort_values()
+        idx2 = index.sort_values()
+        assert idx1.union(idx2).dtype == idx1.dtype
 
 
 def test_union_different_types(index_flat, index_flat2, request):
@@ -97,19 +105,30 @@ def test_union_different_types(index_flat, index_flat2, request):
 
     # Union with a non-unique, non-monotonic index raises error
     # This applies to the boolean index
-    idx1 = idx1.sort_values()
-    idx2 = idx2.sort_values()
-
-    with tm.assert_produces_warning(warn, match=msg):
-        res1 = idx1.union(idx2)
-        res2 = idx2.union(idx1)
-
-    if any_uint64 and (idx1_signed or idx2_signed):
-        assert res1.dtype == np.dtype("O")
-        assert res2.dtype == np.dtype("O")
+    if (
+        len(idx1.values) > 0
+        and isinstance(idx1.values[0], int)
+        and isinstance(idx1.values[1], str)
+        or len(idx2.values) > 0
+        and isinstance(idx2.values[0], int)
+        and isinstance(idx2.values[1], str)
+    ):
+        with pytest.raises(TypeError, match="'<' not supported between "):
+            idx1.sort_values()
+            idx2.sort_values()
     else:
-        assert res1.dtype == common_dtype
-        assert res2.dtype == common_dtype
+        idx1 = idx1.sort_values()
+        idx2 = idx2.sort_values()
+        with tm.assert_produces_warning(warn, match=msg):
+            res1 = idx1.union(idx2)
+            res2 = idx2.union(idx1)
+
+        if any_uint64 and (idx1_signed or idx2_signed):
+            assert res1.dtype == np.dtype("O")
+            assert res2.dtype == np.dtype("O")
+        else:
+            assert res1.dtype == common_dtype
+            assert res2.dtype == common_dtype
 
 
 @pytest.mark.parametrize(
@@ -369,9 +388,18 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name):
         # test copy.union(subset) - need sort for unicode and string
         first = index.copy().set_names(fname)
         second = index[1:].set_names(sname)
-        union = first.union(second).sort_values()
-        expected = index.set_names(expected_name).sort_values()
-        tm.assert_index_equal(union, expected)
+        if any(isinstance(elem, int) for elem in second.values[:]) and any(
+            isinstance(elem, str) for elem in second.values[:]
+        ):
+            with pytest.raises(
+                TypeError,
+                match="'<' not supported between ",
+            ):
+                first.union(second).sort_values()
+        else:
+            union = first.union(second).sort_values()
+            expected = index.set_names(expected_name).sort_values()
+            tm.assert_index_equal(union, expected)
 
     @pytest.mark.parametrize(
         "fname, sname, expected_name",
@@ -436,9 +464,20 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name):
         # test copy.intersection(subset) - need sort for unicode and string
         first = index.copy().set_names(fname)
         second = index[1:].set_names(sname)
-        intersect = first.intersection(second).sort_values()
-        expected = index[1:].set_names(expected_name).sort_values()
-        tm.assert_index_equal(intersect, expected)
+        if (
+            len(index.values) > 0
+            and isinstance(index.values[0], int)
+            and isinstance(index.values[1], str)
+        ):
+            with pytest.raises(
+                TypeError,
+                match="'<' not supported between ",
+            ):
+                first.intersection(second).sort_values()
+        else:
+            intersect = first.intersection(second).sort_values()
+            expected = index[1:].set_names(expected_name).sort_values()
+            tm.assert_index_equal(intersect, expected)
 
     @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
     def test_intersection_name_retention_with_nameless(self, index):

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -70,6 +70,12 @@ def test_factorize(self, index_or_series_obj, sort):
             expected_uniques = expected_uniques.astype(object)
 
         if sort:
+            if (
+                len(expected_uniques.values) > 0
+                and isinstance(expected_uniques.values[0], int)
+                and isinstance(expected_uniques.values[1], str)
+            ):
+                pytest.skip("'<' not supported between instances of 'str' and 'int'")
             expected_uniques = expected_uniques.sort_values()
 
         # construct an integer ndarray so that