Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: fix tests for mixed int string index #55458

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9b8c639
add to indices_dict entry mixed-int-string
natmokval Sep 26, 2023
3645428
fix tests in test_old_base.py, test_numpy_compat.py
natmokval Sep 30, 2023
6b0e2c2
add except TypeError to def nargsort, fix tests
natmokval Oct 9, 2023
786c250
Merge branch 'main' into TST-mixed-int-string-Index
natmokval Oct 9, 2023
533b4a2
fix tests
natmokval Oct 9, 2023
4ad1bc4
correct the error msg, add tm.assert_mixed_int_string_entry, correct …
natmokval Oct 13, 2023
87160c0
resolve conflict
natmokval Oct 16, 2023
a27bc13
Merge branch 'main' into TST-mixed-int-string-Index
natmokval Nov 27, 2023
2d67a27
fix tests
natmokval Nov 27, 2023
531beda
Merge branch 'main' into TST-mixed-int-string-Index
natmokval Nov 27, 2023
f09b0ac
Merge branch 'main' into TST-mixed-int-string-Index
natmokval Jan 1, 2024
adf4929
correct def nargsort, correct docs and rename assert_mixed_int_strin…
natmokval Jan 11, 2024
fac6291
Merge branch 'main' into TST-mixed-int-string-Index
natmokval Jan 11, 2024
5eef4fc
add import _testing
natmokval Jan 11, 2024
8a487e3
fix pre-commit errors
natmokval Jan 11, 2024
5fb2f55
Merge branch 'main' into TST-mixed-int-string-Index
natmokval Jan 31, 2024
3081ad5
Merge branch 'main' into TST-mixed-int-string-Index
natmokval Feb 2, 2024
e7edbbe
fix pre-commit error
natmokval Feb 2, 2024
14d6b6f
Merge branch 'main' into TST-mixed-int-string-Index
natmokval Mar 8, 2024
454f0b6
Merge branch 'main' into TST-mixed-int-string-Index
natmokval Mar 15, 2024
82ad988
replace is_mixed_int_string_entry with request.node.callspec.id, corr…
natmokval Mar 16, 2024
72435c6
Merge branch 'main' into TST-mixed-int-string-Index
natmokval Mar 25, 2024
7ebd7ef
resolve conflicts
natmokval Jul 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,7 @@ def _create_mi_with_dt64tz_level():
"empty": Index([]),
"tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
"mi-with-dt64tz-level": _create_mi_with_dt64tz_level(),
"mixed-int-string": Index([0, "a", 1, "b", 2, "c"]),
"multi": _create_multiindex(),
"repeats": Index([0, 0, 1, 1, 2, 2]),
"nullable_int": Index(np.arange(10), dtype="Int64"),
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/base/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@ def test_searchsorted(request, index_or_series_obj):
mark = pytest.mark.xfail(reason="complex objects are not comparable")
request.applymarker(mark)

# This check is written for the mixed-int-string entry
if request.node.callspec.id == "mixed-int-string":
pytest.skip("'>' not supported between instances of 'str' and 'int'")

max_obj = max(obj, default=0)
index = np.searchsorted(obj, max_obj)
assert 0 <= index <= len(obj)
Expand Down
13 changes: 11 additions & 2 deletions pandas/tests/base/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@


@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_value_counts(index_or_series_obj):
def test_value_counts(index_or_series_obj, request):
obj = index_or_series_obj
obj = np.repeat(obj, range(1, len(obj) + 1))
result = obj.value_counts()
Expand Down Expand Up @@ -49,10 +49,16 @@ def test_value_counts(index_or_series_obj):

tm.assert_series_equal(result, expected)

# This check is written for the mixed-int-string entry
if request.node.callspec.id == "mixed-int-string":
msg = "'<' not supported between instances of 'int' and 'str'"
with pytest.raises(TypeError, match=msg):
result.sort_index()


@pytest.mark.parametrize("null_obj", [np.nan, None])
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_value_counts_null(null_obj, index_or_series_obj):
def test_value_counts_null(null_obj, index_or_series_obj, request):
orig = index_or_series_obj
obj = orig.copy()

Expand All @@ -62,6 +68,9 @@ def test_value_counts_null(null_obj, index_or_series_obj):
pytest.skip("Test doesn't make sense on empty data")
elif isinstance(orig, MultiIndex):
pytest.skip(f"MultiIndex can't hold '{null_obj}'")
# This check is written for the mixed-int-string entry
if request.node.callspec.id in ["mixed-int-string-nan", "mixed-int-string-None"]:
pytest.skip("'<' not supported between instances of 'str' and 'int'")

values = obj._values
values[0:2] = null_obj
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexes/multi/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,10 @@ def test_union_duplicates(index, request):
values = index.unique().values.tolist()
mi1 = MultiIndex.from_arrays([values, [1] * len(values)])
mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)])
# This check is written for the mixed-int-string entry
if request.node.callspec.id == "mixed-int-string":
pytest.skip("'<' not supported between instances of 'str' and 'int'")

result = mi2.union(mi1)
expected = mi2.sort_values()
tm.assert_index_equal(result, expected)
Expand Down
39 changes: 28 additions & 11 deletions pandas/tests/indexes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,9 +437,16 @@ def test_hasnans_isnans(self, index_flat):

@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.parametrize("na_position", [None, "middle"])
def test_sort_values_invalid_na_position(index_with_missing, na_position):
with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
index_with_missing.sort_values(na_position=na_position)
def test_sort_values_invalid_na_position(index_with_missing, na_position, request):
# This check is written for the mixed-int-string entry
if request.node.callspec.id in ["mixed-int-string-None", "mixed-int-string-middle"]:
with pytest.raises(
TypeError, match="'<' not supported between instances of 'int' and 'str'"
):
index_with_missing.sort_values(na_position=na_position)
else:
with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
index_with_missing.sort_values(na_position=na_position)


@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
Expand All @@ -457,17 +464,27 @@ def test_sort_values_with_missing(index_with_missing, na_position, request):

missing_count = np.sum(index_with_missing.isna())
not_na_vals = index_with_missing[index_with_missing.notna()].values
sorted_values = np.sort(not_na_vals)
if na_position == "first":
sorted_values = np.concatenate([[None] * missing_count, sorted_values])
# This check is written for the mixed-int-string entry

if request.node.callspec.id in ["mixed-int-string-first", "mixed-int-string-last"]:
with pytest.raises(
TypeError, match="'<' not supported between instances of 'int' and 'str'"
):
np.sort(not_na_vals)
else:
sorted_values = np.concatenate([sorted_values, [None] * missing_count])
sorted_values = np.sort(not_na_vals)
if na_position == "first":
sorted_values = np.concatenate([[None] * missing_count, sorted_values])
else:
sorted_values = np.concatenate([sorted_values, [None] * missing_count])

# Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype)
# Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
expected = type(index_with_missing)(
sorted_values, dtype=index_with_missing.dtype
)

result = index_with_missing.sort_values(na_position=na_position)
tm.assert_index_equal(result, expected)
result = index_with_missing.sort_values(na_position=na_position)
tm.assert_index_equal(result, expected)


def test_sort_values_natsort_key():
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/indexes/test_numpy_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,16 @@ def test_numpy_ufuncs_reductions(index, func, request):
with pytest.raises(TypeError, match="is not ordered for"):
func.reduce(index)
return
# This check is written for the mixed-int-string entry
elif request.node.callspec.id in [
"mixed-int-string-maximum",
"mixed-int-string-minimum",
]:
with pytest.raises(
TypeError, match=".* not supported between instances of 'int' and 'str'"
):
func.reduce(index)
return
else:
result = func.reduce(index)

Expand Down
40 changes: 28 additions & 12 deletions pandas/tests/indexes/test_old_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,22 +350,38 @@ def test_memory_usage_doesnt_trigger_engine(self, index):
assert res_without_engine > 0
assert res_with_engine > 0

def test_argsort(self, index):
def test_argsort(self, index, request):
if isinstance(index, CategoricalIndex):
pytest.skip(f"{type(self).__name__} separately tested")

result = index.argsort()
expected = np.array(index).argsort()
tm.assert_numpy_array_equal(result, expected, check_dtype=False)

def test_numpy_argsort(self, index):
result = np.argsort(index)
expected = index.argsort()
tm.assert_numpy_array_equal(result, expected)
# This check is written for the mixed-int-string entry
if request.node.callspec.id == "mixed-int-string":
with pytest.raises(
TypeError,
match="'<' not supported between instances of 'str' and 'int'",
):
index.argsort()
else:
result = index.argsort()
expected = np.array(index).argsort()
tm.assert_numpy_array_equal(result, expected, check_dtype=False)

def test_numpy_argsort(self, index, request):
# This check is written for the mixed-int-string entry
if request.node.callspec.id == "mixed-int-string":
with pytest.raises(
TypeError,
match="'<' not supported between instances of 'str' and 'int'",
):
np.argsort(index)
else:
result = np.argsort(index)
expected = index.argsort()
tm.assert_numpy_array_equal(result, expected)

result = np.argsort(index, kind="mergesort")
expected = index.argsort(kind="mergesort")
tm.assert_numpy_array_equal(result, expected)
result = np.argsort(index, kind="mergesort")
expected = index.argsort(kind="mergesort")
tm.assert_numpy_array_equal(result, expected)

# these are the only two types that perform
# pandas compatibility input validation - the
Expand Down
120 changes: 92 additions & 28 deletions pandas/tests/indexes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,19 @@ def index_flat2(index_flat):
return index_flat


def test_union_same_types(index):
def test_union_same_types(index, request):
# Union with a non-unique, non-monotonic index raises error
# Only needed for bool index factory
idx1 = index.sort_values()
idx2 = index.sort_values()
assert idx1.union(idx2).dtype == idx1.dtype
# This check is written for the mixed-int-string entry
if request.node.callspec.id == "mixed-int-string":
with pytest.raises(
TypeError, match="'<' not supported between instances of 'str' and 'int'"
):
index.sort_values()
else:
idx1 = index.sort_values()
idx2 = index.sort_values()
assert idx1.union(idx2).dtype == idx1.dtype


def test_union_different_types(index_flat, index_flat2, request):
Expand Down Expand Up @@ -129,19 +136,26 @@ def test_union_different_types(index_flat, index_flat2, request):

# Union with a non-unique, non-monotonic index raises error
# This applies to the boolean index
idx1 = idx1.sort_values()
idx2 = idx2.sort_values()

with tm.assert_produces_warning(warn, match=msg):
res1 = idx1.union(idx2)
res2 = idx2.union(idx1)

if any_uint64 and (idx1_signed or idx2_signed):
assert res1.dtype == np.dtype("O")
assert res2.dtype == np.dtype("O")
# This check is written for the mixed-int-string entry
if request.node.callspec.id == "mixed-int-string":
with pytest.raises(
TypeError, match="'<' not supported between instances of 'str' and 'int'"
):
idx1.sort_values()
idx2.sort_values()
else:
assert res1.dtype == common_dtype
assert res2.dtype == common_dtype
idx1 = idx1.sort_values()
idx2 = idx2.sort_values()
with tm.assert_produces_warning(warn, match=msg):
res1 = idx1.union(idx2)
res2 = idx2.union(idx1)

if any_uint64 and (idx1_signed or idx2_signed):
assert res1.dtype == np.dtype("O")
assert res2.dtype == np.dtype("O")
else:
assert res1.dtype == common_dtype
assert res2.dtype == common_dtype


@pytest.mark.parametrize(
Expand Down Expand Up @@ -250,14 +264,22 @@ def test_intersection_base(self, index):
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
)
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_union_base(self, index):
def test_union_base(self, index, request):
index = index.unique()
first = index[3:]
second = index[:5]
everything = index

union = first.union(second)
tm.assert_index_equal(union.sort_values(), everything.sort_values())
# This check is written for the mixed-int-string entry
if request.node.callspec.id == "mixed-int-string":
with pytest.raises(
TypeError,
match="'<' not supported between instances of 'str' and 'int'",
):
tm.assert_index_equal(union.sort_values(), everything.sort_values())
else:
tm.assert_index_equal(union.sort_values(), everything.sort_values())

if isinstance(index.dtype, DatetimeTZDtype):
# The second.values below will drop tz, so the rest of this test
Expand Down Expand Up @@ -308,7 +330,7 @@ def test_difference_base(self, sort, index):
@pytest.mark.filterwarnings(
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
)
def test_symmetric_difference(self, index):
def test_symmetric_difference(self, index, request):
if isinstance(index, CategoricalIndex):
pytest.skip(f"Not relevant for {type(index).__name__}")
if len(index) < 2:
Expand All @@ -322,7 +344,15 @@ def test_symmetric_difference(self, index):
second = index[:-1]
answer = index[[0, -1]]
result = first.symmetric_difference(second)
tm.assert_index_equal(result.sort_values(), answer.sort_values())
# This check is written for the mixed-int-string entry
if request.node.callspec.id == "mixed-int-string":
with pytest.raises(
TypeError,
match="'<' not supported between instances of 'str' and 'int'",
):
tm.assert_index_equal(result.sort_values(), answer.sort_values())
else:
tm.assert_index_equal(result.sort_values(), answer.sort_values())

# GH#10149
cases = [second.to_numpy(), second.to_series(), second.to_list()]
Expand Down Expand Up @@ -392,7 +422,7 @@ def test_corner_union(self, index_flat, fname, sname, expected_name):
(None, None, None),
],
)
def test_union_unequal(self, index_flat, fname, sname, expected_name):
def test_union_unequal(self, index_flat, fname, sname, expected_name, request):
if not index_flat.is_unique:
index = index_flat.unique()
else:
Expand All @@ -401,9 +431,26 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name):
# test copy.union(subset) - need sort for unicode and string
first = index.copy().set_names(fname)
second = index[1:].set_names(sname)
union = first.union(second).sort_values()
expected = index.set_names(expected_name).sort_values()
tm.assert_index_equal(union, expected)
# This check is written for the mixed-int-string entry
if request.node.callspec.id in [
"-".join(["mixed-int-string", t])
for t in [
"A-A-A",
"A-B-None",
"A-None-None",
"None-B-None",
"None-None-None",
]
]:
with pytest.raises(
TypeError,
match="'<' not supported between instances of 'str' and 'int'",
):
first.union(second).sort_values()
else:
union = first.union(second).sort_values()
expected = index.set_names(expected_name).sort_values()
tm.assert_index_equal(union, expected)

@pytest.mark.parametrize(
"fname, sname, expected_name",
Expand Down Expand Up @@ -461,7 +508,7 @@ def test_corner_intersect(self, index_flat, fname, sname, expected_name):
(None, None, None),
],
)
def test_intersect_unequal(self, index_flat, fname, sname, expected_name):
def test_intersect_unequal(self, index_flat, fname, sname, expected_name, request):
if not index_flat.is_unique:
index = index_flat.unique()
else:
Expand All @@ -470,9 +517,26 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name):
# test copy.intersection(subset) - need sort for unicode and string
first = index.copy().set_names(fname)
second = index[1:].set_names(sname)
intersect = first.intersection(second).sort_values()
expected = index[1:].set_names(expected_name).sort_values()
tm.assert_index_equal(intersect, expected)
# This check is written for the mixed-int-string entry
if request.node.callspec.id in [
"-".join(["mixed-int-string", t])
for t in [
"A-A-A",
"A-B-None",
"A-None-None",
"None-B-None",
"None-None-None",
]
]:
with pytest.raises(
TypeError,
match="'<' not supported between instances of 'int' and 'str'",
):
first.intersection(second).sort_values()
else:
intersect = first.intersection(second).sort_values()
expected = index[1:].set_names(expected_name).sort_values()
tm.assert_index_equal(intersect, expected)

@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_intersection_name_retention_with_nameless(self, index):
Expand Down
Loading