Summary of this patch (leading text before the first "diff --git" line is
skipped by patch tools):

* python/cudf/cudf/core/join/join.py
  _key_columns_with_same_name no longer collapses to an empty set whenever
  either side of the join uses its index. Instead, each left/right key pair
  is checked individually: a shared name is recorded only when neither key
  of the pair is an _IndexIndexer.

* python/cudf/cudf/tests/test_joining.py
  Adds test_merge_mixed_index_columns, which compares cudf outer merges
  against pandas for frames indexed by "a", using single and paired
  left_on/right_on keys drawn from "a", "b" and "c".

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 0e5ac8dc02d..0c889fcbc81 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -147,12 +147,14 @@ def __init__(
         self._key_columns_with_same_name = (
             set(_coerce_to_tuple(on))
             if on
-            else set()
-            if (self._using_left_index or self._using_right_index)
             else {
                 lkey.name
                 for lkey, rkey in zip(self._left_keys, self._right_keys)
                 if lkey.name == rkey.name
+                and not (
+                    isinstance(lkey, _IndexIndexer)
+                    or isinstance(rkey, _IndexIndexer)
+                )
             }
         )
 
diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index 5066c5eef38..c787204735f 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -1,5 +1,7 @@
 # Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
+from itertools import combinations, product, repeat
+
 import numpy as np
 import pandas as pd
 import pytest
@@ -2106,6 +2108,28 @@ def test_string_join_values_nulls():
     assert_join_results_equal(expect, got, how="left")
 
 
+@pytest.mark.parametrize(
+    "left_on,right_on",
+    [
+        *product(["a", "b", "c"], ["a", "b"]),
+        *zip(combinations(["a", "b", "c"], 2), repeat(["a", "b"])),
+    ],
+)
+def test_merge_mixed_index_columns(left_on, right_on):
+    left = pd.DataFrame({"a": [1, 2, 1, 2], "b": [2, 3, 3, 4]}).set_index("a")
+    right = pd.DataFrame({"a": [1, 2, 1, 3], "b": [2, 30, 3, 4]}).set_index(
+        "a"
+    )
+
+    left["c"] = 10
+
+    expect = left.merge(right, left_on=left_on, right_on=right_on, how="outer")
+    cleft = cudf.from_pandas(left)
+    cright = cudf.from_pandas(right)
+    got = cleft.merge(cright, left_on=left_on, right_on=right_on, how="outer")
+    assert_join_results_equal(expect, got, how="outer")
+
+
 def test_merge_multiindex_columns():
     lhs = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]})
     lhs.columns = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y")])