From 4be579ef70284867148b8e73c18e88511665787c Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Mon, 2 Sep 2019 22:59:17 +0100
Subject: [PATCH 01/22] BUG: CategoricalIndex allowed reindexing duplicate
 sources, but not duplicate targets: this is the wrong way around

---
 doc/source/whatsnew/v1.0.0.rst            |  2 +-
 pandas/core/indexes/category.py           |  8 ---
 pandas/tests/indexes/test_category.py     | 51 +++++++++------
 pandas/tests/indexing/test_categorical.py | 77 ++++++++++++-----------
 4 files changed, 75 insertions(+), 63 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index a3d75d69e1e82..e6dbc22c1a859 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -165,7 +165,7 @@ Categorical
 
 - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`)
 - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`)
--
+- Bug in :meth:`DataFrame.reindex` with a :class:`CategoricalIndex` where reindexing raised when the target contained duplicates but did not raise when the source index contained duplicates (:issue:`28107`)
 -
 
 
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index c4321c993e638..c73e9af7d73b8 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -551,10 +551,6 @@ def get_value(self, series: AnyArrayLike, key: Any):
         # we might be a positional inexer
         return super().get_value(series, key)
 
-    def _can_reindex(self, indexer):
-        """ always allow reindexing """
-        pass
-
     @Appender(_index_shared_docs["where"])
     def where(self, cond, other=None):
         # TODO: Investigate an alternative implementation with
@@ -579,7 +575,6 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
             Indices of output values in original index
 
         """
-
         if method is not None:
             raise NotImplementedError(
                 "argument method is not implemented for CategoricalIndex.reindex"
@@ -599,9 +594,6 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
             indexer = None
             missing = []
         else:
-            if not target.is_unique:
-                raise ValueError("cannot reindex with a non-unique indexer")
-
             indexer, missing = self.get_indexer_non_unique(np.array(target))
 
         if len(self.codes) and indexer is not None:
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 67bf9bd20e716..e496047b399c7 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -574,41 +574,56 @@ def test_reindexing(self):
             tm.assert_numpy_array_equal(expected, actual)
 
     def test_reindex_dtype(self):
-        c = CategoricalIndex(["a", "b", "c", "a"])
+        c = CategoricalIndex(["a", "b", "c"])
         res, indexer = c.reindex(["a", "c"])
-        tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
+        tm.assert_index_equal(res, Index(["a", "c"]), exact=True)
+        tm.assert_numpy_array_equal(indexer, np.array([0, 2], dtype=np.intp))
 
-        c = CategoricalIndex(["a", "b", "c", "a"])
+        c = CategoricalIndex(["a", "b", "c"])
         res, indexer = c.reindex(Categorical(["a", "c"]))
 
-        exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
+        exp = CategoricalIndex(["a", "c"], categories=["a", "c"])
         tm.assert_index_equal(res, exp, exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
+        tm.assert_numpy_array_equal(indexer, np.array([0, 2], dtype=np.intp))
 
-        c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
+        c = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
         res, indexer = c.reindex(["a", "c"])
-        exp = Index(["a", "a", "c"], dtype="object")
+        exp = Index(["a", "c"], dtype="object")
         tm.assert_index_equal(res, exp, exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
+        tm.assert_numpy_array_equal(indexer, np.array([0, 2], dtype=np.intp))
 
-        c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
+        c = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
         res, indexer = c.reindex(Categorical(["a", "c"]))
-        exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
+        exp = CategoricalIndex(["a", "c"], categories=["a", "c"])
         tm.assert_index_equal(res, exp, exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
+        tm.assert_numpy_array_equal(indexer, np.array([0, 2], dtype=np.intp))
 
-    def test_reindex_duplicate_target(self):
+    def test_reindex_duplicate_source(self):
         # See GH23963
         c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
-        with pytest.raises(ValueError, match="non-unique indexer"):
-            c.reindex(["a", "a", "c"])
+        with pytest.raises(ValueError, match="duplicate axis"):
+            c._can_reindex(["a", "c"])
 
-        with pytest.raises(ValueError, match="non-unique indexer"):
-            c.reindex(
-                CategoricalIndex(["a", "a", "c"], categories=["a", "b", "c", "d"])
+        with pytest.raises(ValueError, match="duplicate axis"):
+            c._can_reindex(
+                CategoricalIndex(["a", "c"], categories=["a", "b", "c", "d"])
             )
 
+    def test_reindex_duplicate_target(self):
+        # See GH25459
+        c = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
+        res, indexer = c.reindex(["a", "c", "c"])
+        exp = Index(["a", "c", "c"], dtype="object")
+        tm.assert_index_equal(res, exp, exact=True)
+        tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
+
+        res, indexer = c.reindex(
+            CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
+        )
+        exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
+        tm.assert_index_equal(res, exp, exact=True)
+        tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
+
     def test_reindex_empty_index(self):
         # See GH16770
         c = CategoricalIndex([])
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index c365c985eb4b6..29e03246a5750 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -46,6 +46,18 @@ def setup_method(self, method):
                 "B": (Series([1, 1, 2, 1, 3, 2]).astype(CDT([3, 2, 1], ordered=False))),
             }
         ).set_index("B")
+        self.df5 = DataFrame(
+            {
+                "A": np.arange(3, dtype="int64"),
+                "B": Series(list("abc")).astype(CDT(list("cabe"))),
+            }
+        ).set_index("B")
+        self.df6 = DataFrame(
+            {
+                "A": np.arange(3, dtype="int64"),
+                "B": (Series([1, 3, 2]).astype(CDT([3, 2, 1], ordered=False))),
+            }
+        ).set_index("B")
 
     def test_loc_scalar(self):
         result = self.df.loc["a"]
@@ -564,23 +576,21 @@ def test_reindexing(self):
 
         # reindexing
         # convert to a regular index
-        result = self.df2.reindex(["a", "b", "e"])
-        expected = DataFrame(
-            {"A": [0, 1, 5, 2, 3, np.nan], "B": Series(list("aaabbe"))}
-        ).set_index("B")
+        result = self.df5.reindex(["a", "b", "e"])
+        expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
+            "B"
+        )
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df2.reindex(["a", "b"])
-        expected = DataFrame(
-            {"A": [0, 1, 5, 2, 3], "B": Series(list("aaabb"))}
-        ).set_index("B")
+        result = self.df5.reindex(["a", "b"])
+        expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df2.reindex(["e"])
+        result = self.df5.reindex(["e"])
         expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df2.reindex(["d"])
+        result = self.df5.reindex(["d"])
         expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
@@ -588,65 +598,60 @@ def test_reindexing(self):
         # then return a Categorical
         cats = list("cabe")
 
-        result = self.df2.reindex(Categorical(["a", "d"], categories=cats))
+        result = self.df5.reindex(Categorical(["a", "e"], categories=cats))
         expected = DataFrame(
-            {"A": [0, 1, 5, np.nan], "B": Series(list("aaad")).astype(CDT(cats))}
+            {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats))}
         ).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df2.reindex(Categorical(["a"], categories=cats))
+        result = self.df5.reindex(Categorical(["a"], categories=cats))
         expected = DataFrame(
-            {"A": [0, 1, 5], "B": Series(list("aaa")).astype(CDT(cats))}
+            {"A": [0], "B": Series(list("a")).astype(CDT(cats))}
         ).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df2.reindex(["a", "b", "e"])
-        expected = DataFrame(
-            {"A": [0, 1, 5, 2, 3, np.nan], "B": Series(list("aaabbe"))}
-        ).set_index("B")
+        result = self.df5.reindex(["a", "b", "e"])
+        expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
+            "B"
+        )
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df2.reindex(["a", "b"])
-        expected = DataFrame(
-            {"A": [0, 1, 5, 2, 3], "B": Series(list("aaabb"))}
-        ).set_index("B")
+        result = self.df5.reindex(["a", "b"])
+        expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df2.reindex(["e"])
+        result = self.df5.reindex(["e"])
         expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
         # give back the type of categorical that we received
-        result = self.df2.reindex(
-            Categorical(["a", "d"], categories=cats, ordered=True)
+        result = self.df5.reindex(
+            Categorical(["a", "e"], categories=cats, ordered=True)
         )
         expected = DataFrame(
-            {
-                "A": [0, 1, 5, np.nan],
-                "B": Series(list("aaad")).astype(CDT(cats, ordered=True)),
-            }
+            {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats, ordered=True))}
         ).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df2.reindex(Categorical(["a", "d"], categories=["a", "d"]))
+        result = self.df5.reindex(Categorical(["a", "d"], categories=["a", "d"]))
         expected = DataFrame(
-            {"A": [0, 1, 5, np.nan], "B": Series(list("aaad")).astype(CDT(["a", "d"]))}
+            {"A": [0, np.nan], "B": Series(list("ad")).astype(CDT(["a", "d"]))}
         ).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
         # passed duplicate indexers are not allowed
-        msg = "cannot reindex with a non-unique indexer"
+        msg = "cannot reindex from a duplicate axis"
         with pytest.raises(ValueError, match=msg):
-            self.df2.reindex(["a", "a"])
+            self.df2.reindex(["a", "b"])
 
         # args NotImplemented ATM
         msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
         with pytest.raises(NotImplementedError, match=msg.format("method")):
-            self.df2.reindex(["a"], method="ffill")
+            self.df5.reindex(["a"], method="ffill")
         with pytest.raises(NotImplementedError, match=msg.format("level")):
-            self.df2.reindex(["a"], level=1)
+            self.df5.reindex(["a"], level=1)
         with pytest.raises(NotImplementedError, match=msg.format("limit")):
-            self.df2.reindex(["a"], limit=2)
+            self.df5.reindex(["a"], limit=2)
 
     def test_loc_slice(self):
         # slicing

From cca2565f4b71b08ada9dde32ca8cc704dd9cfcb2 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 3 Sep 2019 19:05:16 +0100
Subject: [PATCH 02/22] Restore original CategoricalIndex.reindex test

---
 pandas/tests/indexes/test_category.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index e496047b399c7..f3743a579a049 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -574,29 +574,29 @@ def test_reindexing(self):
             tm.assert_numpy_array_equal(expected, actual)
 
     def test_reindex_dtype(self):
-        c = CategoricalIndex(["a", "b", "c"])
+        c = CategoricalIndex(["a", "b", "c", "a"])
         res, indexer = c.reindex(["a", "c"])
-        tm.assert_index_equal(res, Index(["a", "c"]), exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 2], dtype=np.intp))
+        tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True)
+        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
 
-        c = CategoricalIndex(["a", "b", "c"])
+        c = CategoricalIndex(["a", "b", "c", "a"])
         res, indexer = c.reindex(Categorical(["a", "c"]))
 
-        exp = CategoricalIndex(["a", "c"], categories=["a", "c"])
+        exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
         tm.assert_index_equal(res, exp, exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 2], dtype=np.intp))
+        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
 
-        c = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
+        c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
         res, indexer = c.reindex(["a", "c"])
-        exp = Index(["a", "c"], dtype="object")
+        exp = Index(["a", "a", "c"], dtype="object")
         tm.assert_index_equal(res, exp, exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 2], dtype=np.intp))
+        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
 
-        c = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
+        c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
         res, indexer = c.reindex(Categorical(["a", "c"]))
-        exp = CategoricalIndex(["a", "c"], categories=["a", "c"])
+        exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
         tm.assert_index_equal(res, exp, exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 2], dtype=np.intp))
+        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
 
     def test_reindex_duplicate_source(self):
         # See GH23963

From 9fa3ed3696fa24def41c1608f749b1a8fcda2d6b Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 3 Sep 2019 19:05:40 +0100
Subject: [PATCH 03/22] Fix buggy code in doc, pytables test

---
 doc/source/user_guide/advanced.rst | 8 ++++----
 pandas/util/testing.py             | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index 62a9b6396404a..5c38e2ec2bb3f 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -783,10 +783,10 @@ values **not** in the categories, similarly to how you can reindex **any** panda
 
 .. ipython:: python
 
-   df2.reindex(['a', 'e'])
-   df2.reindex(['a', 'e']).index
-   df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde')))
-   df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))).index
+   df2.iloc[[0]].reindex(['a', 'e'])
+   df2.iloc[[0]].reindex(['a', 'e']).index
+   df2.iloc[[0]].reindex(pd.Categorical(['a', 'e'], categories=list('abcde')))
+   df2.iloc[[0]].reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))).index
 
 .. warning::
 
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index aee58f808d9e6..005cad03e6e6b 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -1597,7 +1597,7 @@ def makeUnicodeIndex(k=10, name=None):
 def makeCategoricalIndex(k=10, n=3, name=None, **kwargs):
     """ make a length k index or n categories """
     x = rands_array(nchars=4, size=n)
-    return CategoricalIndex(np.random.choice(x, k), name=name, **kwargs)
+    return CategoricalIndex(Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs)
 
 
 def makeIntervalIndex(k=10, name=None, **kwargs):

From 1c480c232b1b098b85c3a2de758e1e998f031b50 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 3 Sep 2019 19:12:37 +0100
Subject: [PATCH 04/22] Fix formatting

---
 pandas/util/testing.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 005cad03e6e6b..7045d3ecfa7c9 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -1597,7 +1597,10 @@ def makeUnicodeIndex(k=10, name=None):
 def makeCategoricalIndex(k=10, n=3, name=None, **kwargs):
     """ make a length k index or n categories """
     x = rands_array(nchars=4, size=n)
-    return CategoricalIndex(Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs)
+    return CategoricalIndex(
+        Categorical.from_codes(np.arange(k) % n, categories=x),
+        name=name, **kwargs
+    )
 
 
 def makeIntervalIndex(k=10, name=None, **kwargs):

From 3162ce5c4cbde2bc27d323dfb5d4a6fa3c931460 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 3 Sep 2019 21:48:28 +0100
Subject: [PATCH 05/22] Make docs shorter to shut up linter

---
 doc/source/user_guide/advanced.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index 5c38e2ec2bb3f..6c8e6dcbb1db9 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -785,8 +785,8 @@ values **not** in the categories, similarly to how you can reindex **any** panda
 
    df2.iloc[[0]].reindex(['a', 'e'])
    df2.iloc[[0]].reindex(['a', 'e']).index
-   df2.iloc[[0]].reindex(pd.Categorical(['a', 'e'], categories=list('abcde')))
-   df2.iloc[[0]].reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))).index
+   df2.iloc[[0]].reindex(pd.Categorical(['a', 'e'], categories=list('abe')))
+   df2.iloc[[0]].reindex(pd.Categorical(['a', 'e'], categories=list('abe'))).index
 
 .. warning::
 

From 6ff11059f96ffaf456967446a3608f834b9917e3 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 3 Sep 2019 21:49:30 +0100
Subject: [PATCH 06/22] Fix Index.union and get_indexer_non_unique bugs exposed
 by my categorical index fixes

---
 pandas/_libs/index.pyx      | 18 +++++++++++++-----
 pandas/core/indexes/base.py | 10 ++++++++--
 pandas/tests/test_base.py   |  5 +++++
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 979dad6db0838..6d2f98ab6640e 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -304,12 +304,20 @@ cdef class IndexEngine:
         if stargets and len(stargets) < 5 and self.is_monotonic_increasing:
             # if there are few enough stargets and the index is monotonically
             # increasing, then use binary search for each starget
+            remaining_stargets = set()
             for starget in stargets:
-                start = values.searchsorted(starget, side='left')
-                end = values.searchsorted(starget, side='right')
-                if start != end:
-                    d[starget] = list(range(start, end))
-        else:
+                try:
+                    start = values.searchsorted(starget, side='left')
+                    end = values.searchsorted(starget, side='right')
+                except TypeError: # e.g. if we tried to search for string in int array
+                    remaining_stargets.add(starget)
+                else:
+                    if start != end:
+                        d[starget] = list(range(start, end))
+
+            stargets = remaining_stargets
+        
+        if stargets:
             # otherwise, map by iterating through all items in the index
             for i in range(n):
                 val = values[i]
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 62662edb692a7..2967e51a285de 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2489,8 +2489,14 @@ def _union(self, other, sort):
                 value_set = set(lvals)
                 result.extend([x for x in rvals if x not in value_set])
         else:
-            indexer = self.get_indexer(other)
-            indexer, = (indexer == -1).nonzero()
+            # find indexes of things in "other" that are not in "self"
+            try:
+                indexer = self.get_indexer(other)
+            except InvalidIndexError:
+                # duplicates
+                indexer = algos.unique1d(self.get_indexer_non_unique(other)[1])
+            else:
+                indexer, = (indexer == -1).nonzero()
 
             if len(indexer) > 0:
                 other_diff = algos.take_nd(rvals, indexer, allow_fill=False)
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index c760c75e44f6b..8e57ec66f77c9 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -990,6 +990,11 @@ def test_bool_indexing(self, indexer_klass, indexer):
             s = pd.Series(idx)
             tm.assert_series_equal(s[indexer_klass(indexer)], s.iloc[exp_idx])
 
+    def test_get_indexer_non_unique_dtype_mismatch(self):
+        indexes, missing = pd.Index(['A', 'B']).get_indexer_non_unique(pd.Index([0]))
+        tm.assert_numpy_array_equal(np.array([], dtype=np.intp), indexes)
+        tm.assert_numpy_array_equal(np.array([0, 1], dtype=np.intp), missing)
+
 
 class TestTranspose(Ops):
     errmsg = "the 'axes' parameter is not supported"

From 3051ce5bf5e684e65575de682708df405fdb0b13 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 3 Sep 2019 22:34:38 +0100
Subject: [PATCH 07/22] Small fixes

---
 pandas/_libs/index.pyx    | 4 ++--
 pandas/tests/test_base.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 6d2f98ab6640e..0599e9c26b57c 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -309,14 +309,14 @@ cdef class IndexEngine:
                 try:
                     start = values.searchsorted(starget, side='left')
                     end = values.searchsorted(starget, side='right')
-                except TypeError: # e.g. if we tried to search for string in int array
+                except TypeError:  # e.g. if we tried to search for string in int array
                     remaining_stargets.add(starget)
                 else:
                     if start != end:
                         d[starget] = list(range(start, end))
 
             stargets = remaining_stargets
-        
+
         if stargets:
             # otherwise, map by iterating through all items in the index
             for i in range(n):
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index 8e57ec66f77c9..47b6e572e3e0c 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -992,8 +992,8 @@ def test_bool_indexing(self, indexer_klass, indexer):
 
     def test_get_indexer_non_unique_dtype_mismatch(self):
         indexes, missing = pd.Index(['A', 'B']).get_indexer_non_unique(pd.Index([0]))
-        tm.assert_numpy_array_equal(np.array([], dtype=np.intp), indexes)
-        tm.assert_numpy_array_equal(np.array([0, 1], dtype=np.intp), missing)
+        tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
+        tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing)
 
 
 class TestTranspose(Ops):

From 85fdd7d6c13c2a3d66f66d6543ca3ae42f56d3ac Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 3 Sep 2019 22:39:58 +0100
Subject: [PATCH 08/22] series | index still fails, but now only due to a dtype
 mismatch

---
 pandas/tests/series/test_operators.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index c2cf91e582c47..c8189714f7a4e 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -288,8 +288,8 @@ def test_logical_ops_with_index(self, op):
             pytest.param(
                 ops.ror_,
                 marks=pytest.mark.xfail(
-                    reason="Index.get_indexer with non unique index",
-                    raises=InvalidIndexError,
+                    reason="GH#22092 Index implementation returns Index",
+                    raises=AssertionError,
                     strict=True,
                 ),
             ),

From 78faa9d5e78081987ec786b02eeeb6de49ecb884 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 3 Sep 2019 23:24:37 +0100
Subject: [PATCH 09/22] Last small issues

---
 pandas/tests/series/test_operators.py | 1 -
 pandas/tests/test_base.py             | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index c8189714f7a4e..f2bdf643f09c3 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -7,7 +7,6 @@
 import pandas as pd
 from pandas import Categorical, DataFrame, Index, Series, bdate_range, date_range, isna
 from pandas.core import ops
-from pandas.core.indexes.base import InvalidIndexError
 import pandas.core.nanops as nanops
 import pandas.util.testing as tm
 from pandas.util.testing import (
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index 47b6e572e3e0c..07c0ebef0750f 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -992,8 +992,8 @@ def test_bool_indexing(self, indexer_klass, indexer):
 
     def test_get_indexer_non_unique_dtype_mismatch(self):
         indexes, missing = pd.Index(['A', 'B']).get_indexer_non_unique(pd.Index([0]))
-        tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
-        tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing)
+        tm.assert_numpy_array_equal(np.array([-1], dtype=np.int64), indexes)
+        tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing)
 
 
 class TestTranspose(Ops):

From 5eaf83e6d80ed1bba0908b3f797127a3bf2c0cc8 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 3 Sep 2019 23:47:26 +0100
Subject: [PATCH 10/22] More whatsnew

---
 doc/source/whatsnew/v1.0.0.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index e6dbc22c1a859..0980c3b640737 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -229,6 +229,9 @@ Indexing
 - Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`)
 - Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`)
 - Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`)
+- Bug in :meth:`Index.union` that could fail when the left-hand index contained duplicates (:issue:`28257`)
+- Bug in :meth:`Index.get_indexer_non_unique` that could raise ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`)
+-
 
 Missing
 ^^^^^^^

From 5ea41c6da3b561527000f70844b9e2b8f008aa55 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 3 Sep 2019 23:47:44 +0100
Subject: [PATCH 11/22] Blacker

---
 pandas/tests/test_base.py | 2 +-
 pandas/util/testing.py    | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index 07c0ebef0750f..af6f87f9a8348 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -991,7 +991,7 @@ def test_bool_indexing(self, indexer_klass, indexer):
             tm.assert_series_equal(s[indexer_klass(indexer)], s.iloc[exp_idx])
 
     def test_get_indexer_non_unique_dtype_mismatch(self):
-        indexes, missing = pd.Index(['A', 'B']).get_indexer_non_unique(pd.Index([0]))
+        indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0]))
         tm.assert_numpy_array_equal(np.array([-1], dtype=np.int64), indexes)
         tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing)
 
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 7045d3ecfa7c9..af4962c9a0c55 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -1598,8 +1598,7 @@ def makeCategoricalIndex(k=10, n=3, name=None, **kwargs):
     """ make a length k index or n categories """
     x = rands_array(nchars=4, size=n)
     return CategoricalIndex(
-        Categorical.from_codes(np.arange(k) % n, categories=x),
-        name=name, **kwargs
+        Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs
     )
 
 

From b23e408829c1df11c3732f91ce72b4f9a72971a5 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 3 Sep 2019 23:56:09 +0100
Subject: [PATCH 12/22] get_indexer_non_unique makes strange dtype choices

---
 pandas/tests/test_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index af6f87f9a8348..cd02c37ba83c3 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -992,7 +992,7 @@ def test_bool_indexing(self, indexer_klass, indexer):
 
     def test_get_indexer_non_unique_dtype_mismatch(self):
         indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0]))
-        tm.assert_numpy_array_equal(np.array([-1], dtype=np.int64), indexes)
+        tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
         tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing)
 
 

From 06a6580d45f14f8cd923292912eda7a4c1188be5 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Wed, 4 Sep 2019 19:30:31 +0100
Subject: [PATCH 13/22] Address some review comments

---
 pandas/_libs/index.pyx                    |  2 +-
 pandas/core/indexes/base.py               |  8 ++--
 pandas/tests/indexing/test_categorical.py | 46 ++++++++++-------------
 pandas/tests/series/test_operators.py     | 31 ++++-----------
 pandas/tests/test_base.py                 |  1 +
 5 files changed, 32 insertions(+), 56 deletions(-)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 0599e9c26b57c..4ce7a6f43a527 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -281,7 +281,7 @@ cdef class IndexEngine:
         cdef:
             ndarray values, x
             ndarray[int64_t] result, missing
-            set stargets
+            set stargets, remaining_stargets
             dict d = {}
             object val
             int count = 0, count_missing = 0
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 2967e51a285de..942aa82906272 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2490,13 +2490,11 @@ def _union(self, other, sort):
                 result.extend([x for x in rvals if x not in value_set])
         else:
             # find indexes of things in "other" that are not in "self"
-            try:
+            if self.is_unique:
                 indexer = self.get_indexer(other)
-            except InvalidIndexError:
-                # duplicates
-                indexer = algos.unique1d(self.get_indexer_non_unique(other)[1])
-            else:
                 indexer, = (indexer == -1).nonzero()
+            else:
+                indexer = algos.unique1d(self.get_indexer_non_unique(other)[1])
 
             if len(indexer) > 0:
                 other_diff = algos.take_nd(rvals, indexer, allow_fill=False)
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index 29e03246a5750..0d97680c37c29 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -46,18 +46,6 @@ def setup_method(self, method):
                 "B": (Series([1, 1, 2, 1, 3, 2]).astype(CDT([3, 2, 1], ordered=False))),
             }
         ).set_index("B")
-        self.df5 = DataFrame(
-            {
-                "A": np.arange(3, dtype="int64"),
-                "B": Series(list("abc")).astype(CDT(list("cabe"))),
-            }
-        ).set_index("B")
-        self.df6 = DataFrame(
-            {
-                "A": np.arange(3, dtype="int64"),
-                "B": (Series([1, 3, 2]).astype(CDT([3, 2, 1], ordered=False))),
-            }
-        ).set_index("B")
 
     def test_loc_scalar(self):
         result = self.df.loc["a"]
@@ -573,24 +561,30 @@ def test_read_only_source(self):
         assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])
 
     def test_reindexing(self):
+        df = DataFrame(
+            {
+                "A": np.arange(3, dtype="int64"),
+                "B": Series(list("abc")).astype(CDT(list("cabe"))),
+            }
+        ).set_index("B")
 
         # reindexing
         # convert to a regular index
-        result = self.df5.reindex(["a", "b", "e"])
+        result = df.reindex(["a", "b", "e"])
         expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
             "B"
         )
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["a", "b"])
+        result = df.reindex(["a", "b"])
         expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["e"])
+        result = df.reindex(["e"])
         expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["d"])
+        result = df.reindex(["d"])
         expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
@@ -598,34 +592,34 @@ def test_reindexing(self):
         # then return a Categorical
         cats = list("cabe")
 
-        result = self.df5.reindex(Categorical(["a", "e"], categories=cats))
+        result = df.reindex(Categorical(["a", "e"], categories=cats))
         expected = DataFrame(
             {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats))}
         ).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(Categorical(["a"], categories=cats))
+        result = df.reindex(Categorical(["a"], categories=cats))
         expected = DataFrame(
             {"A": [0], "B": Series(list("a")).astype(CDT(cats))}
         ).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["a", "b", "e"])
+        result = df.reindex(["a", "b", "e"])
         expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
             "B"
         )
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["a", "b"])
+        result = df.reindex(["a", "b"])
         expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["e"])
+        result = df.reindex(["e"])
         expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
         # give back the type of categorical that we received
-        result = self.df5.reindex(
+        result = df.reindex(
             Categorical(["a", "e"], categories=cats, ordered=True)
         )
         expected = DataFrame(
@@ -633,7 +627,7 @@ def test_reindexing(self):
         ).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(Categorical(["a", "d"], categories=["a", "d"]))
+        result = df.reindex(Categorical(["a", "d"], categories=["a", "d"]))
         expected = DataFrame(
             {"A": [0, np.nan], "B": Series(list("ad")).astype(CDT(["a", "d"]))}
         ).set_index("B")
@@ -647,11 +641,11 @@ def test_reindexing(self):
         # args NotImplemented ATM
         msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
         with pytest.raises(NotImplementedError, match=msg.format("method")):
-            self.df5.reindex(["a"], method="ffill")
+            df.reindex(["a"], method="ffill")
         with pytest.raises(NotImplementedError, match=msg.format("level")):
-            self.df5.reindex(["a"], level=1)
+            df.reindex(["a"], level=1)
         with pytest.raises(NotImplementedError, match=msg.format("limit")):
-            self.df5.reindex(["a"], limit=2)
+            df.reindex(["a"], limit=2)
 
     def test_loc_slice(self):
         # slicing
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index f2bdf643f09c3..44bdae6cd6165 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -274,41 +274,24 @@ def test_logical_ops_with_index(self, op):
         assert_series_equal(result, expected)
 
     @pytest.mark.parametrize(
-        "op",
+        "op, index_op",
         [
-            pytest.param(
-                ops.rand_,
-                marks=pytest.mark.xfail(
-                    reason="GH#22092 Index implementation returns Index",
-                    raises=AssertionError,
-                    strict=True,
-                ),
-            ),
-            pytest.param(
-                ops.ror_,
-                marks=pytest.mark.xfail(
-                    reason="GH#22092 Index implementation returns Index",
-                    raises=AssertionError,
-                    strict=True,
-                ),
-            ),
-            ops.rxor,
+            (ops.rand_, Index.intersection),
+            (ops.ror_,  Index.union),
+            (ops.rxor,  Index.symmetric_difference),
         ],
     )
-    def test_reversed_logical_ops_with_index(self, op):
+    def test_reversed_logical_ops_with_index(self, op, index_op):
         # GH#22092, GH#19792
         ser = Series([True, True, False, False])
         idx1 = Index([True, False, True, False])
         idx2 = Index([1, 0, 1, 0])
 
-        # symmetric_difference is only for rxor, but other 2 should fail
-        expected = idx1.symmetric_difference(ser)
-
+        expected = index_op(idx1, ser)
         result = op(ser, idx1)
         assert_index_equal(result, expected)
 
-        expected = idx2.symmetric_difference(ser)
-
+        expected = index_op(idx2, ser)
         result = op(ser, idx2)
         assert_index_equal(result, expected)
 
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index cd02c37ba83c3..bde9c61f6f9ff 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -991,6 +991,7 @@ def test_bool_indexing(self, indexer_klass, indexer):
             tm.assert_series_equal(s[indexer_klass(indexer)], s.iloc[exp_idx])
 
     def test_get_indexer_non_unique_dtype_mismatch(self):
+        # GH 25459
         indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0]))
         tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
         tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing)

From ad573ef13ce77ab99a17f77a914eacf647c71675 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Wed, 4 Sep 2019 21:04:52 +0100
Subject: [PATCH 14/22] More blackening

---
 pandas/tests/indexing/test_categorical.py | 4 +---
 pandas/tests/series/test_operators.py     | 4 ++--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index 0d97680c37c29..005a9a24dc597 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -619,9 +619,7 @@ def test_reindexing(self):
         assert_frame_equal(result, expected, check_index_type=True)
 
         # give back the type of categorical that we received
-        result = df.reindex(
-            Categorical(["a", "e"], categories=cats, ordered=True)
-        )
+        result = df.reindex(Categorical(["a", "e"], categories=cats, ordered=True))
         expected = DataFrame(
             {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats, ordered=True))}
         ).set_index("B")
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index 44bdae6cd6165..ec2853890b3de 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -277,8 +277,8 @@ def test_logical_ops_with_index(self, op):
         "op, index_op",
         [
             (ops.rand_, Index.intersection),
-            (ops.ror_,  Index.union),
-            (ops.rxor,  Index.symmetric_difference),
+            (ops.ror_, Index.union),
+            (ops.rxor, Index.symmetric_difference),
         ],
     )
     def test_reversed_logical_ops_with_index(self, op, index_op):

From e26026581308e4cb2094e22ace3c1bb7ddf912ec Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 10 Sep 2019 19:16:28 +0100
Subject: [PATCH 15/22] Doc changes

---
 doc/source/user_guide/advanced.rst | 10 ++++++----
 doc/source/whatsnew/v1.0.0.rst     |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index 6c8e6dcbb1db9..5a86561fb5101 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -783,10 +783,12 @@ values **not** in the categories, similarly to how you can reindex **any** panda
 
 .. ipython:: python
 
-   df2.iloc[[0]].reindex(['a', 'e'])
-   df2.iloc[[0]].reindex(['a', 'e']).index
-   df2.iloc[[0]].reindex(pd.Categorical(['a', 'e'], categories=list('abe')))
-   df2.iloc[[0]].reindex(pd.Categorical(['a', 'e'], categories=list('abe'))).index
+   df3 = pd.DataFrame({'A': np.arange(3), 'B': pd.Series(list('abc')).astype('category')})
+   df3 = df3.set_index('B')
+   df3.reindex(['a', 'e'])
+   df3.reindex(['a', 'e']).index
+   df3.reindex(pd.Categorical(['a', 'e'], categories=list('abe')))
+   df3.reindex(pd.Categorical(['a', 'e'], categories=list('abe'))).index
 
 .. warning::
 
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 0980c3b640737..cdd9075eaab74 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -165,7 +165,7 @@ Categorical
 
 - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`)
 - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`)
-- For :class:`CategoricalIndex`, `DataFrame.reindex` would fail when the targets contained duplicates, and wouldn't fail if the source contained duplicates (:issue:`28107`)
+- For :class:`CategoricalIndex`, :meth:`DataFrame.reindex` with a :class:`CategoricalIndex`, would fail when the targets contained duplicates, and wouldn't fail if the source contained duplicates (:issue:`28107`)
 -
 
 

From 7df795a22ef7de1596bb8a0891757f5bfe8d0beb Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 10 Sep 2019 21:13:37 +0100
Subject: [PATCH 16/22] DOC: wrap long example line and reword whatsnew entries

---
 doc/source/user_guide/advanced.rst | 3 ++-
 doc/source/whatsnew/v1.0.0.rst     | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index 5a86561fb5101..7b185e2faa128 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -783,7 +783,8 @@ values **not** in the categories, similarly to how you can reindex **any** panda
 
 .. ipython:: python
 
-   df3 = pd.DataFrame({'A': np.arange(3), 'B': pd.Series(list('abc')).astype('category')})
+   df3 = pd.DataFrame({'A': np.arange(3),
+                       'B': pd.Series(list('abc')).astype('category')})
    df3 = df3.set_index('B')
    df3.reindex(['a', 'e'])
    df3.reindex(['a', 'e']).index
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index cdd9075eaab74..25bcfb0bcb1e9 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -165,7 +165,7 @@ Categorical
 
 - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`)
 - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`)
-- For :class:`CategoricalIndex`, :meth:`DataFrame.reindex` with a :class:`CategoricalIndex`, would fail when the targets contained duplicates, and wouldn't fail if the source contained duplicates (:issue:`28107`)
+- Bug where :meth:`DataFrame.reindex` with a :class:`CategoricalIndex` would fail when the targets contained duplicates, and wouldn't fail if the source contained duplicates (:issue:`28107`)
 -
 
 
@@ -229,7 +229,7 @@ Indexing
 - Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`)
 - Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`)
 - Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`)
-- :meth:`Index.union` could fail when the LHS contained duplicates (:issue:`28257`)
+- :meth:`Index.union` could fail when the left-hand index contained duplicates (:issue:`28257`)
 - :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`)
 -
 

From 5d3a861cac75bc3ecd790e63057d71f3b8c18499 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Wed, 25 Sep 2019 19:42:12 +0100
Subject: [PATCH 17/22] Address review comments

---
 pandas/core/indexes/base.py           |  2 +-
 pandas/tests/indexes/test_category.py | 12 +++----
 pandas/tests/series/test_operators.py | 45 +++++++++++++++++++++------
 3 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 942aa82906272..3f3ba5224b8af 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2492,7 +2492,7 @@ def _union(self, other, sort):
             # find indexes of things in "other" that are not in "self"
             if self.is_unique:
                 indexer = self.get_indexer(other)
-                indexer, = (indexer == -1).nonzero()
+                indexer = (indexer == -1).nonzero()[0]
             else:
                 indexer = algos.unique1d(self.get_indexer_non_unique(other)[1])
 
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index f3743a579a049..221da492ddc63 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -600,24 +600,24 @@ def test_reindex_dtype(self):
 
     def test_reindex_duplicate_source(self):
         # See GH23963
-        c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
+        cat = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
         with pytest.raises(ValueError, match="duplicate axis"):
-            c._can_reindex(["a", "c"])
+            cat._can_reindex(["a", "c"])
 
         with pytest.raises(ValueError, match="duplicate axis"):
-            c._can_reindex(
+            cat._can_reindex(
                 CategoricalIndex(["a", "c"], categories=["a", "b", "c", "d"])
             )
 
     def test_reindex_duplicate_target(self):
         # See GH25459
-        c = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
-        res, indexer = c.reindex(["a", "c", "c"])
+        cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
+        res, indexer = cat.reindex(["a", "c", "c"])
         exp = Index(["a", "c", "c"], dtype="object")
         tm.assert_index_equal(res, exp, exact=True)
         tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
 
-        res, indexer = c.reindex(
+        res, indexer = cat.reindex(
             CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
         )
         exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index ec2853890b3de..d9b3fd27183ef 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -273,27 +273,54 @@ def test_logical_ops_with_index(self, op):
         result = op(ser, idx2)
         assert_series_equal(result, expected)
 
+    def test_reversed_xor_with_index_returns_index(self):
+        # GH#22092, GH#19792
+        ser = Series([True, True, False, False])
+        idx1 = Index([True, False, True, False])
+        idx2 = Index([1, 0, 1, 0])
+
+        expected = Index.symmetric_difference(idx1, ser)
+        result = idx1 ^ ser
+        assert_index_equal(result, expected)
+
+        expected = Index.symmetric_difference(idx2, ser)
+        result = idx2 ^ ser
+        assert_index_equal(result, expected)
+
     @pytest.mark.parametrize(
-        "op, index_op",
+        "op",
         [
-            (ops.rand_, Index.intersection),
-            (ops.ror_, Index.union),
-            (ops.rxor, Index.symmetric_difference),
+            pytest.param(
+                ops.rand_,
+                marks=pytest.mark.xfail(
+                    reason="GH#22092 Index __and__ returns Index intersection",
+                    raises=AssertionError,  
+                    strict=True,    
+                ),  
+            ),
+            pytest.param(   
+                ops.ror_,   
+                marks=pytest.mark.xfail(    
+                    reason="GH#22092 Index __or__ returns Index union",
+                    raises=AssertionError,   
+                    strict=True,    
+                ),  
+            ),
         ],
     )
-    def test_reversed_logical_ops_with_index(self, op, index_op):
+    def test_reversed_logical_op_with_index_returns_series(self, op):
         # GH#22092, GH#19792
         ser = Series([True, True, False, False])
         idx1 = Index([True, False, True, False])
         idx2 = Index([1, 0, 1, 0])
 
-        expected = index_op(idx1, ser)
+        expected = pd.Series(op(idx1.values, ser.values))
         result = op(ser, idx1)
-        assert_index_equal(result, expected)
+        assert_series_equal(result, expected)
 
-        expected = index_op(idx2, ser)
+        expected = pd.Series(op(idx2.values, ser.values))
         result = op(ser, idx2)
-        assert_index_equal(result, expected)
+        assert_series_equal(result, expected)
 
     @pytest.mark.parametrize(
         "op, expected",

From 0ee4f892be48117707d1a22c39e902453077f7c2 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Wed, 25 Sep 2019 19:49:34 +0100
Subject: [PATCH 18/22] Delete test that checks internal method

---
 pandas/tests/indexes/test_category.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 221da492ddc63..6ec7301c279f5 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -598,17 +598,6 @@ def test_reindex_dtype(self):
         tm.assert_index_equal(res, exp, exact=True)
         tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
 
-    def test_reindex_duplicate_source(self):
-        # See GH23963
-        cat = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
-        with pytest.raises(ValueError, match="duplicate axis"):
-            cat._can_reindex(["a", "c"])
-
-        with pytest.raises(ValueError, match="duplicate axis"):
-            cat._can_reindex(
-                CategoricalIndex(["a", "c"], categories=["a", "b", "c", "d"])
-            )
-
     def test_reindex_duplicate_target(self):
         # See GH25459
         cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])

From f07faa3b57001d508476f42b7dacb509991de89e Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Wed, 25 Sep 2019 20:46:42 +0100
Subject: [PATCH 19/22] Strip trailing wspace

---
 pandas/tests/series/test_operators.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index d9b3fd27183ef..2b5479e322971 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -294,17 +294,17 @@ def test_reversed_xor_with_index_returns_index(self):
                 ops.rand_,
                 marks=pytest.mark.xfail(
                     reason="GH#22092 Index __and__ returns Index intersection",
-                    raises=AssertionError,  
-                    strict=True,    
-                ),  
+                    raises=AssertionError,
+                    strict=True,
+                ),
             ),
-            pytest.param(   
-                ops.ror_,   
-                marks=pytest.mark.xfail(    
+            pytest.param(
+                ops.ror_,
+                marks=pytest.mark.xfail(
                     reason="GH#22092 Index __or__ returns Index union",
-                    raises=AssertionError,   
-                    strict=True,    
-                ),  
+                    raises=AssertionError,
+                    strict=True,
+                ),
             ),
         ],
     )

From 2b05a5553ae01eec6883de9a0fa4ba50b79234a2 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Wed, 25 Sep 2019 20:46:54 +0100
Subject: [PATCH 20/22] Split docs into two blocks??

---
 doc/source/user_guide/advanced.rst | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index 7b185e2faa128..d8dc556b35b81 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -796,17 +796,15 @@ values **not** in the categories, similarly to how you can reindex **any** panda
    Reshaping and Comparison operations on a ``CategoricalIndex`` must have the same categories
    or a ``TypeError`` will be raised.
 
-   .. code-block:: ipython
+   .. ipython:: python
 
-    In [9]: df3 = pd.DataFrame({'A': np.arange(6), 'B': pd.Series(list('aabbca')).astype('category')})
+      df3 = pd.DataFrame({'A': np.arange(6), 'B': pd.Series(list('aabbca')).astype('category')})
+      df3 = df3.set_index('B')
+      df3.index
+   
+   .. ipython:: python
 
-    In [11]: df3 = df3.set_index('B')
-
-    In [11]: df3.index
-    Out[11]: CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['a', 'b', 'c'], ordered=False, name='B', dtype='category')
-
-    In [12]: pd.concat([df2, df3])
-    TypeError: categories must match existing categories when appending
+      pd.concat([df2, df3])
 
 .. _indexing.rangeindex:
 

From 4ad347c6c2b1329e314aaea139b5f82ab2afd361 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Wed, 25 Sep 2019 21:26:21 +0100
Subject: [PATCH 21/22] Check failures

---
 doc/source/user_guide/advanced.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index d8dc556b35b81..2e27e2f7d2e73 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -798,10 +798,11 @@ values **not** in the categories, similarly to how you can reindex **any** panda
 
    .. ipython:: python
 
-      df3 = pd.DataFrame({'A': np.arange(6), 'B': pd.Series(list('aabbca')).astype('category')})
+      df3 = pd.DataFrame({'A': np.arange(6),
+                          'B': pd.Series(list('aabbca')).astype('category')})
       df3 = df3.set_index('B')
       df3.index
-   
+
    .. ipython:: python
 
       pd.concat([df2, df3])

From ff13dea80cd106af3cbaee9b494e4b5ab4492a17 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Thu, 26 Sep 2019 00:23:23 +0100
Subject: [PATCH 22/22] DOC: use self-contained df4/df5 example for mismatched-categories concat

---
 doc/source/user_guide/advanced.rst | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index 2e27e2f7d2e73..4949dd580414f 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -786,6 +786,10 @@ values **not** in the categories, similarly to how you can reindex **any** panda
    df3 = pd.DataFrame({'A': np.arange(3),
                        'B': pd.Series(list('abc')).astype('category')})
    df3 = df3.set_index('B')
+   df3
+
+.. ipython:: python
+
    df3.reindex(['a', 'e'])
    df3.reindex(['a', 'e']).index
    df3.reindex(pd.Categorical(['a', 'e'], categories=list('abe')))
@@ -798,14 +802,22 @@ values **not** in the categories, similarly to how you can reindex **any** panda
 
    .. ipython:: python
 
-      df3 = pd.DataFrame({'A': np.arange(6),
-                          'B': pd.Series(list('aabbca')).astype('category')})
-      df3 = df3.set_index('B')
-      df3.index
+      df4 = pd.DataFrame({'A': np.arange(2),
+                          'B': list('ba')})
+      df4['B'] = df4['B'].astype(CategoricalDtype(list('ab')))
+      df4 = df4.set_index('B')
+      df4.index
 
-   .. ipython:: python
+      df5 = pd.DataFrame({'A': np.arange(2),
+                          'B': list('bc')})
+      df5['B'] = df5['B'].astype(CategoricalDtype(list('bc')))
+      df5 = df5.set_index('B')
+      df5.index
+
+   .. code-block:: ipython
 
-      pd.concat([df2, df3])
+      In [1]: pd.concat([df4, df5])
+      TypeError: categories must match existing categories when appending
 
 .. _indexing.rangeindex: