From b84e2c908e88fbdcf963cdae946986e467e76818 Mon Sep 17 00:00:00 2001
From: itholic <haejoon309@naver.com>
Date: Tue, 18 Feb 2020 11:03:53 +0900
Subject: [PATCH 1/9] Implement Series.combine_first

---
 databricks/koalas/missing/series.py           |  1 -
 databricks/koalas/series.py                   | 54 +++++++++++++++++++
 .../koalas/tests/test_ops_on_diff_frames.py   | 38 +++++++++++++
 docs/source/reference/series.rst              |  1 +
 4 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/databricks/koalas/missing/series.py b/databricks/koalas/missing/series.py
index 677a66aac8..c6bdcef10b 100644
--- a/databricks/koalas/missing/series.py
+++ b/databricks/koalas/missing/series.py
@@ -56,7 +56,6 @@ class _MissingPandasLikeSeries(object):
     between_time = unsupported_function('between_time')
     bfill = unsupported_function('bfill')
     combine = unsupported_function('combine')
-    combine_first = unsupported_function('combine_first')
     cov = unsupported_function('cov')
     divmod = unsupported_function('divmod')
     dot = unsupported_function('dot')
diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
index b4772239ac..512d192a6e 100644
--- a/databricks/koalas/series.py
+++ b/databricks/koalas/series.py
@@ -4139,6 +4139,60 @@ def pct_change(self, periods=1):
 
         return self._with_new_scol((scol - prev_row) / prev_row)
 
+    def combine_first(self, other):
+        """
+        Combine Series values, choosing the calling Series's values first.
+
+        .. note:: This API internally performs a join operation which can be pretty expensive
+            in general. if you want to use though, set `compute.ops_on_diff_frames` to True.
+
+        Parameters
+        ----------
+        other : Series
+            The value(s) to be combined with the `Series`.
+
+        Returns
+        -------
+        Series
+            The result of combining the Series with the other object.
+
+        See Also
+        --------
+        Series.combine : Perform elementwise operation on two Series
+            using a given function.
+
+        Notes
+        -----
+        Result index will be the union of the two indexes.
+
+        Examples
+        --------
+        >>> from databricks.koalas.config import set_option, reset_option
+        >>> set_option("compute.ops_on_diff_frames", True)
+        >>> s1 = ks.Series([1, np.nan])
+        >>> s2 = ks.Series([3, 4])
+        >>> s1.combine_first(s2)
+        0    1.0
+        1    4.0
+        Name: 0, dtype: float64
+
+        >>> reset_option("compute.ops_on_diff_frames")
+        """
+        if not isinstance(self, ks.Series):
+            raise ValueError("`combine_first` only allows `Series` for parameter `other`")
+        this = '__this_0'
+        that = '__that_0'
+        combined = combine_frames(self.to_frame(), other)
+        index_scols = combined._internal.index_scols
+        sdf = combined._sdf
+        # If `self` has missing value, use value of `other`
+        cond = F.when(sdf[this].isNull(), sdf[that]).otherwise(sdf[this])
+        sdf = sdf.select(*index_scols, cond.alias(self.name))
+        internal = _InternalFrame(
+            sdf=sdf,
+            index_map=self._internal.index_map)
+        return _col(ks.DataFrame(internal))
+
     def _cum(self, func, skipna, part_cols=()):
         # This is used to cummin, cummax, cumsum, etc.
 
diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py
index a3eb8a773b..38be269f6b 100644
--- a/databricks/koalas/tests/test_ops_on_diff_frames.py
+++ b/databricks/koalas/tests/test_ops_on_diff_frames.py
@@ -637,6 +637,33 @@ def test_multi_index_column_assignment_frame(self):
         with self.assertRaisesRegex(KeyError, 'Key length \\(3\\) exceeds index depth \\(2\\)'):
             kdf[('1', '2', '3')] = ks.Series([100, 200, 300, 200])
 
+    def test_combine_first(self):
+        # Series.combine_first
+        kser1 = ks.Series({'falcon': 330.0, 'eagle': 160.0})
+        kser2 = ks.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
+        pser1 = kser1.to_pandas()
+        pser2 = kser2.to_pandas()
+
+        self.assert_eq(repr(kser1.combine_first(kser2).sort_index()),
+                       repr(pser1.combine_first(pser2).sort_index()))
+
+        # MultiIndex
+        midx1 = pd.MultiIndex([['lama', 'cow', 'falcon', 'koala'],
+                               ['speed', 'weight', 'length', 'power']],
+                              [[0, 3, 1, 1, 1, 2, 2, 2],
+                               [0, 2, 0, 3, 2, 0, 1, 3]])
+        midx2 = pd.MultiIndex([['lama', 'cow', 'falcon'],
+                               ['speed', 'weight', 'length']],
+                              [[0, 0, 0, 1, 1, 1, 2, 2, 2],
+                               [0, 1, 2, 0, 1, 2, 0, 1, 2]])
+        kser1 = ks.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx1)
+        kser2 = ks.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3], index=midx2)
+        pser1 = kser1.to_pandas()
+        pser2 = kser2.to_pandas()
+
+        self.assert_eq(repr(kser1.combine_first(kser2).sort_index()),
+                       repr(pser1.combine_first(pser2).sort_index()))
+
 
 class OpsOnDiffFramesDisabledTest(ReusedSQLTestCase, SQLTestUtils):
 
@@ -738,3 +765,14 @@ def test_mask(self):
         with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
             self.assert_eq(repr(pdf1.mask(pdf2 > -250)),
                            repr(kdf1.mask(kdf2 > -250).sort_index()))
+
+    def test_combine_first(self):
+        # Series.combine_first
+        kser1 = ks.Series({'falcon': 330.0, 'eagle': 160.0})
+        kser2 = ks.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
+        pser1 = kser1.to_pandas()
+        pser2 = kser2.to_pandas()
+
+        with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
+            self.assert_eq(repr(pser1.combine_first(pser2)),
+                           repr(kser1.combine_first(kser2).sort_index()))
diff --git a/docs/source/reference/series.rst b/docs/source/reference/series.rst
index bab3058a2c..e1a8687b82 100644
--- a/docs/source/reference/series.rst
+++ b/docs/source/reference/series.rst
@@ -78,6 +78,7 @@ Binary operator functions
    Series.rmod
    Series.floordiv
    Series.rfloordiv
+   Series.combine_first
    Series.lt
    Series.gt
    Series.le

From 06ed257a29e0ab7b55b676e58342f60663119c10 Mon Sep 17 00:00:00 2001
From: itholic <haejoon309@naver.com>
Date: Tue, 18 Feb 2020 11:07:49 +0900
Subject: [PATCH 2/9] Check type of `other` instead of `self`; add failure test

---
 databricks/koalas/series.py                        | 2 +-
 databricks/koalas/tests/test_ops_on_diff_frames.py | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
index 512d192a6e..67b5c9b950 100644
--- a/databricks/koalas/series.py
+++ b/databricks/koalas/series.py
@@ -4178,7 +4178,7 @@ def combine_first(self, other):
 
         >>> reset_option("compute.ops_on_diff_frames")
         """
-        if not isinstance(self, ks.Series):
+        if not isinstance(other, ks.Series):
             raise ValueError("`combine_first` only allows `Series` for parameter `other`")
         this = '__this_0'
         that = '__that_0'
diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py
index 38be269f6b..561784bd97 100644
--- a/databricks/koalas/tests/test_ops_on_diff_frames.py
+++ b/databricks/koalas/tests/test_ops_on_diff_frames.py
@@ -646,6 +646,9 @@ def test_combine_first(self):
 
         self.assert_eq(repr(kser1.combine_first(kser2).sort_index()),
                        repr(pser1.combine_first(pser2).sort_index()))
+        with self.assertRaisesRegex(ValueError,
+                                    "`combine_first` only allows `Series` for parameter `other`"):
+            kser1.combine_first(50)
 
         # MultiIndex
         midx1 = pd.MultiIndex([['lama', 'cow', 'falcon', 'koala'],

From 910a8d288397abdeb54694609e1c9f7b9388363e Mon Sep 17 00:00:00 2001
From: itholic <haejoon309@naver.com>
Date: Sun, 1 Mar 2020 16:28:49 +0900
Subject: [PATCH 3/9] Applying Black

---
 databricks/koalas/series.py                   |  8 ++-
 .../koalas/tests/test_ops_on_diff_frames.py   | 49 ++++++++++---------
 2 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
index f3828fcfdd..550457aec8 100644
--- a/databricks/koalas/series.py
+++ b/databricks/koalas/series.py
@@ -4365,17 +4365,15 @@ def combine_first(self, other):
         """
         if not isinstance(other, ks.Series):
             raise ValueError("`combine_first` only allows `Series` for parameter `other`")
-        this = '__this_0'
-        that = '__that_0'
+        this = "__this_0"
+        that = "__that_0"
         combined = combine_frames(self.to_frame(), other)
         index_scols = combined._internal.index_scols
         sdf = combined._sdf
         # If `self` has missing value, use value of `other`
         cond = F.when(sdf[this].isNull(), sdf[that]).otherwise(sdf[this])
         sdf = sdf.select(*index_scols, cond.alias(self.name))
-        internal = _InternalFrame(
-            sdf=sdf,
-            index_map=self._internal.index_map)
+        internal = _InternalFrame(sdf=sdf, index_map=self._internal.index_map)
         return _col(ks.DataFrame(internal))
 
     def _cum(self, func, skipna, part_cols=()):
diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py
index 0a57ac5cfd..ce348ad0d2 100644
--- a/databricks/koalas/tests/test_ops_on_diff_frames.py
+++ b/databricks/koalas/tests/test_ops_on_diff_frames.py
@@ -649,33 +649,38 @@ def test_multi_index_column_assignment_frame(self):
 
     def test_combine_first(self):
         # Series.combine_first
-        kser1 = ks.Series({'falcon': 330.0, 'eagle': 160.0})
-        kser2 = ks.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
+        kser1 = ks.Series({"falcon": 330.0, "eagle": 160.0})
+        kser2 = ks.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0})
         pser1 = kser1.to_pandas()
         pser2 = kser2.to_pandas()
 
-        self.assert_eq(repr(kser1.combine_first(kser2).sort_index()),
-                       repr(pser1.combine_first(pser2).sort_index()))
-        with self.assertRaisesRegex(ValueError,
-                                    "`combine_first` only allows `Series` for parameter `other`"):
+        self.assert_eq(
+            repr(kser1.combine_first(kser2).sort_index()),
+            repr(pser1.combine_first(pser2).sort_index()),
+        )
+        with self.assertRaisesRegex(
+            ValueError, "`combine_first` only allows `Series` for parameter `other`"
+        ):
             kser1.combine_first(50)
 
         # MultiIndex
-        midx1 = pd.MultiIndex([['lama', 'cow', 'falcon', 'koala'],
-                               ['speed', 'weight', 'length', 'power']],
-                              [[0, 3, 1, 1, 1, 2, 2, 2],
-                               [0, 2, 0, 3, 2, 0, 1, 3]])
-        midx2 = pd.MultiIndex([['lama', 'cow', 'falcon'],
-                               ['speed', 'weight', 'length']],
-                              [[0, 0, 0, 1, 1, 1, 2, 2, 2],
-                               [0, 1, 2, 0, 1, 2, 0, 1, 2]])
+        midx1 = pd.MultiIndex(
+            [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length", "power"]],
+            [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
+        )
+        midx2 = pd.MultiIndex(
+            [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+        )
         kser1 = ks.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx1)
         kser2 = ks.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3], index=midx2)
         pser1 = kser1.to_pandas()
         pser2 = kser2.to_pandas()
 
-        self.assert_eq(repr(kser1.combine_first(kser2).sort_index()),
-                       repr(pser1.combine_first(pser2).sort_index()))
+        self.assert_eq(
+            repr(kser1.combine_first(kser2).sort_index()),
+            repr(pser1.combine_first(pser2).sort_index()),
+        )
 
 
 class OpsOnDiffFramesDisabledTest(ReusedSQLTestCase, SQLTestUtils):
@@ -775,16 +780,16 @@ def test_mask(self):
         kdf2 = ks.from_pandas(pdf2)
 
         with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
-            self.assert_eq(repr(pdf1.mask(pdf2 > -250)),
-                           repr(kdf1.mask(kdf2 > -250).sort_index()))
+            self.assert_eq(repr(pdf1.mask(pdf2 > -250)), repr(kdf1.mask(kdf2 > -250).sort_index()))
 
     def test_combine_first(self):
         # Series.combine_first
-        kser1 = ks.Series({'falcon': 330.0, 'eagle': 160.0})
-        kser2 = ks.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
+        kser1 = ks.Series({"falcon": 330.0, "eagle": 160.0})
+        kser2 = ks.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0})
         pser1 = kser1.to_pandas()
         pser2 = kser2.to_pandas()
 
         with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
-            self.assert_eq(repr(pser1.combine_first(pser2)),
-                           repr(kser1.combine_first(kser2).sort_index()))
+            self.assert_eq(
+                repr(pser1.combine_first(pser2)), repr(kser1.combine_first(kser2).sort_index())
+            )

From baa87e8d37dba85686c56d1828c9cceac3dce934 Mon Sep 17 00:00:00 2001
From: itholic <haejoon309@naver.com>
Date: Thu, 5 Mar 2020 21:44:22 +0900
Subject: [PATCH 4/9] Adding case when Series come from same DataFrame

---
 databricks/koalas/series.py            | 21 ++++++++++-----------
 databricks/koalas/tests/test_series.py | 17 +++++++++++++++++
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
index 550457aec8..911158eaaf 100644
--- a/databricks/koalas/series.py
+++ b/databricks/koalas/series.py
@@ -35,7 +35,7 @@
 from pyspark.sql.window import Window
 
 from databricks import koalas as ks  # For running doctests and reference resolution in PyCharm.
-from databricks.koalas.config import get_option
+from databricks.koalas.config import get_option, option_context
 from databricks.koalas.base import IndexOpsMixin
 from databricks.koalas.exceptions import SparkPandasIndexingError
 from databricks.koalas.frame import DataFrame
@@ -4328,9 +4328,6 @@ def combine_first(self, other):
         """
         Combine Series values, choosing the calling Series's values first.
 
-        .. note:: This API internally performs a join operation which can be pretty expensive
-            in general. if you want to use though, set `compute.ops_on_diff_frames` to True.
-
         Parameters
         ----------
         other : Series
@@ -4352,22 +4349,24 @@ def combine_first(self, other):
 
         Examples
         --------
-        >>> from databricks.koalas.config import set_option, reset_option
-        >>> set_option("compute.ops_on_diff_frames", True)
         >>> s1 = ks.Series([1, np.nan])
         >>> s2 = ks.Series([3, 4])
         >>> s1.combine_first(s2)
         0    1.0
         1    4.0
         Name: 0, dtype: float64
-
-        >>> reset_option("compute.ops_on_diff_frames")
         """
         if not isinstance(other, ks.Series):
             raise ValueError("`combine_first` only allows `Series` for parameter `other`")
-        this = "__this_0"
-        that = "__that_0"
-        combined = combine_frames(self.to_frame(), other)
+        if self._kdf is other._kdf:
+            this = self.name
+            that = other.name
+            combined = self._kdf
+        else:
+            this = "__this_{}".format(self.name)
+            that = "__that_{}".format(other.name)
+            with option_context("compute.ops_on_diff_frames", True):
+                combined = combine_frames(self.to_frame(), other)
         index_scols = combined._internal.index_scols
         sdf = combined._sdf
         # If `self` has missing value, use value of `other`
diff --git a/databricks/koalas/tests/test_series.py b/databricks/koalas/tests/test_series.py
index b41550c027..890fef40c5 100644
--- a/databricks/koalas/tests/test_series.py
+++ b/databricks/koalas/tests/test_series.py
@@ -1283,3 +1283,20 @@ def test_axes(self):
         kser = ks.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)
         pser = kser.to_pandas()
         self.assert_list_eq(kser.axes, pser.axes)
+
+    def test_combine_first(self):
+        kdf = ks.DataFrame(
+            {
+                "A": pd.Series({"falcon": 330.0, "eagle": 160.0}),
+                "B": pd.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0}),
+            }
+        )
+        kser1 = kdf.A
+        kser2 = kdf.B
+        pser1 = kser1.to_pandas()
+        pser2 = kser2.to_pandas()
+
+        self.assert_eq(
+            repr(kser1.combine_first(kser2).sort_index()),
+            repr(pser1.combine_first(pser2).sort_index()),
+        )

From 53cb1686998be79c4f06c3c00fc554da22358d06 Mon Sep 17 00:00:00 2001
From: itholic <haejoon309@naver.com>
Date: Fri, 6 Mar 2020 02:48:57 +0900
Subject: [PATCH 5/9] Move test from ops_on_diff to series

---
 .../koalas/tests/test_ops_on_diff_frames.py   | 47 -------------------
 databricks/koalas/tests/test_series.py        | 38 ++++++++++++++-
 2 files changed, 36 insertions(+), 49 deletions(-)

diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py
index e11ad30543..6537246f6c 100644
--- a/databricks/koalas/tests/test_ops_on_diff_frames.py
+++ b/databricks/koalas/tests/test_ops_on_diff_frames.py
@@ -647,41 +647,6 @@ def test_multi_index_column_assignment_frame(self):
         with self.assertRaisesRegex(KeyError, "Key length \\(3\\) exceeds index depth \\(2\\)"):
             kdf[("1", "2", "3")] = ks.Series([100, 200, 300, 200])
 
-    def test_combine_first(self):
-        # Series.combine_first
-        kser1 = ks.Series({"falcon": 330.0, "eagle": 160.0})
-        kser2 = ks.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0})
-        pser1 = kser1.to_pandas()
-        pser2 = kser2.to_pandas()
-
-        self.assert_eq(
-            repr(kser1.combine_first(kser2).sort_index()),
-            repr(pser1.combine_first(pser2).sort_index()),
-        )
-        with self.assertRaisesRegex(
-            ValueError, "`combine_first` only allows `Series` for parameter `other`"
-        ):
-            kser1.combine_first(50)
-
-        # MultiIndex
-        midx1 = pd.MultiIndex(
-            [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length", "power"]],
-            [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
-        )
-        midx2 = pd.MultiIndex(
-            [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
-            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
-        )
-        kser1 = ks.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx1)
-        kser2 = ks.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3], index=midx2)
-        pser1 = kser1.to_pandas()
-        pser2 = kser2.to_pandas()
-
-        self.assert_eq(
-            repr(kser1.combine_first(kser2).sort_index()),
-            repr(pser1.combine_first(pser2).sort_index()),
-        )
-
     def test_to_series_comparison(self):
         kidx1 = ks.Index([1, 2, 3, 4, 5])
         kidx2 = ks.Index([1, 2, 3, 4, 5])
@@ -792,15 +757,3 @@ def test_mask(self):
 
         with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
             self.assert_eq(repr(pdf1.mask(pdf2 > -250)), repr(kdf1.mask(kdf2 > -250).sort_index()))
-
-    def test_combine_first(self):
-        # Series.combine_first
-        kser1 = ks.Series({"falcon": 330.0, "eagle": 160.0})
-        kser2 = ks.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0})
-        pser1 = kser1.to_pandas()
-        pser2 = kser2.to_pandas()
-
-        with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
-            self.assert_eq(
-                repr(pser1.combine_first(pser2)), repr(kser1.combine_first(kser2).sort_index())
-            )
diff --git a/databricks/koalas/tests/test_series.py b/databricks/koalas/tests/test_series.py
index 8f4a304416..fc53b9ac18 100644
--- a/databricks/koalas/tests/test_series.py
+++ b/databricks/koalas/tests/test_series.py
@@ -1279,10 +1279,44 @@ def test_axes(self):
         self.assert_list_eq(kser.axes, pser.axes)
 
     def test_combine_first(self):
+        kser1 = ks.Series({"falcon": 330.0, "eagle": 160.0})
+        kser2 = ks.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0})
+        pser1 = kser1.to_pandas()
+        pser2 = kser2.to_pandas()
+
+        self.assert_eq(
+            repr(kser1.combine_first(kser2).sort_index()),
+            repr(pser1.combine_first(pser2).sort_index()),
+        )
+        with self.assertRaisesRegex(
+            ValueError, "`combine_first` only allows `Series` for parameter `other`"
+        ):
+            kser1.combine_first(50)
+
+        # MultiIndex
+        midx1 = pd.MultiIndex(
+            [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length", "power"]],
+            [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
+        )
+        midx2 = pd.MultiIndex(
+            [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+        )
+        kser1 = ks.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx1)
+        kser2 = ks.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3], index=midx2)
+        pser1 = kser1.to_pandas()
+        pser2 = kser2.to_pandas()
+
+        self.assert_eq(
+            repr(kser1.combine_first(kser2).sort_index()),
+            repr(pser1.combine_first(pser2).sort_index()),
+        )
+
+        # Series come from same DataFrame
         kdf = ks.DataFrame(
             {
-                "A": pd.Series({"falcon": 330.0, "eagle": 160.0}),
-                "B": pd.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0}),
+                "A": {"falcon": 330.0, "eagle": 160.0},
+                "B": {"falcon": 345.0, "eagle": 200.0, "duck": 30.0},
             }
         )
         kser1 = kdf.A

From f134976456c3d5970e043ef97d4b665b836d15d4 Mon Sep 17 00:00:00 2001
From: itholic <haejoon309@naver.com>
Date: Fri, 6 Mar 2020 03:48:31 +0900
Subject: [PATCH 6/9] Empty commit for build test


From 3551c678fbeaa1aa9854907cab0980c5f9014356 Mon Sep 17 00:00:00 2001
From: itholic <haejoon309@naver.com>
Date: Mon, 16 Mar 2020 11:55:39 +0900
Subject: [PATCH 7/9] scols -> spark_columns

---
 databricks/koalas/series.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
index 4988f974ce..859c79cc08 100644
--- a/databricks/koalas/series.py
+++ b/databricks/koalas/series.py
@@ -4378,11 +4378,11 @@ def combine_first(self, other):
             that = "__that_{}".format(other.name)
             with option_context("compute.ops_on_diff_frames", True):
                 combined = combine_frames(self.to_frame(), other)
-        index_scols = combined._internal.index_scols
+        index_spark_columns = combined._internal.index_spark_columns
         sdf = combined._sdf
         # If `self` has missing value, use value of `other`
         cond = F.when(sdf[this].isNull(), sdf[that]).otherwise(sdf[this])
-        sdf = sdf.select(*index_scols, cond.alias(self.name))
+        sdf = sdf.select(*index_spark_columns, cond.alias(self.name))
         internal = _InternalFrame(sdf=sdf, index_map=self._internal.index_map)
         return _col(ks.DataFrame(internal))
 

From d156f367f189a59e39ffcb293b696083a1e383fb Mon Sep 17 00:00:00 2001
From: itholic <haejoon309@naver.com>
Date: Mon, 16 Mar 2020 13:19:53 +0900
Subject: [PATCH 8/9] Rebase to Master

---
 databricks/koalas/series.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
index 859c79cc08..93fe8eb5f4 100644
--- a/databricks/koalas/series.py
+++ b/databricks/koalas/series.py
@@ -4378,12 +4378,12 @@ def combine_first(self, other):
             that = "__that_{}".format(other.name)
             with option_context("compute.ops_on_diff_frames", True):
                 combined = combine_frames(self.to_frame(), other)
-        index_spark_columns = combined._internal.index_spark_columns
+        index_scols = combined._internal.index_spark_columns
         sdf = combined._sdf
         # If `self` has missing value, use value of `other`
         cond = F.when(sdf[this].isNull(), sdf[that]).otherwise(sdf[this])
-        sdf = sdf.select(*index_spark_columns, cond.alias(self.name))
-        internal = _InternalFrame(sdf=sdf, index_map=self._internal.index_map)
+        sdf = sdf.select(*index_scols, cond.alias(self.name))
+        internal = _InternalFrame(spark_frame=sdf, index_map=self._internal.index_map)
         return _col(ks.DataFrame(internal))
 
     def dot(self, other):

From c4fb5d0eb920dcf3702dc014423c9f9783696101 Mon Sep 17 00:00:00 2001
From: itholic <haejoon309@naver.com>
Date: Mon, 16 Mar 2020 15:28:38 +0900
Subject: [PATCH 9/9] Address review comments: keep anchor and column labels

---
 databricks/koalas/series.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
index 93fe8eb5f4..3ef7219d39 100644
--- a/databricks/koalas/series.py
+++ b/databricks/koalas/series.py
@@ -4378,12 +4378,20 @@ def combine_first(self, other):
             that = "__that_{}".format(other.name)
             with option_context("compute.ops_on_diff_frames", True):
                 combined = combine_frames(self.to_frame(), other)
-        index_scols = combined._internal.index_spark_columns
         sdf = combined._sdf
         # If `self` has missing value, use value of `other`
         cond = F.when(sdf[this].isNull(), sdf[that]).otherwise(sdf[this])
-        sdf = sdf.select(*index_scols, cond.alias(self.name))
-        internal = _InternalFrame(spark_frame=sdf, index_map=self._internal.index_map)
+        # If `self` and `other` come from the same frame, the anchor should be kept
+        if self._kdf is other._kdf:
+            return self._with_new_scol(cond)
+        index_scols = combined._internal.index_spark_columns
+        sdf = sdf.select(*index_scols, cond.alias(self.name)).distinct()
+        internal = _InternalFrame(
+            spark_frame=sdf,
+            index_map=self._internal.index_map,
+            column_labels=self._internal.column_labels,
+            column_label_names=self._internal.column_label_names,
+        )
         return _col(ks.DataFrame(internal))
 
     def dot(self, other):