From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001
From: Kaiqi Dong <kaiqi@kth.se>
Date: Mon, 3 Dec 2018 17:43:52 +0100
Subject: [PATCH 01/39] remove \n from docstring

---
 pandas/core/arrays/datetimes.py  | 26 +++++++++++++-------------
 pandas/core/arrays/timedeltas.py | 16 ++++++++--------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index cfe3afcf3730a..b3df505d56d78 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -82,7 +82,7 @@ def f(self):
         return result
 
     f.__name__ = name
-    f.__doc__ = docstring
+    f.__doc__ = "\n{}\n".format(docstring)
     return property(f)
 
 
@@ -1072,19 +1072,19 @@ def date(self):
 
         return tslib.ints_to_pydatetime(timestamps, box="date")
 
-    year = _field_accessor('year', 'Y', "\n The year of the datetime\n")
+    year = _field_accessor('year', 'Y', "The year of the datetime")
     month = _field_accessor('month', 'M',
-                            "\n The month as January=1, December=12 \n")
-    day = _field_accessor('day', 'D', "\nThe days of the datetime\n")
-    hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n")
-    minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n")
-    second = _field_accessor('second', 's', "\nThe seconds of the datetime\n")
+                            "The month as January=1, December=12")
+    day = _field_accessor('day', 'D', "The days of the datetime")
+    hour = _field_accessor('hour', 'h', "The hours of the datetime")
+    minute = _field_accessor('minute', 'm', "The minutes of the datetime")
+    second = _field_accessor('second', 's', "The seconds of the datetime")
     microsecond = _field_accessor('microsecond', 'us',
-                                  "\nThe microseconds of the datetime\n")
+                                  "The microseconds of the datetime")
     nanosecond = _field_accessor('nanosecond', 'ns',
-                                 "\nThe nanoseconds of the datetime\n")
+                                 "The nanoseconds of the datetime")
     weekofyear = _field_accessor('weekofyear', 'woy',
-                                 "\nThe week ordinal of the year\n")
+                                 "The week ordinal of the year")
     week = weekofyear
     _dayofweek_doc = """
     The day of the week with Monday=0, Sunday=6.
@@ -1129,12 +1129,12 @@ def date(self):
         "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0")
 
     dayofyear = _field_accessor('dayofyear', 'doy',
-                                "\nThe ordinal day of the year\n")
-    quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n")
+                                "The ordinal day of the year")
+    quarter = _field_accessor('quarter', 'q', "The quarter of the date")
     days_in_month = _field_accessor(
         'days_in_month',
         'dim',
-        "\nThe number of days in the month\n")
+        "The number of days in the month")
     daysinmonth = days_in_month
     _is_month_doc = """
         Indicates whether the date is the {first_or_last} day of the month.
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 830283d31a929..4afc9f5483c2a 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -59,7 +59,7 @@ def f(self):
         return result
 
     f.__name__ = name
-    f.__doc__ = docstring
+    f.__doc__ = "\n{}\n".format(docstring)
     return property(f)
 
 
@@ -684,16 +684,16 @@ def to_pytimedelta(self):
         return tslibs.ints_to_pytimedelta(self.asi8)
 
     days = _field_accessor("days", "days",
-                           "\nNumber of days for each element.\n")
+                           "Number of days for each element.")
     seconds = _field_accessor("seconds", "seconds",
-                              "\nNumber of seconds (>= 0 and less than 1 day) "
-                              "for each element.\n")
+                              "Number of seconds (>= 0 and less than 1 day) "
+                              "for each element.")
     microseconds = _field_accessor("microseconds", "microseconds",
-                                   "\nNumber of microseconds (>= 0 and less "
-                                   "than 1 second) for each element.\n")
+                                   "Number of microseconds (>= 0 and less "
+                                   "than 1 second) for each element.")
     nanoseconds = _field_accessor("nanoseconds", "nanoseconds",
-                                  "\nNumber of nanoseconds (>= 0 and less "
-                                  "than 1 microsecond) for each element.\n")
+                                  "Number of nanoseconds (>= 0 and less "
+                                  "than 1 microsecond) for each element.")
 
     @property
     def components(self):

From dea38f24c0067ae3fe9484b837c9649714213bba Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 14 Jan 2020 21:26:31 +0100
Subject: [PATCH 02/39] fix issue 17038

---
 pandas/core/reshape/pivot.py       |  4 +++-
 pandas/tests/reshape/test_pivot.py | 20 ++++++++++++++------
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index b443ba142369c..9743d90f4dd04 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -117,7 +117,9 @@ def pivot_table(
                 agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype)
 
     table = agged
-    if table.index.nlevels > 1:
+
+    # GH 17038, this check should only happen if index is specified
+    if table.index.nlevels > 1 and index:
         # Related GH #17123
         # If index_names are integers, determine whether the integers refer
         # to the level position or name.
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 743fc50c87e96..46a05123c9fdd 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -896,12 +896,6 @@ def _check_output(
             totals = table.loc[("All", ""), value_col]
             assert totals == self.data[value_col].mean()
 
-        # no rows
-        rtable = self.data.pivot_table(
-            columns=["AA", "BB"], margins=True, aggfunc=np.mean
-        )
-        assert isinstance(rtable, Series)
-
         table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
         for item in ["DD", "EE", "FF"]:
             totals = table.loc[("All", ""), item]
@@ -972,6 +966,20 @@ def test_pivot_integer_columns(self):
 
         tm.assert_frame_equal(table, table2, check_names=False)
 
+    @pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)])
+    def test_pivot_table_multiindex_only(self, cols):
+        # GH 17038
+        df2 = DataFrame({cols[0]: [1, 2, 3], cols[1]: [1, 2, 3], "v": [4, 5, 6]})
+
+        result = df2.pivot_table(values="v", columns=cols)
+        expected = DataFrame(
+            [[4, 5, 6]],
+            columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
+            index=Index(["v"]),
+        )
+
+        tm.assert_frame_equal(result, expected)
+
     def test_pivot_no_level_overlap(self):
         # GH #1181
 

From cd9e7ac3f31ffaf95cd628863df911dea9fa1248 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 14 Jan 2020 21:29:43 +0100
Subject: [PATCH 03/39] revert fix for issue 17038

---
 pandas/core/reshape/pivot.py       |  3 +--
 pandas/tests/reshape/test_pivot.py | 20 ++++++--------------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 9743d90f4dd04..a7cdbb0da7a4e 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -118,8 +118,7 @@ def pivot_table(
 
     table = agged
 
-    # GH 17038, this check should only happen if index is specified
-    if table.index.nlevels > 1 and index:
+    if table.index.nlevels > 1:
         # Related GH #17123
         # If index_names are integers, determine whether the integers refer
         # to the level position or name.
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 46a05123c9fdd..743fc50c87e96 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -896,6 +896,12 @@ def _check_output(
             totals = table.loc[("All", ""), value_col]
             assert totals == self.data[value_col].mean()
 
+        # no rows
+        rtable = self.data.pivot_table(
+            columns=["AA", "BB"], margins=True, aggfunc=np.mean
+        )
+        assert isinstance(rtable, Series)
+
         table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
         for item in ["DD", "EE", "FF"]:
             totals = table.loc[("All", ""), item]
@@ -966,20 +972,6 @@ def test_pivot_integer_columns(self):
 
         tm.assert_frame_equal(table, table2, check_names=False)
 
-    @pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)])
-    def test_pivot_table_multiindex_only(self, cols):
-        # GH 17038
-        df2 = DataFrame({cols[0]: [1, 2, 3], cols[1]: [1, 2, 3], "v": [4, 5, 6]})
-
-        result = df2.pivot_table(values="v", columns=cols)
-        expected = DataFrame(
-            [[4, 5, 6]],
-            columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
-            index=Index(["v"]),
-        )
-
-        tm.assert_frame_equal(result, expected)
-
     def test_pivot_no_level_overlap(self):
         # GH #1181
 

From e5e912be0f596943067a7df812442764d311a086 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 14 Jan 2020 21:30:16 +0100
Subject: [PATCH 04/39] revert leftover blank line in pivot.py

---
 pandas/core/reshape/pivot.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index a7cdbb0da7a4e..b443ba142369c 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -117,7 +117,6 @@ def pivot_table(
                 agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype)
 
     table = agged
-
     if table.index.nlevels > 1:
         # Related GH #17123
         # If index_names are integers, determine whether the integers refer

From 2c451a94c1f0703a83b5d07379ddb7547b383f93 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 12:49:26 +0200
Subject: [PATCH 05/39] add dropna doc for factorize

---
 pandas/core/algorithms.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index befde7c355818..ee2bc529c4d9c 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -522,6 +522,12 @@ def _factorize_array(
         Hint to the hashtable sizer.
     """
     ),
+    dropna=dedent(
+        """\
+    dropna : bool, default True
+        Drop the NA from the uniques of the values.
+    """
+    )
 )
 def factorize(
     values,
@@ -543,7 +549,7 @@ def factorize(
     {values}{sort}
     na_sentinel : int, default -1
         Value to mark "not found".
-    {size_hint}\
+    {size_hint}{dropna}\
 
     Returns
     -------
@@ -620,6 +626,23 @@ def factorize(
     array([0, 0, 1]...)
     >>> uniques
     Index(['a', 'c'], dtype='object')
+
+    If NA is in the values, and we want to include NA in the uniques of the
+    values, it can be achieved by setting ``dropna=False``. The default is
+    to exclude NA from the uniques.
+
+    >>> values = np.array([1, 2, 1, np.nan]
+    >>> codes, uniques = pd.factorize(values, dropna=True)  # default
+    >>> codes
+    array([ 0,  1,  0, -1]
+    >>> uniques
+    array([1., 2.])
+
+    >>> codes, uniques = pd.factorize(values, dropna=False)
+    >>> codes
+    array([0, 1, 0, 2])
+    >>> uniques
+    array([ 1.,  2., nan])
     """
     # Implementation notes: This method is responsible for 3 things
     # 1.) coercing data to array-like (ndarray, Index, extension array)

From 31ffca7ab5432891b3d8c0c42c5377c65e5a738f Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 12:51:35 +0200
Subject: [PATCH 06/39] rephrase the doc

---
 pandas/core/algorithms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index ee2bc529c4d9c..a15d9ed0bf666 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -629,7 +629,7 @@ def factorize(
 
     If NA is in the values, and we want to include NA in the uniques of the
     values, it can be achieved by setting ``dropna=False``. The default is
-    to exclude NA from the uniques.
+    ``dropna=True`` which excludes NA from the uniques.
 
     >>> values = np.array([1, 2, 1, np.nan]
     >>> codes, uniques = pd.factorize(values, dropna=True)  # default

From 32d029d6b85656207681eace85242f115781e830 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 12:53:33 +0200
Subject: [PATCH 07/39] flake8

---
 pandas/core/algorithms.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index a15d9ed0bf666..ebe79d936ff73 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -634,15 +634,15 @@ def factorize(
     >>> values = np.array([1, 2, 1, np.nan]
     >>> codes, uniques = pd.factorize(values, dropna=True)  # default
     >>> codes
-    array([ 0,  1,  0, -1]
+    array([0, 1, 0, -1]...)
     >>> uniques
     array([1., 2.])
 
     >>> codes, uniques = pd.factorize(values, dropna=False)
     >>> codes
-    array([0, 1, 0, 2])
+    array([0, 1, 0, 2]...)
     >>> uniques
-    array([ 1.,  2., nan])
+    array([1., 2., nan])
     """
     # Implementation notes: This method is responsible for 3 things
     # 1.) coercing data to array-like (ndarray, Index, extension array)

From ba93eb636495ff6091eea61184df393ea71b11e3 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 13:04:02 +0200
Subject: [PATCH 08/39] fixup

---
 pandas/core/algorithms.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index ebe79d936ff73..820669fc8fbbf 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -522,12 +522,6 @@ def _factorize_array(
         Hint to the hashtable sizer.
     """
     ),
-    dropna=dedent(
-        """\
-    dropna : bool, default True
-        Drop the NA from the uniques of the values.
-    """
-    )
 )
 def factorize(
     values,
@@ -549,7 +543,9 @@ def factorize(
     {values}{sort}
     na_sentinel : int, default -1
         Value to mark "not found".
-    {size_hint}{dropna}\
+    {size_hint}
+    dropna : bool, default True
+        Drop the NA from the uniques of the values.
 
     Returns
     -------

From 23309089ee8f7eb2727d99c2c87fb10cf32564df Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 13:06:17 +0200
Subject: [PATCH 09/39] use NaN

---
 pandas/core/algorithms.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 820669fc8fbbf..c3107c981ee6a 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -545,7 +545,7 @@ def factorize(
         Value to mark "not found".
     {size_hint}
     dropna : bool, default True
-        Drop the NA from the uniques of the values.
+        Drop the NaN from the uniques of the values.
 
     Returns
     -------
@@ -623,9 +623,9 @@ def factorize(
     >>> uniques
     Index(['a', 'c'], dtype='object')
 
-    If NA is in the values, and we want to include NA in the uniques of the
+    If NaN is in the values, and we want to include NaN in the uniques of the
     values, it can be achieved by setting ``dropna=False``. The default is
-    ``dropna=True`` which excludes NA from the uniques.
+    ``dropna=True`` which excludes NaN from the uniques.
 
     >>> values = np.array([1, 2, 1, np.nan]
     >>> codes, uniques = pd.factorize(values, dropna=True)  # default

From b9850a2f07fdd601b0ec6c9201d80a81c509caf3 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 13:09:58 +0200
Subject: [PATCH 10/39] add dropna in series.factorize

---
 pandas/core/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index b62ef668df5e1..11fb470233e60 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1398,8 +1398,8 @@ def memory_usage(self, deep=False):
             """
         ),
     )
-    def factorize(self, sort=False, na_sentinel=-1):
-        return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel)
+    def factorize(self, sort=False, na_sentinel=-1, dropna=True):
+        return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel, dropna=dropna)
 
     _shared_docs[
         "searchsorted"

From 3a18c65c518fa7245ce73e25291dccdb1326ebb9 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 13:10:21 +0200
Subject: [PATCH 11/39] black

---
 pandas/core/base.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 11fb470233e60..94de2836024c8 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1399,7 +1399,9 @@ def memory_usage(self, deep=False):
         ),
     )
     def factorize(self, sort=False, na_sentinel=-1, dropna=True):
-        return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel, dropna=dropna)
+        return algorithms.factorize(
+            self, sort=sort, na_sentinel=na_sentinel, dropna=dropna
+        )
 
     _shared_docs[
         "searchsorted"

From 9d7f1e697cdb05d62b09d620cd4c4996292438d2 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 13:22:12 +0200
Subject: [PATCH 12/39] add test

---
 pandas/tests/base/test_factorize.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/pandas/tests/base/test_factorize.py b/pandas/tests/base/test_factorize.py
index 415a8b7e4362f..1efe331345d23 100644
--- a/pandas/tests/base/test_factorize.py
+++ b/pandas/tests/base/test_factorize.py
@@ -26,3 +26,21 @@ def test_factorize(index_or_series_obj, sort):
 
     tm.assert_numpy_array_equal(result_codes, expected_codes)
     tm.assert_index_equal(result_uniques, expected_uniques)
+
+
+@pytest.mark.parametrize("dropna", [True, False])
+def test_factorize_dropna(dropna):
+    # 35667
+    values = np.array([1, 2, 1, np.nan])
+    ser = pd.Series(values)
+    codes, uniques = ser.factorize(dropna=dropna)
+
+    if dropna:
+        expected_codes = np.array([0, 1, 0, -1])
+        expected_uniques = pd.Index([1., 2.])
+    else:
+        expected_codes = np.array([0, 1, 0, 2])
+        expected_uniques = pd.Index([1.,  2., np.nan])
+
+    tm.assert_numpy_array_equal(codes, expected_codes)
+    tm.assert_index_equal(uniques, expected_uniques)

From 97fd2e67df65cf5b22476173645b66f699183e7f Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 13:46:21 +0200
Subject: [PATCH 13/39] linting

---
 pandas/core/algorithms.py           | 2 +-
 pandas/tests/base/test_factorize.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index c3107c981ee6a..8be5a0069af9d 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -627,7 +627,7 @@ def factorize(
     values, it can be achieved by setting ``dropna=False``. The default is
     ``dropna=True`` which excludes NaN from the uniques.
 
-    >>> values = np.array([1, 2, 1, np.nan]
+    >>> values = np.array([1, 2, 1, np.nan])
     >>> codes, uniques = pd.factorize(values, dropna=True)  # default
     >>> codes
     array([0, 1, 0, -1]...)
diff --git a/pandas/tests/base/test_factorize.py b/pandas/tests/base/test_factorize.py
index 1efe331345d23..8f6065a8d2d37 100644
--- a/pandas/tests/base/test_factorize.py
+++ b/pandas/tests/base/test_factorize.py
@@ -40,7 +40,7 @@ def test_factorize_dropna(dropna):
         expected_uniques = pd.Index([1., 2.])
     else:
         expected_codes = np.array([0, 1, 0, 2])
-        expected_uniques = pd.Index([1.,  2., np.nan])
+        expected_uniques = pd.Index([1., 2., np.nan])
 
     tm.assert_numpy_array_equal(codes, expected_codes)
     tm.assert_index_equal(uniques, expected_uniques)

From 68527ef2d8809fab0796838759ebd02c75ab9ffc Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 13:49:23 +0200
Subject: [PATCH 14/39] linting

---
 pandas/core/algorithms.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 8be5a0069af9d..56a4aee3ffa93 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -632,13 +632,13 @@ def factorize(
     >>> codes
     array([0, 1, 0, -1]...)
     >>> uniques
-    array([1., 2.])
+    Index([1., 2.], dtype='float64')
 
     >>> codes, uniques = pd.factorize(values, dropna=False)
     >>> codes
     array([0, 1, 0, 2]...)
     >>> uniques
-    array([1., 2., nan])
+    Index([1., 2., nan], dtype='float64')
     """
     # Implementation notes: This method is responsible for 3 things
     # 1.) coercing data to array-like (ndarray, Index, extension array)

From 817905c09249d6d06a77a90bd987a026fa9a4be2 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 14:05:49 +0200
Subject: [PATCH 15/39] fix doctest output

---
 pandas/core/algorithms.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 56a4aee3ffa93..3223286f8b88e 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -630,13 +630,13 @@ def factorize(
     >>> values = np.array([1, 2, 1, np.nan])
     >>> codes, uniques = pd.factorize(values, dropna=True)  # default
     >>> codes
-    array([0, 1, 0, -1]...)
+    array([ 0,  1,  0, -1])
     >>> uniques
     Index([1., 2.], dtype='float64')
 
     >>> codes, uniques = pd.factorize(values, dropna=False)
     >>> codes
-    array([0, 1, 0, 2]...)
+    array([ 0,  1,  0, 2])
     >>> uniques
     Index([1., 2., nan], dtype='float64')
     """

From 364aeae22f87b488695bd74f67c3ee4140fab810 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 14:07:03 +0200
Subject: [PATCH 16/39] fix black

---
 pandas/tests/base/test_factorize.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/base/test_factorize.py b/pandas/tests/base/test_factorize.py
index 8f6065a8d2d37..92674ff25c9d5 100644
--- a/pandas/tests/base/test_factorize.py
+++ b/pandas/tests/base/test_factorize.py
@@ -37,10 +37,10 @@ def test_factorize_dropna(dropna):
 
     if dropna:
         expected_codes = np.array([0, 1, 0, -1])
-        expected_uniques = pd.Index([1., 2.])
+        expected_uniques = pd.Index([1.0, 2.0])
     else:
         expected_codes = np.array([0, 1, 0, 2])
-        expected_uniques = pd.Index([1., 2., np.nan])
+        expected_uniques = pd.Index([1.0, 2.0, np.nan])
 
     tm.assert_numpy_array_equal(codes, expected_codes)
     tm.assert_index_equal(uniques, expected_uniques)

From 7cd0cce648bd1a1a5ed0c98db0c4ae23628c5262 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 14:23:11 +0200
Subject: [PATCH 17/39] fixup

---
 pandas/core/algorithms.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 3223286f8b88e..8fe8ee829e1d8 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -632,13 +632,13 @@ def factorize(
     >>> codes
     array([ 0,  1,  0, -1])
     >>> uniques
-    Index([1., 2.], dtype='float64')
+    array([1., 2.])
 
     >>> codes, uniques = pd.factorize(values, dropna=False)
     >>> codes
     array([ 0,  1,  0, 2])
     >>> uniques
-    Index([1., 2., nan], dtype='float64')
+    array([1., 2., nan])
     """
     # Implementation notes: This method is responsible for 3 things
     # 1.) coercing data to array-like (ndarray, Index, extension array)

From 23682237d67b54e31abf9793097b1ed9a2db001d Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 14:39:14 +0200
Subject: [PATCH 18/39] fix doctest

---
 pandas/core/algorithms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 8fe8ee829e1d8..96fd4abd6264b 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -636,7 +636,7 @@ def factorize(
 
     >>> codes, uniques = pd.factorize(values, dropna=False)
     >>> codes
-    array([ 0,  1,  0, 2])
+    array([0, 1, 0, 2])
     >>> uniques
     array([1., 2., nan])
     """

From 8ca0652fac93a19972d8206b67e627810dbbea80 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 14:42:57 +0200
Subject: [PATCH 19/39] add whatsnew

---
 doc/source/whatsnew/v1.2.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 09a5bcb0917c2..9e09cfbeb9fec 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -54,6 +54,7 @@ For example:
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
+- :meth:`Series.factorize` accepts ``dropna`` to include NaN in the uniques of the values by setting ``dropna=False``, default is ``dropna=True`` which excludes NaN from the uniques (:issue:`35667`)
 -
 -
 

From b452513e7fed1941c8829a431f63118bb543195a Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 14:58:45 +0200
Subject: [PATCH 20/39] linting

---
 pandas/core/algorithms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 96fd4abd6264b..a46b58df0cb91 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -638,7 +638,7 @@ def factorize(
     >>> codes
     array([0, 1, 0, 2])
     >>> uniques
-    array([1., 2., nan])
+    array([ 1.,  2., nan])
     """
     # Implementation notes: This method is responsible for 3 things
     # 1.) coercing data to array-like (ndarray, Index, extension array)

From 344c072a2cc9788672dcb2d5941c1c308556eb19 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 15:27:05 +0200
Subject: [PATCH 21/39] fix test

---
 pandas/tests/base/test_factorize.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/base/test_factorize.py b/pandas/tests/base/test_factorize.py
index 92674ff25c9d5..c96e0c150fcc6 100644
--- a/pandas/tests/base/test_factorize.py
+++ b/pandas/tests/base/test_factorize.py
@@ -36,10 +36,10 @@ def test_factorize_dropna(dropna):
     codes, uniques = ser.factorize(dropna=dropna)
 
     if dropna:
-        expected_codes = np.array([0, 1, 0, -1])
+        expected_codes = np.array([0, 1, 0, -1], dtype="int64")
         expected_uniques = pd.Index([1.0, 2.0])
     else:
-        expected_codes = np.array([0, 1, 0, 2])
+        expected_codes = np.array([0, 1, 0, 2], dtype="int64")
         expected_uniques = pd.Index([1.0, 2.0, np.nan])
 
     tm.assert_numpy_array_equal(codes, expected_codes)

From 1a5c3586f8f5452e67fe410d38ca746ec89fc27b Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 22 Aug 2020 22:02:57 +0200
Subject: [PATCH 22/39] add GH prefix to issue reference in test comment

---
 pandas/tests/base/test_factorize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/base/test_factorize.py b/pandas/tests/base/test_factorize.py
index c96e0c150fcc6..98ad482e2233f 100644
--- a/pandas/tests/base/test_factorize.py
+++ b/pandas/tests/base/test_factorize.py
@@ -30,7 +30,7 @@ def test_factorize(index_or_series_obj, sort):
 
 @pytest.mark.parametrize("dropna", [True, False])
 def test_factorize_dropna(dropna):
-    # 35667
+    # GH35667
     values = np.array([1, 2, 1, np.nan])
     ser = pd.Series(values)
     codes, uniques = ser.factorize(dropna=dropna)

From 460795312b40f1e336a08f517972ca14dbd26e87 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 27 Aug 2020 19:11:14 +0200
Subject: [PATCH 23/39] hide dropna and use na_sentinel=None

---
 pandas/core/algorithms.py           | 10 +++++++---
 pandas/core/base.py                 | 18 +++++++++++++++++-
 pandas/tests/base/test_factorize.py | 15 +++++----------
 3 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index a46b58df0cb91..036a0d54f26f5 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -522,6 +522,12 @@ def _factorize_array(
         Hint to the hashtable sizer.
     """
     ),
+    dropna=dedent(
+        """\
+    dropna : bool, default True
+        Drop the NaN from the uniques of the values.
+    """
+    ),
 )
 def factorize(
     values,
@@ -543,9 +549,7 @@ def factorize(
     {values}{sort}
     na_sentinel : int, default -1
         Value to mark "not found".
-    {size_hint}
-    dropna : bool, default True
-        Drop the NaN from the uniques of the values.
+    {size_hint}{dropna}
 
     Returns
     -------
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 94de2836024c8..557c4d6045bfd 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1390,6 +1390,7 @@ def memory_usage(self, deep=False):
         values="",
         order="",
         size_hint="",
+        dropna="",
         sort=textwrap.dedent(
             """\
             sort : bool, default False
@@ -1397,8 +1398,23 @@ def memory_usage(self, deep=False):
                 relationship.
             """
         ),
+        na_sentinel=textwrap.dedent(
+            """\
+            na_sentinel : int or None, default -1
+                Value to mark "not found". If None, will drop the NaN 
+                from the uniques of the values.
+            """
+        ),
     )
-    def factorize(self, sort=False, na_sentinel=-1, dropna=True):
+    def factorize(self, sort: bool = False, na_sentinel: Optional[int] = -1):
+
+        # GH35667, na_sentinel=-1 and dropna to keep backward compatibility of
+        # algorithm.factorize so as not to break.
+        if na_sentinel is None:
+            na_sentinel = -1
+            dropna = False
+        else:
+            dropna = True
         return algorithms.factorize(
             self, sort=sort, na_sentinel=na_sentinel, dropna=dropna
         )
diff --git a/pandas/tests/base/test_factorize.py b/pandas/tests/base/test_factorize.py
index 98ad482e2233f..1532939256cbd 100644
--- a/pandas/tests/base/test_factorize.py
+++ b/pandas/tests/base/test_factorize.py
@@ -28,19 +28,14 @@ def test_factorize(index_or_series_obj, sort):
     tm.assert_index_equal(result_uniques, expected_uniques)
 
 
-@pytest.mark.parametrize("dropna", [True, False])
-def test_factorize_dropna(dropna):
+def test_factorize_dropna():
     # GH35667
     values = np.array([1, 2, 1, np.nan])
     ser = pd.Series(values)
-    codes, uniques = ser.factorize(dropna=dropna)
-
-    if dropna:
-        expected_codes = np.array([0, 1, 0, -1], dtype="int64")
-        expected_uniques = pd.Index([1.0, 2.0])
-    else:
-        expected_codes = np.array([0, 1, 0, 2], dtype="int64")
-        expected_uniques = pd.Index([1.0, 2.0, np.nan])
+    codes, uniques = ser.factorize(na_sentinel=None)
+
+    expected_codes = np.array([0, 1, 0, 2], dtype="int64")
+    expected_uniques = pd.Index([1.0, 2.0, np.nan])
 
     tm.assert_numpy_array_equal(codes, expected_codes)
     tm.assert_index_equal(uniques, expected_uniques)

From 3ef1459f5236c2e32f1b85cce0a7d909b0ccc0f9 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 27 Aug 2020 19:13:29 +0200
Subject: [PATCH 24/39] update whatsnew

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 694016632824e..1063cd6af07b0 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -54,7 +54,7 @@ For example:
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
-- :meth:`Series.factorize` accepts ``dropna`` to include NaN in the uniques of the values by setting ``dropna=False``, default is ``dropna=True`` which excludes NaN from the uniques (:issue:`35667`)
+- :meth:`Series.factorize` now ``na_sentinel=None`` to include NaN in the uniques of the values (:issue:`35667`)
 -
 -
 

From fca73009d87060327e7871b7de08d9110feb870c Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 27 Aug 2020 19:19:18 +0200
Subject: [PATCH 25/39] rename test function

---
 pandas/tests/base/test_factorize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/base/test_factorize.py b/pandas/tests/base/test_factorize.py
index 1532939256cbd..9fad9856d53cc 100644
--- a/pandas/tests/base/test_factorize.py
+++ b/pandas/tests/base/test_factorize.py
@@ -28,7 +28,7 @@ def test_factorize(index_or_series_obj, sort):
     tm.assert_index_equal(result_uniques, expected_uniques)
 
 
-def test_factorize_dropna():
+def test_series_factorize_na_sentinel_none():
     # GH35667
     values = np.array([1, 2, 1, np.nan])
     ser = pd.Series(values)

From f0a6556d4d1218e5d10b61b246c71eaffbbb8309 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 27 Aug 2020 20:28:17 +0200
Subject: [PATCH 26/39] remove dropna from factorize

---
 pandas/core/algorithms.py      | 31 ++++++++++--------------
 pandas/core/base.py            | 20 +---------------
 pandas/core/groupby/grouper.py |  7 +++++-
 pandas/tests/test_algos.py     | 44 +++++++---------------------------
 4 files changed, 29 insertions(+), 73 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 036a0d54f26f5..04895896d5b26 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -522,19 +522,12 @@ def _factorize_array(
         Hint to the hashtable sizer.
     """
     ),
-    dropna=dedent(
-        """\
-    dropna : bool, default True
-        Drop the NaN from the uniques of the values.
-    """
-    ),
 )
 def factorize(
     values,
     sort: bool = False,
-    na_sentinel: int = -1,
+    na_sentinel: Optional[int] = -1,
     size_hint: Optional[int] = None,
-    dropna: bool = True,
 ) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]:
     """
     Encode the object as an enumerated type or categorical variable.
@@ -547,9 +540,10 @@ def factorize(
     Parameters
     ----------
     {values}{sort}
-    na_sentinel : int, default -1
-        Value to mark "not found".
-    {size_hint}{dropna}
+    na_sentinel : int or None, default -1
+        Value to mark "not found". If None, will not drop the NaN
+        from the uniques of the values.
+    {size_hint}
 
     Returns
     -------
@@ -632,13 +626,7 @@ def factorize(
     ``dropna=True`` which excludes NaN from the uniques.
 
     >>> values = np.array([1, 2, 1, np.nan])
-    >>> codes, uniques = pd.factorize(values, dropna=True)  # default
-    >>> codes
-    array([ 0,  1,  0, -1])
-    >>> uniques
-    array([1., 2.])
-
-    >>> codes, uniques = pd.factorize(values, dropna=False)
+    >>> codes, uniques = pd.factorize(values, na_sentinel=None)
     >>> codes
     array([0, 1, 0, 2])
     >>> uniques
@@ -656,6 +644,13 @@ def factorize(
     values = _ensure_arraylike(values)
     original = values
 
+    # if na_sentinel=None, we will not dropna NaNs from the uniques of values,
+    # still assign na_sentinel=-1 to keep backward compatability.
+    dropna = True
+    if na_sentinel is None:
+        na_sentinel = -1
+        dropna = False
+
     if is_extension_array_dtype(values.dtype):
         values = extract_array(values)
         codes, uniques = values.factorize(na_sentinel=na_sentinel)
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 557c4d6045bfd..1926803d8f04b 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1390,7 +1390,6 @@ def memory_usage(self, deep=False):
         values="",
         order="",
         size_hint="",
-        dropna="",
         sort=textwrap.dedent(
             """\
             sort : bool, default False
@@ -1398,26 +1397,9 @@ def memory_usage(self, deep=False):
                 relationship.
             """
         ),
-        na_sentinel=textwrap.dedent(
-            """\
-            na_sentinel : int or None, default -1
-                Value to mark "not found". If None, will drop the NaN 
-                from the uniques of the values.
-            """
-        ),
     )
     def factorize(self, sort: bool = False, na_sentinel: Optional[int] = -1):
-
-        # GH35667, na_sentinel=-1 and dropna to keep backward compatibility of
-        # algorithm.factorize so as not to break.
-        if na_sentinel is None:
-            na_sentinel = -1
-            dropna = False
-        else:
-            dropna = True
-        return algorithms.factorize(
-            self, sort=sort, na_sentinel=na_sentinel, dropna=dropna
-        )
+        return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel)
 
     _shared_docs[
         "searchsorted"
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 8239a792c65dd..272afe7335c6a 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -585,8 +585,13 @@ def _make_codes(self) -> None:
                 codes = self.grouper.codes_info
                 uniques = self.grouper.result_index
             else:
+                # GH35667, replace dropna=False with na_sentinel=None
+                if not self.dropna:
+                    na_sentinel = None
+                else:
+                    na_sentinel = -1
                 codes, uniques = algorithms.factorize(
-                    self.grouper, sort=self.sort, dropna=self.dropna
+                    self.grouper, sort=self.sort, na_sentinel=na_sentinel
                 )
                 uniques = Index(uniques, name=self.name)
             self._codes = codes
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 67a2dc2303550..b4e97f1e341e4 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -340,73 +340,47 @@ def test_factorize_na_sentinel(self, sort, na_sentinel, data, uniques):
             tm.assert_extension_array_equal(uniques, expected_uniques)
 
     @pytest.mark.parametrize(
-        "data, dropna, expected_codes, expected_uniques",
+        "data, expected_codes, expected_uniques",
         [
             (
                 ["a", None, "b", "a"],
-                True,
-                np.array([0, -1, 1, 0], dtype=np.dtype("intp")),
-                np.array(["a", "b"], dtype=object),
-            ),
-            (
-                ["a", np.nan, "b", "a"],
-                True,
-                np.array([0, -1, 1, 0], dtype=np.dtype("intp")),
-                np.array(["a", "b"], dtype=object),
-            ),
-            (
-                ["a", None, "b", "a"],
-                False,
                 np.array([0, 2, 1, 0], dtype=np.dtype("intp")),
                 np.array(["a", "b", np.nan], dtype=object),
             ),
             (
                 ["a", np.nan, "b", "a"],
-                False,
                 np.array([0, 2, 1, 0], dtype=np.dtype("intp")),
                 np.array(["a", "b", np.nan], dtype=object),
             ),
         ],
     )
-    def test_object_factorize_dropna(
-        self, data, dropna, expected_codes, expected_uniques
+    def test_object_factorize_na_sentinel_none(
+        self, data, expected_codes, expected_uniques
     ):
-        codes, uniques = algos.factorize(data, dropna=dropna)
+        codes, uniques = algos.factorize(data, na_sentinel=None)
 
         tm.assert_numpy_array_equal(uniques, expected_uniques)
         tm.assert_numpy_array_equal(codes, expected_codes)
 
     @pytest.mark.parametrize(
-        "data, dropna, expected_codes, expected_uniques",
+        "data, expected_codes, expected_uniques",
         [
             (
                 [1, None, 1, 2],
-                True,
-                np.array([0, -1, 0, 1], dtype=np.dtype("intp")),
-                np.array([1, 2], dtype="O"),
-            ),
-            (
-                [1, np.nan, 1, 2],
-                True,
-                np.array([0, -1, 0, 1], dtype=np.dtype("intp")),
-                np.array([1, 2], dtype=np.float64),
-            ),
-            (
-                [1, None, 1, 2],
-                False,
                 np.array([0, 2, 0, 1], dtype=np.dtype("intp")),
                 np.array([1, 2, np.nan], dtype="O"),
             ),
             (
                 [1, np.nan, 1, 2],
-                False,
                 np.array([0, 2, 0, 1], dtype=np.dtype("intp")),
                 np.array([1, 2, np.nan], dtype=np.float64),
             ),
         ],
     )
-    def test_int_factorize_dropna(self, data, dropna, expected_codes, expected_uniques):
-        codes, uniques = algos.factorize(data, dropna=dropna)
+    def test_int_factorize_na_sentinel_none(
+        self, data, expected_codes, expected_uniques
+    ):
+        codes, uniques = algos.factorize(data, na_sentinel=None)
 
         tm.assert_numpy_array_equal(uniques, expected_uniques)
         tm.assert_numpy_array_equal(codes, expected_codes)

From c81e79e1ad30d1f11a0742df53028fb7dc09d8bd Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 27 Aug 2020 20:30:24 +0200
Subject: [PATCH 27/39] update doc

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 1063cd6af07b0..493bdc427aa4e 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -54,7 +54,7 @@ For example:
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
-- :meth:`Series.factorize` now ``na_sentinel=None`` to include NaN in the uniques of the values (:issue:`35667`)
+- :meth:`Series.factorize` now ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword from :meth:`factorize` (:issue:`35667`)
 -
 -
 

From 37ca0342ea59d87b38999dc5dae7beb60b136ffe Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 27 Aug 2020 20:31:02 +0200
Subject: [PATCH 28/39] docstring

---
 pandas/core/algorithms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 04895896d5b26..650f1cacf39cd 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -543,7 +543,7 @@ def factorize(
     na_sentinel : int or None, default -1
         Value to mark "not found". If None, will not drop the NaN
         from the uniques of the values.
-    {size_hint}
+    {size_hint}\
 
     Returns
     -------

From 4f0f22610a37d187d5335208f16cc42fa44906a2 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 27 Aug 2020 20:31:47 +0200
Subject: [PATCH 29/39] update doc

---
 pandas/core/algorithms.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 650f1cacf39cd..3f0cd9bca309f 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -622,8 +622,7 @@ def factorize(
     Index(['a', 'c'], dtype='object')
 
     If NaN is in the values, and we want to include NaN in the uniques of the
-    values, it can be achieved by setting ``dropna=False``. The default is
-    ``dropna=True`` which excludes NaN from the uniques.
+    values, it can be achieved by setting ``na_sentinel=None``.
 
     >>> values = np.array([1, 2, 1, np.nan])
     >>> codes, uniques = pd.factorize(values, na_sentinel=None)

From 5fcabe7adcdcb20196f9bd07f52f7fa2d1273c86 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 27 Aug 2020 20:34:02 +0200
Subject: [PATCH 30/39] add comment

---
 pandas/core/algorithms.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 3f0cd9bca309f..8f86449134177 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -643,8 +643,8 @@ def factorize(
     values = _ensure_arraylike(values)
     original = values
 
-    # if na_sentinel=None, we will not dropna NaNs from the uniques of values,
-    # still assign na_sentinel=-1 to keep backward compatability.
+    # GH35667, if na_sentinel=None, we will not dropna NaNs from the uniques
+    # of values, still assign na_sentinel=-1 to keep backward compatability.
     dropna = True
     if na_sentinel is None:
         na_sentinel = -1

From b7cd91539c03fdb2323eb4c232a3432721180bbd Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 28 Aug 2020 09:13:10 +0200
Subject: [PATCH 31/39] code change on review

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 pandas/core/algorithms.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 493bdc427aa4e..d8a4b776540aa 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -54,7 +54,7 @@ For example:
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
-- :meth:`Series.factorize` now ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword from :meth:`factorize` (:issue:`35667`)
+- :meth:`Series.factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword from :meth:`factorize` (:issue:`35667`)
 -
 -
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 8f86449134177..1deffd0a2d03f 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -644,7 +644,7 @@ def factorize(
     original = values
 
     # GH35667, if na_sentinel=None, we will not dropna NaNs from the uniques
-    # of values, still assign na_sentinel=-1 to keep backward compatability.
+    # of values, assign na_sentinel=-1 to replace code value for NaN.
     dropna = True
     if na_sentinel is None:
         na_sentinel = -1

From 8a2a1f76c6103aa5e0c010dc32197ecb7e28f755 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 28 Aug 2020 09:16:26 +0200
Subject: [PATCH 32/39] update doc

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index d8a4b776540aa..31ed533af2f56 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -54,7 +54,7 @@ For example:
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
-- :meth:`Series.factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword from :meth:`factorize` (:issue:`35667`)
+- :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword from :meth:`factorize` (:issue:`35667`)
 -
 -
 

From e0c7342543e65e0c15e49c2b9525e3b8b967ab1d Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 28 Aug 2020 10:11:24 +0200
Subject: [PATCH 33/39] code change on review

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 pandas/core/algorithms.py      | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 31ed533af2f56..ad7b84e3dc478 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -54,7 +54,7 @@ For example:
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
-- :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword from :meth:`factorize` (:issue:`35667`)
+- :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword which was unintentionally exposed to public facing API in 1.1 version from :meth:`factorize`(:issue:`35667`)
 -
 -
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 1deffd0a2d03f..59c64bf4ea9b2 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -543,6 +543,8 @@ def factorize(
     na_sentinel : int or None, default -1
         Value to mark "not found". If None, will not drop the NaN
         from the uniques of the values.
+
+        .. versionchanged:: 1.2.0
     {size_hint}\
 
     Returns

From 0480d9fb4441bba493d7a411b7745687c31ac4e5 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 28 Aug 2020 10:38:48 +0200
Subject: [PATCH 34/39] minor move in whatsnew

---
 doc/source/whatsnew/v1.1.2.rst | 8 ++++++++
 doc/source/whatsnew/v1.2.0.rst | 1 -
 pandas/core/algorithms.py      | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst
index 9747a8ef3e71f..d0e4b934ba746 100644
--- a/doc/source/whatsnew/v1.1.2.rst
+++ b/doc/source/whatsnew/v1.1.2.rst
@@ -33,6 +33,14 @@ Bug fixes
 
 .. ---------------------------------------------------------------------------
 
+.. _whatsnew_112.other:
+
+Other
+~~~~~
+- :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and removes the ``dropna`` keyword, which was unintentionally exposed in the public API of :meth:`factorize` in version 1.1 (:issue:`35667`)
+
+.. ---------------------------------------------------------------------------
+
 .. _whatsnew_112.contributors:
 
 Contributors
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index ad7b84e3dc478..55570341cf4e8 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -54,7 +54,6 @@ For example:
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
-- :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword which was unintentionally exposed to public facing API in 1.1 version from :meth:`factorize`(:issue:`35667`)
 -
 -
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 59c64bf4ea9b2..b28868348a875 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -544,7 +544,7 @@ def factorize(
         Value to mark "not found". If None, will not drop the NaN
         from the uniques of the values.
 
-        .. versionchanged:: 1.2.0
+        .. versionchanged:: 1.1.2
     {size_hint}\
 
     Returns

From 5c87cd185d99610ffffb0d03b7fb3f6c6874e248 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 1 Sep 2020 19:53:12 +0200
Subject: [PATCH 35/39] add default example

---
 pandas/core/algorithms.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index b28868348a875..96f45b288ef38 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -627,6 +627,11 @@ def factorize(
     values, it can be achieved by setting ``na_sentinel=None``.
 
     >>> values = np.array([1, 2, 1, np.nan])
+    >>> codes, uniques = pd.factorize(values)  # default
+    >>> codes
+    array([0, 1, 0, -1])
+    >>> uniques
+    array([1., 2.])
     >>> codes, uniques = pd.factorize(values, na_sentinel=None)
     >>> codes
     array([0, 1, 0, 2])

From 076fc10357948fae79d963822404a7ddd1062061 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 1 Sep 2020 20:26:45 +0200
Subject: [PATCH 36/39] doc

---
 pandas/core/algorithms.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 70aa5a2cb544a..1fcb494516da9 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -629,9 +629,9 @@ def factorize(
     >>> values = np.array([1, 2, 1, np.nan])
     >>> codes, uniques = pd.factorize(values)  # default
     >>> codes
-    array([0, 1, 0, -1])
+    array([ 0,  1,  0, -1])
     >>> uniques
-    array([1., 2.])
+    array([ 1.,  2.])
     >>> codes, uniques = pd.factorize(values, na_sentinel=None)
     >>> codes
     array([0, 1, 0, 2])

From c9454579c61561c8f3c7aeb036b9e31b7ad2b2d9 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 1 Sep 2020 21:16:03 +0200
Subject: [PATCH 37/39] one more try

---
 pandas/core/algorithms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 1fcb494516da9..c49f0dcff9747 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -631,7 +631,7 @@ def factorize(
     >>> codes
     array([ 0,  1,  0, -1])
     >>> uniques
-    array([ 1.,  2.])
+    array([1., 2.])
     >>> codes, uniques = pd.factorize(values, na_sentinel=None)
     >>> codes
     array([0, 1, 0, 2])

From e6c7434e52a97dc1209d610691a7701375d45ad8 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 1 Sep 2020 21:31:12 +0200
Subject: [PATCH 38/39] explicit doc

---
 pandas/core/algorithms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index c49f0dcff9747..c0f22777a53ff 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -627,7 +627,7 @@ def factorize(
     values, it can be achieved by setting ``na_sentinel=None``.
 
     >>> values = np.array([1, 2, 1, np.nan])
-    >>> codes, uniques = pd.factorize(values)  # default
+    >>> codes, uniques = pd.factorize(values)  # default: na_sentinel=-1
     >>> codes
     array([ 0,  1,  0, -1])
     >>> uniques

From bf8641aec20d62705e55ec6f658d257be477e64b Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Wed, 2 Sep 2020 16:11:54 +0200
Subject: [PATCH 39/39] add space

---
 pandas/core/algorithms.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index c0f22777a53ff..d2af6c132eca2 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -632,6 +632,7 @@ def factorize(
     array([ 0,  1,  0, -1])
     >>> uniques
     array([1., 2.])
+
     >>> codes, uniques = pd.factorize(values, na_sentinel=None)
     >>> codes
     array([0, 1, 0, 2])