databricks · HyukjinKwon · Sep 4, 2019 · Sep 4, 2019 · Sep 4, 2019 · HyukjinKwon
diff --git a/databricks/koalas/config.py b/databricks/koalas/config.py
@@ -106,21 +106,23 @@ def validate(self, v: Any) -> None:
 
 # Available options.
 _options = [
-    # TODO: None should support unlimited.
     Option(
         key='display.max_rows',
         doc=(
             "This sets the maximum number of rows koalas should output when printing out "
-            "various output. For example, this value determines whether the repr() for a "
-            "dataframe prints out fully or just a truncated repr."),
+            "various output. For example, this value determines the number of rows to be "
+            "shown in the repr() of a dataframe. Set `None` to display all rows. "
+            "Default is 1000."),
         default=1000,
-        types=int,
-        check_func=(lambda v: v >= 0, "'display.max_rows' should be greater than or equal to 0.")),
+        types=(int, type(None)),
+        check_func=(
+            lambda v: v is None or v >= 0,
+            "'display.max_rows' should be greater than or equal to 0.")),
 
     Option(
         key='compute.max_rows',
         doc=(
-            "'compute.max_rows sets the limit of the current DataFrame. Set `None` to unlimit "
+            "'compute.max_rows' sets the limit of the current DataFrame. Set `None` to unlimit "
             "the input length. When the limit is set, it is executed by the shortcut by "
             "collecting the data into driver side, and then using pandas API. If the limit is "
             "unset, the operation is executed by PySpark. Default is 1000."),
@@ -130,7 +132,6 @@ def validate(self, v: Any) -> None:
             lambda v: v is None or v >= 0,
             "'compute.max_rows' should be greater than or equal to 0.")),
 
-    # TODO: None should support unlimited.
     Option(
         key='compute.shortcut_limit',
         doc=(

diff --git a/databricks/koalas/frame.py b/databricks/koalas/frame.py
@@ -6601,6 +6601,9 @@ def _to_internal_pandas(self):
 
     def __repr__(self):
         max_display_count = get_option("display.max_rows")
+        if max_display_count is None:
+            return repr(self._to_internal_pandas())
+
         pdf = self.head(max_display_count + 1)._to_internal_pandas()
         pdf_length = len(pdf)
         repr_string = repr(pdf.iloc[:max_display_count])
@@ -6616,6 +6619,9 @@ def __repr__(self):
 
     def _repr_html_(self):
         max_display_count = get_option("display.max_rows")
+        if max_display_count is None:
+            return self._to_internal_pandas()._repr_html_()
+
         pdf = self.head(max_display_count + 1)._to_internal_pandas()
         pdf_length = len(pdf)
         repr_html = pdf[:max_display_count]._repr_html_()

diff --git a/databricks/koalas/indexes.py b/databricks/koalas/indexes.py
@@ -240,14 +240,24 @@ def __getattr__(self, item: str) -> Any:
 
     def __repr__(self):
         max_display_count = get_option("display.max_rows")
-        sdf = self._kdf._sdf.select(self._scol).limit(max_display_count + 1)
+        sdf = self._kdf._sdf.select(self._scol)
+
+        if max_display_count is None:
+            return repr(DataFrame(self._kdf._internal.copy(
+                sdf=sdf,
+                index_map=[(sdf.schema[0].name, self._kdf._internal.index_names[0])],
+                data_columns=[], column_index=[], column_index_names=None)).index.to_pandas())
+
+        sdf = sdf.limit(max_display_count + 1)
         internal = self._kdf._internal.copy(
             sdf=sdf,
             index_map=[(sdf.schema[0].name, self._kdf._internal.index_names[0])],
             data_columns=[], column_index=[], column_index_names=None)
         pindex = DataFrame(internal).index.to_pandas()
+
         pindex_length = len(pindex)
         repr_string = repr(pindex[:max_display_count])
+
         if pindex_length > max_display_count:
             footer = '\nShowing only the first {}'.format(max_display_count)
             return repr_string + footer

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
@@ -3027,6 +3027,9 @@ def _to_internal_pandas(self):
 
     def __repr__(self):
         max_display_count = get_option("display.max_rows")
+        if max_display_count is None:
+            return repr(self._to_internal_pandas())
+
         pser = self.head(max_display_count + 1)._to_internal_pandas()
         pser_length = len(pser)
         repr_string = repr(pser.iloc[:max_display_count])

diff --git a/databricks/koalas/tests/test_frame_plot.py b/databricks/koalas/tests/test_frame_plot.py
@@ -17,6 +17,7 @@
 
 
 class DataFramePlotTest(ReusedSQLTestCase, TestUtils):
+    sample_ratio_default = None
 
     @classmethod
     def setUpClass(cls):
@@ -233,7 +234,7 @@ def test_sampled_plot_with_ratio(self):
             data = SampledPlot().get_sampled(kdf)
             self.assertEqual(round(len(data) / 2500, 1), 0.5)
         finally:
-            reset_option('plotting.sample_ratio')
+            set_option('plotting.sample_ratio', DataFramePlotTest.sample_ratio_default)
 
     def test_sampled_plot_with_max_rows(self):
         # 'plotting.max_rows' is 2000

diff --git a/databricks/koalas/tests/test_repr.py b/databricks/koalas/tests/test_repr.py
@@ -37,6 +37,13 @@ def test_repr_dataframe(self):
         kdf = ks.range(ReprTests.max_display_count + 1)
         self.assertTrue("Showing only the first" in repr(kdf))
 
+        set_option("display.max_rows", None)
+        try:
+            kdf = ks.range(ReprTests.max_display_count + 1)
+            self.assert_eq(repr(kdf), repr(kdf.to_pandas()))
+        finally:
+            set_option("display.max_rows", ReprTests.max_display_count)
+
     def test_repr_series(self):
         kser = ks.range(ReprTests.max_display_count).id
         self.assertTrue("Showing only the first" not in repr(kser))
@@ -45,6 +52,13 @@ def test_repr_series(self):
         kser = ks.range(ReprTests.max_display_count + 1).id
         self.assertTrue("Showing only the first" in repr(kser))
 
+        set_option("display.max_rows", None)
+        try:
+            kser = ks.range(ReprTests.max_display_count + 1).id
+            self.assert_eq(repr(kser), repr(kser.to_pandas()))
+        finally:
+            set_option("display.max_rows", ReprTests.max_display_count)
+
     def test_repr_indexes(self):
         kdf = ks.range(ReprTests.max_display_count)
         kidx = kdf.index
@@ -55,10 +69,25 @@ def test_repr_indexes(self):
         kidx = kdf.index
         self.assertTrue("Showing only the first" in repr(kidx))
 
+        set_option("display.max_rows", None)
+        try:
+            kdf = ks.range(ReprTests.max_display_count + 1)
+            kidx = kdf.index
+            self.assert_eq(repr(kidx), repr(kidx.to_pandas()))
+        finally:
+            set_option("display.max_rows", ReprTests.max_display_count)
+
     def test_html_repr(self):
         kdf = ks.range(ReprTests.max_display_count)
         self.assertTrue("Showing only the first" not in kdf._repr_html_())
         self.assertEqual(kdf._repr_html_(), kdf.to_pandas()._repr_html_())
 
         kdf = ks.range(ReprTests.max_display_count + 1)
         self.assertTrue("Showing only the first" in kdf._repr_html_())
+
+        set_option("display.max_rows", None)
+        try:
+            kdf = ks.range(ReprTests.max_display_count + 1)
+            self.assertEqual(kdf._repr_html_(), kdf.to_pandas()._repr_html_())
+        finally:
+            set_option("display.max_rows", ReprTests.max_display_count)