BUG: extra leading space in to_string when index=False (pandas-dev#36094)
simonjayhawkins · Sep 6, 2020 · aca77f7 · aca77f7
1 parent ba552ec
commit aca77f7
Show file tree

Hide file tree

Showing 4 changed files with 71 additions and 26 deletions.
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -214,8 +214,6 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
-- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
--
 
 Categorical
 ^^^^^^^^^^^
@@ -257,7 +255,7 @@ Conversion
 
 Strings
 ^^^^^^^
-
+- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`)
 -
 -
 
@@ -315,6 +313,7 @@ Groupby/resample/rolling
 - Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']])``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`)
 - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
 - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
+- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
 -
 
 Reshaping

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -345,6 +345,7 @@ def _get_formatted_values(self) -> List[str]:
             None,
             float_format=self.float_format,
             na_rep=self.na_rep,
+            leading_space=self.index,
         )
 
     def to_string(self) -> str:
@@ -960,6 +961,7 @@ def _format_col(self, i: int) -> List[str]:
             na_rep=self.na_rep,
             space=self.col_space.get(frame.columns[i]),
             decimal=self.decimal,
+            leading_space=self.index,
         )
 
     def to_html(
@@ -1111,7 +1113,7 @@ def format_array(
     space: Optional[Union[str, int]] = None,
     justify: str = "right",
     decimal: str = ".",
-    leading_space: Optional[bool] = None,
+    leading_space: Optional[bool] = True,
     quoting: Optional[int] = None,
 ) -> List[str]:
     """
@@ -1127,7 +1129,7 @@ def format_array(
     space
     justify
     decimal
-    leading_space : bool, optional
+    leading_space : bool, optional, default True
         Whether the array should be formatted with a leading space.
         When an array as a column of a Series or DataFrame, we do want
         the leading space to pad between columns.
@@ -1194,7 +1196,7 @@ def __init__(
         decimal: str = ".",
         quoting: Optional[int] = None,
         fixed_width: bool = True,
-        leading_space: Optional[bool] = None,
+        leading_space: Optional[bool] = True,
     ):
         self.values = values
         self.digits = digits
@@ -1395,9 +1397,11 @@ def format_values_with(float_format):
         float_format: Optional[FloatFormatType]
         if self.float_format is None:
             if self.fixed_width:
-                float_format = partial(
-                    "{value: .{digits:d}f}".format, digits=self.digits
-                )
+                if self.leading_space is True:
+                    fmt_str = "{value: .{digits:d}f}"
+                else:
+                    fmt_str = "{value:.{digits:d}f}"
+                float_format = partial(fmt_str.format, digits=self.digits)
             else:
                 float_format = self.float_format
         else:
@@ -1429,7 +1433,11 @@ def format_values_with(float_format):
             ).any()
 
         if has_small_values or (too_long and has_large_values):
-            float_format = partial("{value: .{digits:d}e}".format, digits=self.digits)
+            if self.leading_space is True:
+                fmt_str = "{value: .{digits:d}e}"
+            else:
+                fmt_str = "{value:.{digits:d}e}"
+            float_format = partial(fmt_str.format, digits=self.digits)
             formatted_values = format_values_with(float_format)
 
         return formatted_values
@@ -1444,7 +1452,11 @@ def _format_strings(self) -> List[str]:
 
 class IntArrayFormatter(GenericArrayFormatter):
     def _format_strings(self) -> List[str]:
-        formatter = self.formatter or (lambda x: f"{x: d}")
+        if self.leading_space is False:
+            formatter_str = lambda x: f"{x:d}".format(x=x)
+        else:
+            formatter_str = lambda x: f"{x: d}".format(x=x)
+        formatter = self.formatter or formatter_str
         fmt_values = [formatter(x) for x in self.values]
         return fmt_values
 

diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -1546,11 +1546,11 @@ def test_to_string_no_index(self):
 
         df_s = df.to_string(index=False)
         # Leading space is expected for positive numbers.
-        expected = "  x   y    z\n 11  33  AAA\n 22 -44     "
+        expected = " x   y   z\n11  33 AAA\n22 -44    "
         assert df_s == expected
 
         df_s = df[["y", "x", "z"]].to_string(index=False)
-        expected = "  y   x    z\n 33  11  AAA\n-44  22     "
+        expected = "  y  x   z\n 33 11 AAA\n-44 22    "
         assert df_s == expected
 
     def test_to_string_line_width_no_index(self):
@@ -1565,7 +1565,7 @@ def test_to_string_line_width_no_index(self):
         df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]})
 
         df_s = df.to_string(line_width=1, index=False)
-        expected = "  x  \\\n 11   \n 22   \n 33   \n\n y  \n 4  \n 5  \n 6  "
+        expected = " x  \\\n11   \n22   \n33   \n\n y  \n 4  \n 5  \n 6  "
 
         assert df_s == expected
 
@@ -2269,7 +2269,7 @@ def test_to_string_without_index(self):
         # GH 11729 Test index=False option
         s = Series([1, 2, 3, 4])
         result = s.to_string(index=False)
-        expected = " 1\n" + " 2\n" + " 3\n" + " 4"
+        expected = "1\n" + "2\n" + "3\n" + "4"
         assert result == expected
 
     def test_unicode_name_in_footer(self):
@@ -3391,3 +3391,37 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method):
     msg = "buf is not a file name and it has no write method"
     with pytest.raises(TypeError, match=msg):
         getattr(float_frame, method)(buf=object())
+
+
+@pytest.mark.parametrize(
+    "input_array, expected",
+    [
+        ("a", "a"),
+        (["a", "b"], "a\nb"),
+        ([1, "a"], "1\na"),
+        (1, "1"),
+        ([0, -1], " 0\n-1"),
+        (1.0, "1.0"),
+        ([" a", " b"], " a\n b"),
+        ([".1", "1"], ".1\n 1"),
+        (["10", "-10"], " 10\n-10"),
+    ],
+)
+def test_format_remove_leading_space_series(input_array, expected):
+    # GH: 24980
+    s = pd.Series(input_array).to_string(index=False)
+    assert s == expected
+
+
+@pytest.mark.parametrize(
+    "input_array, expected",
+    [
+        ({"A": ["a"]}, "A\na"),
+        ({"A": ["a", "b"], "B": ["c", "dd"]}, "A  B\na  c\nb dd"),
+        ({"A": ["a", 1], "B": ["aa", 1]}, "A  B\na aa\n1  1"),
+    ],
+)
+def test_format_remove_leading_space_dataframe(input_array, expected):
+    # GH: 24980
+    df = pd.DataFrame(input_array).to_string(index=False)
+    assert df == expected
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
@@ -50,10 +50,10 @@ def test_to_latex(self, float_frame):
         withoutindex_result = df.to_latex(index=False)
         withoutindex_expected = r"""\begin{tabular}{rl}
 \toprule
- a &   b \\
+ a &  b \\
 \midrule
- 1 &  b1 \\
- 2 &  b2 \\
+ 1 & b1 \\
+ 2 & b2 \\
 \bottomrule
 \end{tabular}
 """
@@ -413,7 +413,7 @@ def test_to_latex_longtable(self):
         withoutindex_result = df.to_latex(index=False, longtable=True)
         withoutindex_expected = r"""\begin{longtable}{rl}
 \toprule
- a &   b \\
+ a &  b \\
 \midrule
 \endhead
 \midrule
@@ -423,8 +423,8 @@ def test_to_latex_longtable(self):
 
 \bottomrule
 \endlastfoot
- 1 &  b1 \\
- 2 &  b2 \\
+ 1 & b1 \\
+ 2 & b2 \\
 \end{longtable}
 """
 
@@ -663,8 +663,8 @@ def test_to_latex_no_header(self):
         withoutindex_result = df.to_latex(index=False, header=False)
         withoutindex_expected = r"""\begin{tabular}{rl}
 \toprule
- 1 &  b1 \\
- 2 &  b2 \\
+1 & b1 \\
+2 & b2 \\
 \bottomrule
 \end{tabular}
 """
@@ -690,10 +690,10 @@ def test_to_latex_specified_header(self):
         withoutindex_result = df.to_latex(header=["AA", "BB"], index=False)
         withoutindex_expected = r"""\begin{tabular}{rl}
 \toprule
-AA &  BB \\
+AA & BB \\
 \midrule
- 1 &  b1 \\
- 2 &  b2 \\
+ 1 & b1 \\
+ 2 & b2 \\
 \bottomrule
 \end{tabular}
 """