From aca77f7b1cc7987a5b757a3f80a278e1f5fc7998 Mon Sep 17 00:00:00 2001 From: Honfung Wong Date: Mon, 7 Sep 2020 01:49:26 +0800 Subject: [PATCH] BUG: extra leading space in to_string when index=False (#36094) --- doc/source/whatsnew/v1.2.0.rst | 5 ++- pandas/io/formats/format.py | 28 +++++++++++----- pandas/tests/io/formats/test_format.py | 42 +++++++++++++++++++++--- pandas/tests/io/formats/test_to_latex.py | 22 ++++++------- 4 files changed, 71 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b4fdbf9588ffe..9a778acba4764 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -214,8 +214,6 @@ Performance improvements Bug fixes ~~~~~~~~~ -- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) -- Categorical ^^^^^^^^^^^ @@ -257,7 +255,7 @@ Conversion Strings ^^^^^^^ - +- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`) - - @@ -315,6 +313,7 @@ Groupby/resample/rolling - Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']])``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`) - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`) - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) +- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) - Reshaping diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 53b2b533215f0..70e38c3106bdb 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -345,6 +345,7 @@ def _get_formatted_values(self) -> List[str]: None, float_format=self.float_format, na_rep=self.na_rep, + leading_space=self.index, ) def to_string(self) -> str: @@ -960,6 +961,7 @@ def _format_col(self, i: int) -> List[str]: na_rep=self.na_rep, space=self.col_space.get(frame.columns[i]), decimal=self.decimal, + leading_space=self.index, ) def to_html( @@ -1111,7 +1113,7 @@ def format_array( space: Optional[Union[str, int]] = None, justify: str = "right", decimal: str = ".", - leading_space: Optional[bool] = None, + leading_space: Optional[bool] = True, quoting: Optional[int] = None, ) -> List[str]: """ @@ -1127,7 +1129,7 @@ def format_array( space justify decimal - leading_space : bool, optional + leading_space : bool, optional, default True Whether the array should be formatted with a leading space. When an array as a column of a Series or DataFrame, we do want the leading space to pad between columns. @@ -1194,7 +1196,7 @@ def __init__( decimal: str = ".", quoting: Optional[int] = None, fixed_width: bool = True, - leading_space: Optional[bool] = None, + leading_space: Optional[bool] = True, ): self.values = values self.digits = digits @@ -1395,9 +1397,11 @@ def format_values_with(float_format): float_format: Optional[FloatFormatType] if self.float_format is None: if self.fixed_width: - float_format = partial( - "{value: .{digits:d}f}".format, digits=self.digits - ) + if self.leading_space is True: + fmt_str = "{value: .{digits:d}f}" + else: + fmt_str = "{value:.{digits:d}f}" + float_format = partial(fmt_str.format, digits=self.digits) else: float_format = self.float_format else: @@ -1429,7 +1433,11 @@ def format_values_with(float_format): ).any() if has_small_values or (too_long and has_large_values): - float_format = partial("{value: .{digits:d}e}".format, digits=self.digits) + if self.leading_space is True: + fmt_str = "{value: .{digits:d}e}" + else: + fmt_str = "{value:.{digits:d}e}" + float_format = partial(fmt_str.format, digits=self.digits) formatted_values = format_values_with(float_format) return formatted_values @@ -1444,7 +1452,11 @@ def _format_strings(self) -> List[str]: class IntArrayFormatter(GenericArrayFormatter): def _format_strings(self) -> List[str]: - formatter = self.formatter or (lambda x: f"{x: d}") + if self.leading_space is False: + formatter_str = lambda x: f"{x:d}".format(x=x) + else: + formatter_str = lambda x: f"{x: d}".format(x=x) + formatter = self.formatter or formatter_str fmt_values = [formatter(x) for x in self.values] return fmt_values diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 1fb957505987f..f00fa6274fca2 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1546,11 +1546,11 @@ def test_to_string_no_index(self): df_s = df.to_string(index=False) # Leading space is expected for positive numbers. - expected = " x y z\n 11 33 AAA\n 22 -44 " + expected = " x y z\n11 33 AAA\n22 -44 " assert df_s == expected df_s = df[["y", "x", "z"]].to_string(index=False) - expected = " y x z\n 33 11 AAA\n-44 22 " + expected = " y x z\n 33 11 AAA\n-44 22 " assert df_s == expected def test_to_string_line_width_no_index(self): @@ -1565,7 +1565,7 @@ def test_to_string_line_width_no_index(self): df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) df_s = df.to_string(line_width=1, index=False) - expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 " + expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 " assert df_s == expected @@ -2269,7 +2269,7 @@ def test_to_string_without_index(self): # GH 11729 Test index=False option s = Series([1, 2, 3, 4]) result = s.to_string(index=False) - expected = " 1\n" + " 2\n" + " 3\n" + " 4" + expected = "1\n" + "2\n" + "3\n" + "4" assert result == expected def test_unicode_name_in_footer(self): @@ -3391,3 +3391,37 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method): msg = "buf is not a file name and it has no write method" with pytest.raises(TypeError, match=msg): getattr(float_frame, method)(buf=object()) + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ("a", "a"), + (["a", "b"], "a\nb"), + ([1, "a"], "1\na"), + (1, "1"), + ([0, -1], " 0\n-1"), + (1.0, "1.0"), + ([" a", " b"], " a\n b"), + ([".1", "1"], ".1\n 1"), + (["10", "-10"], " 10\n-10"), + ], +) +def test_format_remove_leading_space_series(input_array, expected): + # GH: 24980 + s = pd.Series(input_array).to_string(index=False) + assert s == expected + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ({"A": ["a"]}, "A\na"), + ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"), + ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"), + ], +) +def test_format_remove_leading_space_dataframe(input_array, expected): + # GH: 24980 + df = pd.DataFrame(input_array).to_string(index=False) + assert df == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 96a9ed2b86cf4..9dfd851e91c65 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -50,10 +50,10 @@ def test_to_latex(self, float_frame): withoutindex_result = df.to_latex(index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - a & b \\ + a & b \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """ @@ -413,7 +413,7 @@ def test_to_latex_longtable(self): withoutindex_result = df.to_latex(index=False, longtable=True) withoutindex_expected = r"""\begin{longtable}{rl} \toprule - a & b \\ + a & b \\ \midrule \endhead \midrule @@ -423,8 +423,8 @@ def test_to_latex_longtable(self): \bottomrule \endlastfoot - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \end{longtable} """ @@ -663,8 +663,8 @@ def test_to_latex_no_header(self): withoutindex_result = df.to_latex(index=False, header=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - 1 & b1 \\ - 2 & b2 \\ +1 & b1 \\ +2 & b2 \\ \bottomrule \end{tabular} """ @@ -690,10 +690,10 @@ def test_to_latex_specified_header(self): withoutindex_result = df.to_latex(header=["AA", "BB"], index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule -AA & BB \\ +AA & BB \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """