Skip to content

Commit

Permalink
BUG: extra leading space in to_string when index=False (pandas-dev#36094)
Browse files Browse the repository at this point in the history
  • Loading branch information
onshek authored Sep 6, 2020
1 parent ba552ec commit aca77f7
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 26 deletions.
5 changes: 2 additions & 3 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,6 @@ Performance improvements

Bug fixes
~~~~~~~~~
- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
-

Categorical
^^^^^^^^^^^
Expand Down Expand Up @@ -257,7 +255,7 @@ Conversion

Strings
^^^^^^^

- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`)
-
-

Expand Down Expand Up @@ -315,6 +313,7 @@ Groupby/resample/rolling
- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']]``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`)
- Bug in :meth:`DataFrameGroupBy.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
- Bug in :meth:`DataFrame.groupby` not always maintaining the column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
-

Reshaping
Expand Down
28 changes: 20 additions & 8 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ def _get_formatted_values(self) -> List[str]:
None,
float_format=self.float_format,
na_rep=self.na_rep,
leading_space=self.index,
)

def to_string(self) -> str:
Expand Down Expand Up @@ -960,6 +961,7 @@ def _format_col(self, i: int) -> List[str]:
na_rep=self.na_rep,
space=self.col_space.get(frame.columns[i]),
decimal=self.decimal,
leading_space=self.index,
)

def to_html(
Expand Down Expand Up @@ -1111,7 +1113,7 @@ def format_array(
space: Optional[Union[str, int]] = None,
justify: str = "right",
decimal: str = ".",
leading_space: Optional[bool] = None,
leading_space: Optional[bool] = True,
quoting: Optional[int] = None,
) -> List[str]:
"""
Expand All @@ -1127,7 +1129,7 @@ def format_array(
space
justify
decimal
leading_space : bool, optional
leading_space : bool, optional, default True
Whether the array should be formatted with a leading space.
When an array is formatted as a column of a Series or DataFrame, we do want
the leading space to pad between columns.
Expand Down Expand Up @@ -1194,7 +1196,7 @@ def __init__(
decimal: str = ".",
quoting: Optional[int] = None,
fixed_width: bool = True,
leading_space: Optional[bool] = None,
leading_space: Optional[bool] = True,
):
self.values = values
self.digits = digits
Expand Down Expand Up @@ -1395,9 +1397,11 @@ def format_values_with(float_format):
float_format: Optional[FloatFormatType]
if self.float_format is None:
if self.fixed_width:
float_format = partial(
"{value: .{digits:d}f}".format, digits=self.digits
)
if self.leading_space is True:
fmt_str = "{value: .{digits:d}f}"
else:
fmt_str = "{value:.{digits:d}f}"
float_format = partial(fmt_str.format, digits=self.digits)
else:
float_format = self.float_format
else:
Expand Down Expand Up @@ -1429,7 +1433,11 @@ def format_values_with(float_format):
).any()

if has_small_values or (too_long and has_large_values):
float_format = partial("{value: .{digits:d}e}".format, digits=self.digits)
if self.leading_space is True:
fmt_str = "{value: .{digits:d}e}"
else:
fmt_str = "{value:.{digits:d}e}"
float_format = partial(fmt_str.format, digits=self.digits)
formatted_values = format_values_with(float_format)

return formatted_values
Expand All @@ -1444,7 +1452,11 @@ def _format_strings(self) -> List[str]:

class IntArrayFormatter(GenericArrayFormatter):
def _format_strings(self) -> List[str]:
    """
    Render each integer in ``self.values`` as a string.

    A truthy ``self.formatter`` takes precedence and is applied to every
    value. Otherwise values are formatted with the ``d`` presentation
    type, using the space sign flag (a blank in front of non-negative
    values, ``-`` in front of negative ones) unless ``self.leading_space``
    is explicitly ``False``.

    Returns
    -------
    List[str]
        One formatted string per element of ``self.values``.
    """
    # Only an explicit False drops the sign padding; True and None both keep it.
    spec = "d" if self.leading_space is False else " d"
    formatter = self.formatter or (lambda x: format(x, spec))
    return [formatter(x) for x in self.values]

Expand Down
42 changes: 38 additions & 4 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1546,11 +1546,11 @@ def test_to_string_no_index(self):

df_s = df.to_string(index=False)
# Leading space is expected for positive numbers.
expected = " x y z\n 11 33 AAA\n 22 -44 "
expected = " x y z\n11 33 AAA\n22 -44 "
assert df_s == expected

df_s = df[["y", "x", "z"]].to_string(index=False)
expected = " y x z\n 33 11 AAA\n-44 22 "
expected = " y x z\n 33 11 AAA\n-44 22 "
assert df_s == expected

def test_to_string_line_width_no_index(self):
Expand All @@ -1565,7 +1565,7 @@ def test_to_string_line_width_no_index(self):
df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]})

df_s = df.to_string(line_width=1, index=False)
expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 "
expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 "

assert df_s == expected

Expand Down Expand Up @@ -2269,7 +2269,7 @@ def test_to_string_without_index(self):
# GH 11729 Test index=False option
s = Series([1, 2, 3, 4])
result = s.to_string(index=False)
expected = " 1\n" + " 2\n" + " 3\n" + " 4"
expected = "1\n" + "2\n" + "3\n" + "4"
assert result == expected

def test_unicode_name_in_footer(self):
Expand Down Expand Up @@ -3391,3 +3391,37 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method):
msg = "buf is not a file name and it has no write method"
with pytest.raises(TypeError, match=msg):
getattr(float_frame, method)(buf=object())


@pytest.mark.parametrize(
    "input_array, expected",
    [
        ("a", "a"),
        (["a", "b"], "a\nb"),
        ([1, "a"], "1\na"),
        (1, "1"),
        ([0, -1], " 0\n-1"),
        (1.0, "1.0"),
        ([" a", " b"], " a\n b"),
        ([".1", "1"], ".1\n 1"),
        (["10", "-10"], " 10\n-10"),
    ],
)
def test_format_remove_leading_space_series(input_array, expected):
    """Check the text ``Series.to_string(index=False)`` yields for each input."""
    # GH: 24980
    series = pd.Series(input_array)
    rendered = series.to_string(index=False)
    assert rendered == expected


@pytest.mark.parametrize(
    "input_array, expected",
    [
        ({"A": ["a"]}, "A\na"),
        ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"),
        ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"),
    ],
)
def test_format_remove_leading_space_dataframe(input_array, expected):
    """Check the text ``DataFrame.to_string(index=False)`` yields for each input."""
    # GH: 24980
    frame = pd.DataFrame(input_array)
    rendered = frame.to_string(index=False)
    assert rendered == expected
22 changes: 11 additions & 11 deletions pandas/tests/io/formats/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def test_to_latex(self, float_frame):
withoutindex_result = df.to_latex(index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
a & b \\
a & b \\
\midrule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand Down Expand Up @@ -413,7 +413,7 @@ def test_to_latex_longtable(self):
withoutindex_result = df.to_latex(index=False, longtable=True)
withoutindex_expected = r"""\begin{longtable}{rl}
\toprule
a & b \\
a & b \\
\midrule
\endhead
\midrule
Expand All @@ -423,8 +423,8 @@ def test_to_latex_longtable(self):
\bottomrule
\endlastfoot
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\end{longtable}
"""

Expand Down Expand Up @@ -663,8 +663,8 @@ def test_to_latex_no_header(self):
withoutindex_result = df.to_latex(index=False, header=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand All @@ -690,10 +690,10 @@ def test_to_latex_specified_header(self):
withoutindex_result = df.to_latex(header=["AA", "BB"], index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
AA & BB \\
AA & BB \\
\midrule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand Down

0 comments on commit aca77f7

Please sign in to comment.