From bd3e37f9140c9fff3924098cd7754cd49015404a Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 26 Sep 2020 04:32:17 -0700 Subject: [PATCH] Backport PR #36560: [BUG]: Fix regression in read_table with delim_whitespace=True (#36661) Co-authored-by: patrick <61934744+phofl@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/io/parsers.py | 10 ++++++++++ pandas/tests/io/parser/test_common.py | 21 +++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index aeb9076617787..eded30ca45025 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -40,6 +40,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.apply` with ``raw=True`` and user-function returning string (:issue:`35940`) - Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) - Fixed regression in :class:`Period` incorrect value for ordinal over the maximum timestamp (:issue:`36430`) +- Fixed regression where :func:`read_table` raised ``ValueError`` when ``delim_whitespace`` was set to ``True`` (:issue:`35958`) - Fixed regression in :meth:`Series.dt.normalize` when normalizing pre-epoch dates the result was shifted one day (:issue:`36294`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index d4f346f8c1087..a02b059967e88 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -752,6 +752,16 @@ def read_table( memory_map=False, float_precision=None, ): + # TODO: validation duplicated in read_csv + if delim_whitespace and (delimiter is not None or sep != "\t"): + raise ValueError( + "Specified a delimiter with both sep and " + "delim_whitespace=True; you can only specify one." 
+ ) + if delim_whitespace: + # In this case sep is not used so we set it to the read_csv + # default to avoid a ValueError + sep = "," return read_csv(**locals()) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 3d5f6ae3a4af9..c6a43d22ca155 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2191,3 +2191,24 @@ def test_read_csv_with_use_inf_as_na(all_parsers): result = parser.read_csv(StringIO(data), header=None) expected = DataFrame([1.0, np.nan, 3.0]) tm.assert_frame_equal(result, expected) + + +def test_read_table_delim_whitespace_default_sep(all_parsers): + # GH: 35958 + f = StringIO("a b c\n1 -2 -3\n4 5 6") + parser = all_parsers + result = parser.read_table(f, delim_whitespace=True) + expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]}) + tm.assert_frame_equal(result, expected) + + +def test_read_table_delim_whitespace_non_default_sep(all_parsers): + # GH: 35958 + f = StringIO("a b c\n1 -2 -3\n4 5 6") + parser = all_parsers + msg = ( + "Specified a delimiter with both sep and " + "delim_whitespace=True; you can only specify one." + ) + with pytest.raises(ValueError, match=msg): + parser.read_table(f, delim_whitespace=True, sep=",")