From 8802fa8bde7c132cbf22782b125e7879c182b4d2 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 22 Sep 2020 23:05:28 +0200 Subject: [PATCH 1/7] Fix regression in read_table --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/io/parsers.py | 2 ++ pandas/tests/io/parser/test_common.py | 9 +++++++++ 3 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index e3a96c69918db..5523a4c3b9045 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -36,6 +36,7 @@ Fixed regressions - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`,:issue:`35802`) - Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`,:issue:`36377`) - Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) +- Fixed regression in :meth:`read_table()` raised ``ValueError`` when ``delim_whitespace`` was set to ``True`` (:issue:`35958`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index bc622ab8c1f18..b3fe468d97d66 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -757,6 +757,8 @@ def read_table( memory_map=False, float_precision=None, ): + if delim_whitespace: + sep = "," return read_csv(**locals()) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 08eab69900400..bf5cb6e996194 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2200,3 +2200,12 @@ def test_read_csv_with_use_inf_as_na(all_parsers): result = parser.read_csv(StringIO(data), header=None) expected = DataFrame([1.0, np.nan, 3.0]) tm.assert_frame_equal(result, expected) + + +def test_read_table_delim_whitespace_default_sep(all_parsers): + # GH: 35958 + f = StringIO("a b c\n1 -2 -3\n4 5 6") + parser = all_parsers + result = parser.read_table(f, delim_whitespace=True) + expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]}) + tm.assert_frame_equal(result, expected) From e565f62f6b7528df16b7127e0ea020f31758154f Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 22 Sep 2020 23:18:09 +0200 Subject: [PATCH 2/7] Change if condition --- pandas/io/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b3fe468d97d66..7980c4745ba2c 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -757,7 +757,7 @@ def read_table( memory_map=False, float_precision=None, ): - if delim_whitespace: + if delim_whitespace and sep == "\t": sep = "," return read_csv(**locals()) From e8b0f739cbacee5d16c726d7cd93a9c3c392357e Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Wed, 23 Sep 2020 15:31:17 +0200 Subject: [PATCH 3/7] Update meth description Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 3d1ce9693941e..2bbaf522bff82 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -37,7 +37,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`,:issue:`36377`) - Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) - Fixed regression in :class:`Period` incorrect value for ordinal over the maximum timestamp (:issue:`36430`) -- Fixed regression in :meth:`read_table()` raised ``ValueError`` when ``delim_whitespace`` was set to ``True`` (:issue:`35958`) +- Fixed regression in :func:`read_table` raised ``ValueError`` when ``delim_whitespace`` was set to ``True`` (:issue:`35958`) .. --------------------------------------------------------------------------- From 9117b7cf4b8ef03419c9ecd7cc28bc09e8e0e09c Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 23 Sep 2020 20:09:02 +0200 Subject: [PATCH 4/7] Add comment --- pandas/io/parsers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 7980c4745ba2c..decfdf2707e6e 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -758,6 +758,8 @@ def read_table( float_precision=None, ): if delim_whitespace and sep == "\t": + # In this case sep is not used so we set it to the read_csv + # default to avoid a ValueError sep = "," return read_csv(**locals()) From df8794bfa29cfd78cccec77cfbeb2f22ad585d54 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 23 Sep 2020 20:55:44 +0200 Subject: [PATCH 5/7] Add validation and add test --- pandas/io/parsers.py | 6 ++++++ pandas/tests/io/parser/test_common.py | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index decfdf2707e6e..4bead84fbdc89 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -757,6 +757,12 @@ def read_table( memory_map=False, float_precision=None, ): + # TODO: validation duplicated in read_csv + if delim_whitespace and (delimiter is not None or sep != "\t"): + raise ValueError( + "Specified a delimiter with both sep and " + "delim_whitespace=True; you can only specify one." + ) if delim_whitespace and sep == "\t": # In this case sep is not used so we set it to the read_csv # default to avoid a ValueError diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index bf5cb6e996194..b78d59cb24e4a 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2209,3 +2209,13 @@ def test_read_table_delim_whitespace_default_sep(all_parsers): result = parser.read_table(f, delim_whitespace=True) expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]}) tm.assert_frame_equal(result, expected) + + +def test_read_table_delim_whitespace_non_default_sep(all_parsers): + # GH: 35958 + f = StringIO("a b c\n1 -2 -3\n4 5 6") + parser = all_parsers + msg = "Specified a delimiter with both sep and " \ + "delim_whitespace=True; you can only specify one." + with pytest.raises(ValueError, match=msg): + parser.read_table(f, delim_whitespace=True, sep=",") From 5e6229226a6c1b591d888469627e244cc35df63a Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 23 Sep 2020 21:01:58 +0200 Subject: [PATCH 6/7] Run black pandas --- pandas/tests/io/parser/test_common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index b78d59cb24e4a..78c2f2bce5a02 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2215,7 +2215,9 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers): # GH: 35958 f = StringIO("a b c\n1 -2 -3\n4 5 6") parser = all_parsers - msg = "Specified a delimiter with both sep and " \ - "delim_whitespace=True; you can only specify one." + msg = ( + "Specified a delimiter with both sep and " + "delim_whitespace=True; you can only specify one." + ) with pytest.raises(ValueError, match=msg): parser.read_table(f, delim_whitespace=True, sep=",") From 0bd4f4027016d6a062ef7cea01af9ab123b5ea51 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 23 Sep 2020 21:11:36 +0200 Subject: [PATCH 7/7] Simplify if condition --- pandas/io/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4bead84fbdc89..9300cacd99534 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -763,7 +763,7 @@ def read_table( "Specified a delimiter with both sep and " "delim_whitespace=True; you can only specify one." ) - if delim_whitespace and sep == "\t": + if delim_whitespace: # In this case sep is not used so we set it to the read_csv # default to avoid a ValueError sep = ","