From dc191e34cc74ebc57830e98270b39d535dd8a59a Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Sun, 10 Dec 2023 22:30:21 +0800 Subject: [PATCH 01/23] Bug fix for spss kwargs --- pandas/io/spss.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index db31a07df79e6..57a8303e6c131 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -24,6 +24,7 @@ def read_spss( usecols: Sequence[str] | None = None, convert_categoricals: bool = True, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, + **kwargs ) -> DataFrame: """ Load an SPSS file from the file path, returning a DataFrame. @@ -46,6 +47,8 @@ def read_spss( DataFrame. .. versionadded:: 2.0 + **kwargs : dict, optional + Additional keyword arguments passed to :func:`pyreadstat.read_sav`. Returns ------- @@ -64,7 +67,7 @@ def read_spss( usecols = list(usecols) # pyreadstat requires a list df, metadata = pyreadstat.read_sav( - stringify_path(path), usecols=usecols, apply_value_formats=convert_categoricals + stringify_path(path), usecols=usecols, apply_value_formats=convert_categoricals, **kwargs ) df.attrs = metadata.__dict__ if dtype_backend is not lib.no_default: From c90c25705ff9fc813ce781fc6f2afe8d6deb3143 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Sun, 10 Dec 2023 22:35:28 +0800 Subject: [PATCH 02/23] Update to whatsnew with bug fix details --- doc/source/whatsnew/v2.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index ad44e87cacf82..2c3c07fac3b41 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -629,6 +629,7 @@ I/O - Bug in :meth:`~pandas.read_excel` with ``engine="odf"`` (``ods`` files) when string contains annotation (:issue:`55200`) - Bug in :meth:`~pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`) - Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`) +- Bug in :meth: `read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) Period ^^^^^^ From 56f08403403b64c177c3146b0419383dbf2bc4a8 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Sun, 10 Dec 2023 22:44:21 +0800 Subject: [PATCH 03/23] Black formatting fix --- pandas/io/spss.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 57a8303e6c131..9120488a91466 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -24,7 +24,7 @@ def read_spss( usecols: Sequence[str] | None = None, convert_categoricals: bool = True, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, - **kwargs + **kwargs, ) -> DataFrame: """ Load an SPSS file from the file path, returning a DataFrame. @@ -67,7 +67,10 @@ def read_spss( usecols = list(usecols) # pyreadstat requires a list df, metadata = pyreadstat.read_sav( - stringify_path(path), usecols=usecols, apply_value_formats=convert_categoricals, **kwargs + stringify_path(path), + usecols=usecols, + apply_value_formats=convert_categoricals, + **kwargs, ) df.attrs = metadata.__dict__ if dtype_backend is not lib.no_default: From f9e9c9155b92d85e7ebdc5f9809aad984b83c339 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Sun, 10 Dec 2023 22:48:59 +0800 Subject: [PATCH 04/23] Fix for whatsnew rst pre-commit --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 2c3c07fac3b41..92cac426ccd11 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -629,7 +629,7 @@ I/O - Bug in :meth:`~pandas.read_excel` with ``engine="odf"`` (``ods`` files) when string contains annotation (:issue:`55200`) - Bug in :meth:`~pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`) - Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`) -- Bug in :meth: `read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) +- Bug in :meth:`read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) Period ^^^^^^ From 0968e43f72b3c1bca1d65e2544452e3b6afb689c Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Sun, 10 Dec 2023 22:54:56 +0800 Subject: [PATCH 05/23] Sort whatsnew alphabetically --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 92cac426ccd11..df241c6dad48a 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -624,12 +624,12 @@ I/O - Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`) - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`) - Bug in :func:`read_json` not handling dtype conversion properly if ``infer_string`` is set (:issue:`56195`) +- Bug in :meth:`read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) - Bug in :meth:`DataFrame.to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`) - Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`) - Bug in :meth:`~pandas.read_excel` with ``engine="odf"`` (``ods`` files) when string contains annotation (:issue:`55200`) - Bug in :meth:`~pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`) - Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`) -- Bug in :meth:`read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) Period ^^^^^^ From 397adcaf1fd54d8c89c3009d0a1cc36ebb936aa9 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Sun, 10 Dec 2023 22:59:01 +0800 Subject: [PATCH 06/23] Fixing alphabetical sorting to whatsnew --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index df241c6dad48a..7f789f655f781 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -624,11 +624,11 @@ I/O - Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`) - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`) - Bug in :func:`read_json` not handling dtype conversion properly if ``infer_string`` is set (:issue:`56195`) -- Bug in :meth:`read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) - Bug in :meth:`DataFrame.to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`) - Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`) - Bug in :meth:`~pandas.read_excel` with ``engine="odf"`` (``ods`` files) when string contains annotation (:issue:`55200`) - Bug in :meth:`~pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`) +- Bug in :meth:`read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) - Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`) Period From f12b3014b503d0c0216e84725b916fc48397da54 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Sun, 10 Dec 2023 23:11:04 +0800 Subject: [PATCH 07/23] Sorting whatsnew --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 7f789f655f781..cb53d7abff804 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -624,11 +624,11 @@ I/O - Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`) - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`) - Bug in :func:`read_json` not handling dtype conversion properly if ``infer_string`` is set (:issue:`56195`) +- Bug in :func:`read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) - Bug in :meth:`DataFrame.to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`) - Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`) - Bug in :meth:`~pandas.read_excel` with ``engine="odf"`` (``ods`` files) when string contains annotation (:issue:`55200`) - Bug in :meth:`~pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`) -- Bug in :meth:`read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) - Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`) Period From df2fd08e3b41c661737b9f26f6a742939becd63f Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Mon, 11 Dec 2023 20:33:13 +0800 Subject: [PATCH 08/23] Fixing kwargs --- pandas/io/spss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 9120488a91466..61c86b643c96e 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -24,7 +24,7 @@ def read_spss( usecols: Sequence[str] | None = None, convert_categoricals: bool = True, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, - **kwargs, + kwargs: dict | None = None, ) -> DataFrame: """ Load an SPSS file from the file path, returning a DataFrame. @@ -47,7 +47,7 @@ def read_spss( DataFrame. .. versionadded:: 2.0 - **kwargs : dict, optional + kwargs : dict, optional Additional keyword arguments passed to :func:`pyreadstat.read_sav`. Returns From 68db7ba3b48bd1f3773bf45df354305d5885bb90 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Mon, 11 Dec 2023 21:39:52 +0800 Subject: [PATCH 09/23] Fixing tests --- pandas/io/spss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 61c86b643c96e..517a37e35df10 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -24,7 +24,7 @@ def read_spss( usecols: Sequence[str] | None = None, convert_categoricals: bool = True, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, - kwargs: dict | None = None, + kwargs: dict = {}, ) -> DataFrame: """ Load an SPSS file from the file path, returning a DataFrame. From 45d8d5915b55e3c0834929efb7336259172091ee Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Mon, 1 Jan 2024 15:41:03 +0800 Subject: [PATCH 10/23] Test fixes --- pandas/tests/io/test_spss.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py index e118c90d9bc02..faae63702e47b 100644 --- a/pandas/tests/io/test_spss.py +++ b/pandas/tests/io/test_spss.py @@ -65,6 +65,23 @@ def test_spss_labelled_str(datapath): tm.assert_frame_equal(df, expected) +@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") +@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") +def test_spss_kwargs(datapath): + # test file from the Haven project (https://haven.tidyverse.org/) + # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT + fname = datapath("io", "data", "spss", "labelled-str.sav") + + df = pd.read_spss(fname, convert_categoricals=True, row_limit=1) + expected = pd.DataFrame({"gender": ["Male", "Female"]}) + expected["gender"] = pd.Categorical(expected["gender"]) + tm.assert_frame_equal(df, expected) + + df = pd.read_spss(fname, convert_categoricals=False, row_offset=0) + expected = pd.DataFrame({"gender": ["M", "F"]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") @pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") def test_spss_umlauts(datapath): From d17a898f065181b29e778820702734a7d9ed14cc Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Mon, 1 Jan 2024 16:25:03 +0800 Subject: [PATCH 11/23] Minor change in kwargs --- pandas/io/spss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 517a37e35df10..8828bc05dafca 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -24,7 +24,7 @@ def read_spss( usecols: Sequence[str] | None = None, convert_categoricals: bool = True, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, - kwargs: dict = {}, + **kwargs: dict, ) -> DataFrame: """ Load an SPSS file from the file path, returning a DataFrame. From 02c900615f95de422030e64ba042b72c2f67e059 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Mon, 15 Jan 2024 19:02:41 +0800 Subject: [PATCH 12/23] Resolving PR comments --- pandas/io/spss.py | 4 ++-- pandas/tests/io/test_spss.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 8828bc05dafca..8512507a43006 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -24,7 +24,7 @@ def read_spss( usecols: Sequence[str] | None = None, convert_categoricals: bool = True, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, - **kwargs: dict, + **kwargs, ) -> DataFrame: """ Load an SPSS file from the file path, returning a DataFrame. @@ -47,7 +47,7 @@ def read_spss( DataFrame. .. versionadded:: 2.0 - kwargs : dict, optional + kwargs Additional keyword arguments passed to :func:`pyreadstat.read_sav`. Returns diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py index 61358e08c0d4e..1aa9f6dca0303 100644 --- a/pandas/tests/io/test_spss.py +++ b/pandas/tests/io/test_spss.py @@ -73,8 +73,7 @@ def test_spss_kwargs(datapath): fname = datapath("io", "data", "spss", "labelled-str.sav") df = pd.read_spss(fname, convert_categoricals=True, row_limit=1) - expected = pd.DataFrame({"gender": ["Male"]}) - expected["gender"] = pd.Categorical(expected["gender"]) + expected = pd.DataFrame({"gender": ["Male"]}, dtype="category") tm.assert_frame_equal(df, expected) df = pd.read_spss(fname, convert_categoricals=False, row_offset=1) From ffd032e8e9799b1b2c7867472f6ac51eb38f046b Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Mon, 15 Jan 2024 19:34:45 +0800 Subject: [PATCH 13/23] Resolving mypy issue --- pandas/io/spss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 8512507a43006..c63b995154290 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -24,7 +24,7 @@ def read_spss( usecols: Sequence[str] | None = None, convert_categoricals: bool = True, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, - **kwargs, + **kwargs: dict, ) -> DataFrame: """ Load an SPSS file from the file path, returning a DataFrame. From f0430104629110135503095f3157e1ac3fc6b2a8 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Mon, 15 Jan 2024 20:45:32 +0800 Subject: [PATCH 14/23] Docstring update for kwargs mypy --- pandas/io/spss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index c63b995154290..8828bc05dafca 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -47,7 +47,7 @@ def read_spss( DataFrame. .. versionadded:: 2.0 - kwargs + kwargs : dict, optional Additional keyword arguments passed to :func:`pyreadstat.read_sav`. Returns From 61e298a13f9952cd6a4f9989d00d11897a3471e3 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Tue, 16 Jan 2024 22:27:23 +0800 Subject: [PATCH 15/23] Updates based on PR comments --- pandas/io/spss.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 8828bc05dafca..d4c7b0c19f423 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -1,6 +1,9 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Any, +) from pandas._libs import lib from pandas.compat._optional import import_optional_dependency @@ -24,7 +27,7 @@ def read_spss( usecols: Sequence[str] | None = None, convert_categoricals: bool = True, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, - **kwargs: dict, + **kwargs: dict[str, Any], ) -> DataFrame: """ Load an SPSS file from the file path, returning a DataFrame. @@ -47,8 +50,9 @@ def read_spss( DataFrame. .. versionadded:: 2.0 - kwargs : dict, optional + **kwargs : dict, optional Additional keyword arguments passed to :func:`pyreadstat.read_sav`. + .. versionadded:: 3.0 Returns ------- From 47d191dc7e8ca6dfef40866c32b94476e639aa1c Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Tue, 16 Jan 2024 23:01:43 +0800 Subject: [PATCH 16/23] Fixing kwargs types --- pandas/io/spss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index d4c7b0c19f423..07739e7b4a461 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -27,7 +27,7 @@ def read_spss( usecols: Sequence[str] | None = None, convert_categoricals: bool = True, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, - **kwargs: dict[str, Any], + **kwargs: Any, ) -> DataFrame: """ Load an SPSS file from the file path, returning a DataFrame. From 8ef79d35deac1684cb478573ea5ee89f897306b0 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Fri, 19 Jan 2024 16:50:16 +0800 Subject: [PATCH 17/23] Finalizing updates based on suggestions --- pandas/io/spss.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 07739e7b4a461..a23212c33d2fe 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -50,8 +50,9 @@ def read_spss( DataFrame. .. versionadded:: 2.0 - **kwargs : dict, optional + **kwargs Additional keyword arguments passed to :func:`pyreadstat.read_sav`. + .. versionadded:: 3.0 Returns From 2ed067127954398f470fe6044b6995c5ce2f6392 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Fri, 19 Jan 2024 18:48:45 +0800 Subject: [PATCH 18/23] Updating to try to fix other random test fails --- pandas/io/spss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index a23212c33d2fe..d1f1b82e2f604 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -51,7 +51,7 @@ def read_spss( .. versionadded:: 2.0 **kwargs - Additional keyword arguments passed to :func:`pyreadstat.read_sav`. + Additional keyword arguments that can be passed to :func:`pyreadstat.read_sav`. .. versionadded:: 3.0 From 83102b0938a5f2202f9eba9739329f1775e21cf0 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Fri, 19 Jan 2024 19:30:42 +0800 Subject: [PATCH 19/23] Unrelated test fix --- pandas/tests/generic/test_to_xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py index e0d79c3f15282..572ebdd25f919 100644 --- a/pandas/tests/generic/test_to_xarray.py +++ b/pandas/tests/generic/test_to_xarray.py @@ -41,7 +41,7 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string): df.index.name = "foo" df.columns.name = "bar" result = df.to_xarray() - assert result.dims["foo"] == 4 + assert result.sizes["foo"] == 4 assert len(result.coords) == 1 assert len(result.data_vars) == 8 tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) From 20ef6f5a6a5e8a7392c5215829c13f849ef85f06 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Fri, 19 Jan 2024 20:25:38 +0800 Subject: [PATCH 20/23] Test fixes for dims/sizes --- pandas/tests/generic/test_to_xarray.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py index 572ebdd25f919..d8401a8b2ae3f 100644 --- a/pandas/tests/generic/test_to_xarray.py +++ b/pandas/tests/generic/test_to_xarray.py @@ -62,7 +62,7 @@ def test_to_xarray_empty(self, df): df.index.name = "foo" result = df[0:0].to_xarray() - assert result.dims["foo"] == 0 + assert result.sizes["foo"] == 0 assert isinstance(result, Dataset) def test_to_xarray_with_multiindex(self, df, using_infer_string): @@ -71,8 +71,8 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string): # MultiIndex df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"]) result = df.to_xarray() - assert result.dims["one"] == 1 - assert result.dims["two"] == 4 + assert result.sizes["one"] == 1 + assert result.sizes["two"] == 4 assert len(result.coords) == 2 assert len(result.data_vars) == 8 tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) From b2e06a6a2094befae11d4c54620be65e849957a8 Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Fri, 26 Jan 2024 16:55:15 +0800 Subject: [PATCH 21/23] Updates to whatsnew --- doc/source/whatsnew/v2.2.0.rst | 1 - doc/source/whatsnew/v3.0.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 07728131c6cd0..8319f3012a1eb 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -865,7 +865,6 @@ I/O - Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a CSV with no headers (:issue:`54459`) - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when the file contains ``NaN`` or ``Inf`` (:issue:`54564`) - Bug in :func:`read_json` not handling dtype conversion properly if ``infer_string`` is set (:issue:`56195`) -- Bug in :func:`read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) - Bug in :meth:`DataFrame.to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`) - Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`) - Bug in :meth:`DataFrame.to_stata` raising for extension dtypes (:issue:`54671`) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 950082f9281c5..451fe1f883936 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -177,7 +177,7 @@ MultiIndex I/O ^^^ -- +- Bug in :func:`read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) - Period From 3524db00db4495cf946fcd4bc17422404d7d2d3f Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Fri, 26 Jan 2024 16:56:47 +0800 Subject: [PATCH 22/23] Case fix --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 8319f3012a1eb..d9ab0452c8334 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -865,7 +865,7 @@ I/O - Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a CSV with no headers (:issue:`54459`) - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when the file contains ``NaN`` or ``Inf`` (:issue:`54564`) - Bug in :func:`read_json` not handling dtype conversion properly if ``infer_string`` is set (:issue:`56195`) -- Bug in :meth:`DataFrame.to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`) +- Bug in :meth:`DataFrame.to_excel`, with ``OdsWriter`` (``ods`` files) writing Boolean/string value (:issue:`54994`) - Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`) - Bug in :meth:`DataFrame.to_stata` raising for extension dtypes (:issue:`54671`) - Bug in :meth:`~pandas.read_excel` with ``engine="odf"`` (``ods`` files) when a string cell contains an annotation (:issue:`55200`) From 9592112ddeb35eb83bcbbc52c54aa2eb3c2ee2af Mon Sep 17 00:00:00 2001 From: Shubhankar Agrawal Date: Fri, 26 Jan 2024 18:16:59 +0800 Subject: [PATCH 23/23] Update to reword to enhancements --- doc/source/whatsnew/v3.0.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 451fe1f883936..3ac2097d698a8 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -28,7 +28,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- :func:`read_spss` now supports kwargs to be passed to pyreadstat (:issue:`56356`) - .. --------------------------------------------------------------------------- @@ -177,7 +177,7 @@ MultiIndex I/O ^^^ -- Bug in :func:`read_spss` where kwargs were not passed to pyreadstat (:issue:`56356`) +- - Period