From 5e681c1de8890c8004fb2c558ed52e9e73701f8b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 29 Aug 2023 10:36:20 +0200 Subject: [PATCH] Infer string storage based on infer_string option (#54794) --- doc/source/whatsnew/v2.1.0.rst | 6 +++++- pandas/core/arrays/string_.py | 6 +++++- pandas/core/config_init.py | 3 ++- pandas/tests/series/test_constructors.py | 8 ++++++++ 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 8f2667d69a322..7aea1fa99f655 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -39,11 +39,15 @@ We are collecting feedback on this decision `here libmissing.NAType | float: # type: ignore[override] def __init__(self, storage=None) -> None: if storage is None: - storage = get_option("mode.string_storage") + infer_string = get_option("future.infer_string") + if infer_string: + storage = "pyarrow_numpy" + else: + storage = get_option("mode.string_storage") if storage not in {"python", "pyarrow", "pyarrow_numpy"}: raise ValueError( f"Storage must be 'python' or 'pyarrow'. Got {storage} instead." diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index d425a378b8d5b..645ed81c16ed3 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -493,7 +493,8 @@ def use_inf_as_na_cb(key) -> None: string_storage_doc = """ : string - The default storage for StringDtype. + The default storage for StringDtype. This option is ignored if + ``future.infer_string`` is set to True. """ with cf.config_prefix("mode"): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index ef734e9664844..2c3fdf627788a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -2115,6 +2115,14 @@ def test_series_string_inference_array_string_dtype(self): ser = Series(np.array(["a", "b"])) tm.assert_series_equal(ser, expected) + def test_series_string_inference_storage_definition(self): + # GH#54793 + pytest.importorskip("pyarrow") + expected = Series(["a", "b"], dtype="string[pyarrow_numpy]") + with pd.option_context("future.infer_string", True): + result = Series(["a", "b"], dtype="string") + tm.assert_series_equal(result, expected) + class TestSeriesConstructorIndexCoercion: def test_series_constructor_datetimelike_index_coercion(self):