diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 3aa53d4b07aa5..5fa1a984b8aea 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -12,6 +12,7 @@
 
 import numpy as np
 
+from pandas._config import using_string_dtype
 from pandas._config.localization import (
     can_set_locale,
     get_locales,
@@ -106,7 +107,10 @@
 ALL_FLOAT_DTYPES: list[Dtype] = [*FLOAT_NUMPY_DTYPES, *FLOAT_EA_DTYPES]
 
 COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
-STRING_DTYPES: list[Dtype] = [str, "str", "U"]
+if using_string_dtype():
+    STRING_DTYPES: list[Dtype] = [str, "U"]
+else:
+    STRING_DTYPES: list[Dtype] = [str, "str", "U"]  # type: ignore[no-redef]
 COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES]
 
 DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"]
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index a17056b51a014..d07bfeda50e1d 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -575,7 +575,10 @@ def __getitem__(self, item: PositionalIndexer):
         if isinstance(item, np.ndarray):
             if not len(item):
                 # Removable once we migrate StringDtype[pyarrow] to ArrowDtype[string]
-                if self._dtype.name == "string" and self._dtype.storage == "pyarrow":
+                if (
+                    isinstance(self._dtype, StringDtype)
+                    and self._dtype.storage == "pyarrow"
+                ):
                     # TODO(infer_string) should this be large_string?
                     pa_dtype = pa.string()
                 else:
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index b75dbaa3a15e8..8a4fd9fc1b34d 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -4,7 +4,6 @@
 from typing import (
     TYPE_CHECKING,
     Any,
-    ClassVar,
     Literal,
     cast,
 )
@@ -118,9 +117,12 @@ class StringDtype(StorageExtensionDtype):
     string[pyarrow]
     """
 
-    # error: Cannot override instance variable (previously declared on
-    # base class "StorageExtensionDtype") with class variable
-    name: ClassVar[str] = "string"  # type: ignore[misc]
+    @property
+    def name(self) -> str:  # type: ignore[override]
+        if self._na_value is libmissing.NA:
+            return "string"
+        else:
+            return "str"
 
     #: StringDtype().na_value uses pandas.NA except the implementation that
     # follows NumPy semantics, which uses nan.
@@ -137,7 +139,7 @@ def __init__(
     ) -> None:
         # infer defaults
         if storage is None:
-            if using_string_dtype() and na_value is not libmissing.NA:
+            if na_value is not libmissing.NA:
                 if HAS_PYARROW:
                     storage = "pyarrow"
                 else:
@@ -170,11 +172,19 @@ def __init__(
         self.storage = storage
         self._na_value = na_value
 
+    def __repr__(self) -> str:
+        if self._na_value is libmissing.NA:
+            return f"{self.name}[{self.storage}]"
+        else:
+            # TODO add more informative repr
+            return self.name
+
     def __eq__(self, other: object) -> bool:
         # we need to override the base class __eq__ because na_value (NA or NaN)
         # cannot be checked with normal `==`
         if isinstance(other, str):
-            if other == self.name:
+            # TODO should dtype == "string" work for the NaN variant?
+            if other == "string" or other == self.name:  # noqa: PLR1714
                 return True
             try:
                 other = self.construct_from_string(other)
@@ -231,6 +241,8 @@ def construct_from_string(cls, string) -> Self:
             )
         if string == "string":
             return cls()
+        elif string == "str" and using_string_dtype():
+            return cls(na_value=np.nan)
         elif string == "string[python]":
             return cls(storage="python")
         elif string == "string[pyarrow]":
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ea91046f4b8e4..b8039746d9952 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4807,7 +4807,9 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame:
         -----
         * To select all *numeric* types, use ``np.number`` or ``'number'``
         * To select strings you must use the ``object`` dtype, but note that
-          this will return *all* object dtype columns
+          this will return *all* object dtype columns. With
+          ``pd.options.future.infer_string`` enabled, using ``"str"`` will
+          work to select all string columns.
         * See the `numpy dtype hierarchy
           <https://numpy.org/doc/stable/reference/arrays.scalars.html>`__
         * To select datetimes, use ``np.datetime64``, ``'datetime'`` or
diff --git a/pandas/core/interchange/utils.py b/pandas/core/interchange/utils.py
index fd1c7c9639242..035a1f8abdbc5 100644
--- a/pandas/core/interchange/utils.py
+++ b/pandas/core/interchange/utils.py
@@ -135,7 +135,12 @@ def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str:
     if format_str is not None:
         return format_str
 
-    if lib.is_np_dtype(dtype, "M"):
+    if isinstance(dtype, pd.StringDtype):
+        # TODO(infer_string) this should be LARGE_STRING for pyarrow storage,
+        # but current tests don't cover this distinction
+        return ArrowCTypes.STRING
+
+    elif lib.is_np_dtype(dtype, "M"):
         # Selecting the first char of resolution string:
         # dtype.str -> '<M8[ns]' -> 'n'
         resolution = np.datetime_data(dtype)[0][0]
diff --git a/pandas/tests/apply/test_numba.py b/pandas/tests/apply/test_numba.py
index aee9100702350..6ac0b49f0e4e7 100644
--- a/pandas/tests/apply/test_numba.py
+++ b/pandas/tests/apply/test_numba.py
@@ -110,7 +110,7 @@ def test_numba_unsupported_dtypes(apply_axis):
 
     with pytest.raises(
         ValueError,
-        match="Column b must have a numeric dtype. Found 'object|string' instead",
+        match="Column b must have a numeric dtype. Found 'object|str' instead",
     ):
         df.apply(f, engine="numba", axis=apply_axis)
 
diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py
index 53fc135e77780..76704de6f2d10 100644
--- a/pandas/tests/apply/test_series_apply.py
+++ b/pandas/tests/apply/test_series_apply.py
@@ -224,7 +224,7 @@ def test_apply_categorical(by_row, using_infer_string):
     result = ser.apply(lambda x: "A")
     exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
     tm.assert_series_equal(result, exp)
-    assert result.dtype == object if not using_infer_string else "string[pyarrow_numpy]"
+    assert result.dtype == object if not using_infer_string else "str"
 
 
 @pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py
index 932e903c0e448..8c2672218f273 100644
--- a/pandas/tests/arrays/boolean/test_astype.py
+++ b/pandas/tests/arrays/boolean/test_astype.py
@@ -5,7 +5,7 @@
 import pandas._testing as tm
 
 
-def test_astype():
+def test_astype(using_infer_string):
     # with missing values
     arr = pd.array([True, False, None], dtype="boolean")
 
@@ -20,8 +20,14 @@ def test_astype():
     tm.assert_numpy_array_equal(result, expected)
 
     result = arr.astype("str")
-    expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
-    tm.assert_numpy_array_equal(result, expected)
+    if using_infer_string:
+        expected = pd.array(
+            ["True", "False", None], dtype=pd.StringDtype(na_value=np.nan)
+        )
+        tm.assert_extension_array_equal(result, expected)
+    else:
+        expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
+        tm.assert_numpy_array_equal(result, expected)
 
     # no missing values
     arr = pd.array([True, False, True], dtype="boolean")
diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py
index 7cfd8ec8dfadf..00999d491b242 100644
--- a/pandas/tests/arrays/categorical/test_astype.py
+++ b/pandas/tests/arrays/categorical/test_astype.py
@@ -88,7 +88,7 @@ def test_astype(self, ordered):
         expected = np.array(cat)
         tm.assert_numpy_array_equal(result, expected)
 
-        msg = r"Cannot cast object|string dtype to float64"
+        msg = r"Cannot cast object|str dtype to float64"
         with pytest.raises(ValueError, match=msg):
             cat.astype(float)
 
diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
index e2e5d47f50209..3a2c489920eb0 100644
--- a/pandas/tests/arrays/categorical/test_repr.py
+++ b/pandas/tests/arrays/categorical/test_repr.py
@@ -22,7 +22,7 @@ def test_print(self, using_infer_string):
         if using_infer_string:
             expected = [
                 "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
-                "Categories (3, string): [a < b < c]",
+                "Categories (3, str): [a < b < c]",
             ]
         else:
             expected = [
diff --git a/pandas/tests/arrays/floating/test_astype.py b/pandas/tests/arrays/floating/test_astype.py
index ade3dbd2c99da..ccf644b34051d 100644
--- a/pandas/tests/arrays/floating/test_astype.py
+++ b/pandas/tests/arrays/floating/test_astype.py
@@ -63,12 +63,21 @@ def test_astype_to_integer_array():
     tm.assert_extension_array_equal(result, expected)
 
 
-def test_astype_str():
+def test_astype_str(using_infer_string):
     a = pd.array([0.1, 0.2, None], dtype="Float64")
-    expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
 
-    tm.assert_numpy_array_equal(a.astype(str), expected)
-    tm.assert_numpy_array_equal(a.astype("str"), expected)
+    if using_infer_string:
+        expected = pd.array(["0.1", "0.2", None], dtype=pd.StringDtype(na_value=np.nan))
+        tm.assert_extension_array_equal(a.astype("str"), expected)
+
+        # TODO(infer_string) this should also be a string array like above
+        expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
+        tm.assert_numpy_array_equal(a.astype(str), expected)
+    else:
+        expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
+
+        tm.assert_numpy_array_equal(a.astype(str), expected)
+        tm.assert_numpy_array_equal(a.astype("str"), expected)
 
 
 def test_astype_copy():
diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py
index 8ed6dbcd32d3d..fadd7ac67b58d 100644
--- a/pandas/tests/arrays/integer/test_dtypes.py
+++ b/pandas/tests/arrays/integer/test_dtypes.py
@@ -276,12 +276,21 @@ def test_to_numpy_na_raises(dtype):
         a.to_numpy(dtype=dtype)
 
 
-def test_astype_str():
+def test_astype_str(using_infer_string):
     a = pd.array([1, 2, None], dtype="Int64")
-    expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
 
-    tm.assert_numpy_array_equal(a.astype(str), expected)
-    tm.assert_numpy_array_equal(a.astype("str"), expected)
+    if using_infer_string:
+        expected = pd.array(["1", "2", None], dtype=pd.StringDtype(na_value=np.nan))
+        tm.assert_extension_array_equal(a.astype("str"), expected)
+
+        # TODO(infer_string) this should also be a string array like above
+        expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
+        tm.assert_numpy_array_equal(a.astype(str), expected)
+    else:
+        expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
+
+        tm.assert_numpy_array_equal(a.astype(str), expected)
+        tm.assert_numpy_array_equal(a.astype("str"), expected)
 
 
 def test_astype_boolean():
diff --git a/pandas/tests/arrays/interval/test_interval_pyarrow.py b/pandas/tests/arrays/interval/test_interval_pyarrow.py
index be87d5d3ef7ba..ef8701be81e2b 100644
--- a/pandas/tests/arrays/interval/test_interval_pyarrow.py
+++ b/pandas/tests/arrays/interval/test_interval_pyarrow.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 import pandas._testing as tm
 from pandas.core.arrays import IntervalArray
@@ -82,7 +80,6 @@ def test_arrow_array_missing():
     assert result.storage.equals(expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.filterwarnings(
     "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
 )
diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py
index ff86b696c8403..431309aca0df2 100644
--- a/pandas/tests/arrays/period/test_arrow_compat.py
+++ b/pandas/tests/arrays/period/test_arrow_compat.py
@@ -1,7 +1,5 @@
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.compat.pyarrow import pa_version_under10p1
 
 from pandas.core.dtypes.dtypes import PeriodDtype
@@ -79,7 +77,6 @@ def test_arrow_array_missing():
     assert result.storage.equals(expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_arrow_table_roundtrip():
     from pandas.core.arrays.arrow.extension_types import ArrowPeriodType
 
@@ -99,7 +96,6 @@ def test_arrow_table_roundtrip():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_arrow_load_from_zero_chunks():
     # GH-41040
 
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 3688d2998b3c7..91ad01a2fb0eb 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -66,7 +66,7 @@ def test_repr(dtype):
     assert repr(df) == expected
 
     if dtype.na_value is np.nan:
-        expected = "0      a\n1    NaN\n2      b\nName: A, dtype: string"
+        expected = "0      a\n1    NaN\n2      b\nName: A, dtype: str"
     else:
         expected = "0       a\n1    <NA>\n2       b\nName: A, dtype: string"
     assert repr(df.A) == expected
@@ -76,10 +76,10 @@ def test_repr(dtype):
         expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
     elif dtype.storage == "pyarrow" and dtype.na_value is np.nan:
         arr_name = "ArrowStringArrayNumpySemantics"
-        expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: string"
+        expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: str"
     elif dtype.storage == "python" and dtype.na_value is np.nan:
         arr_name = "StringArrayNumpySemantics"
-        expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: string"
+        expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: str"
     else:
         arr_name = "StringArray"
         expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
@@ -500,7 +500,7 @@ def test_fillna_args(dtype):
     tm.assert_extension_array_equal(res, expected)
 
     if dtype.storage == "pyarrow":
-        msg = "Invalid value '1' for dtype string"
+        msg = "Invalid value '1' for dtype str"
     else:
         msg = "Cannot set non-string value '1' into a StringArray."
     with pytest.raises(TypeError, match=msg):
@@ -522,7 +522,7 @@ def test_arrow_array(dtype):
     assert arr.equals(expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
 def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
     # roundtrip possible from arrow 1.0.0
@@ -537,14 +537,17 @@ def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
         assert table.field("a").type == "large_string"
     with pd.option_context("string_storage", string_storage):
         result = table.to_pandas()
-    assert isinstance(result["a"].dtype, pd.StringDtype)
-    expected = df.astype(f"string[{string_storage}]")
-    tm.assert_frame_equal(result, expected)
-    # ensure the missing value is represented by NA and not np.nan or None
-    assert result.loc[2, "a"] is result["a"].dtype.na_value
+    if dtype.na_value is np.nan and not using_string_dtype():
+        assert result["a"].dtype == "object"
+    else:
+        assert isinstance(result["a"].dtype, pd.StringDtype)
+        expected = df.astype(f"string[{string_storage}]")
+        tm.assert_frame_equal(result, expected)
+        # ensure the missing value is represented by NA and not np.nan or None
+        assert result.loc[2, "a"] is result["a"].dtype.na_value
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
 def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
     # GH-41040
@@ -561,9 +564,13 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
     table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema)
     with pd.option_context("string_storage", string_storage):
         result = table.to_pandas()
-    assert isinstance(result["a"].dtype, pd.StringDtype)
-    expected = df.astype(f"string[{string_storage}]")
-    tm.assert_frame_equal(result, expected)
+
+    if dtype.na_value is np.nan and not using_string_dtype():
+        assert result["a"].dtype == "object"
+    else:
+        assert isinstance(result["a"].dtype, pd.StringDtype)
+        expected = df.astype(f"string[{string_storage}]")
+        tm.assert_frame_equal(result, expected)
 
 
 def test_value_counts_na(dtype):
diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py
index 65c6ce8e9cd08..7d4aae0f7bb4e 100644
--- a/pandas/tests/arrays/string_/test_string_arrow.py
+++ b/pandas/tests/arrays/string_/test_string_arrow.py
@@ -4,6 +4,7 @@
 import numpy as np
 import pytest
 
+from pandas.compat import HAS_PYARROW
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -27,8 +28,9 @@ def test_eq_all_na():
 
 
 def test_config(string_storage, request, using_infer_string):
-    if using_infer_string and string_storage == "python":
-        # python string storage with na_value=NaN is not yet implemented
+    if using_infer_string and string_storage == "python" and HAS_PYARROW:
+        # string storage with na_value=NaN always uses pyarrow if available
+        # -> does not yet honor the option
         request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
 
     with pd.option_context("string_storage", string_storage):
diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index 3d8f8d791b763..5834b268be2be 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -297,7 +297,9 @@ def test_searchsorted(self):
         assert result == 10
 
     @pytest.mark.parametrize("box", [None, "index", "series"])
-    def test_searchsorted_castable_strings(self, arr1d, box, string_storage):
+    def test_searchsorted_castable_strings(
+        self, arr1d, box, string_storage, using_infer_string
+    ):
         arr = arr1d
         if box is None:
             pass
@@ -333,7 +335,8 @@ def test_searchsorted_castable_strings(self, arr1d, box, string_storage):
                 TypeError,
                 match=re.escape(
                     f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', "
-                    "or array of those. Got string array instead."
+                    "or array of those. Got "
+                    f"{'str' if using_infer_string else 'string'} array instead."
                 ),
             ):
                 arr.searchsorted([str(arr[1]), "baz"])
diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index f47815ee059af..4bf97b1fd8494 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -799,3 +799,22 @@ def test_pandas_dtype_ea_not_instance():
     # GH 31356 GH 54592
     with tm.assert_produces_warning(UserWarning, match="without any arguments"):
         assert pandas_dtype(CategoricalDtype) == CategoricalDtype()
+
+
+def test_pandas_dtype_string_dtypes(string_storage):
+    # TODO(infer_string) remove skip if "python" is supported
+    pytest.importorskip("pyarrow")
+    with pd.option_context("future.infer_string", True):
+        with pd.option_context("string_storage", string_storage):
+            result = pandas_dtype("str")
+    # TODO(infer_string) hardcoded to pyarrow until python is supported
+    assert result == pd.StringDtype("pyarrow", na_value=np.nan)
+
+    with pd.option_context("future.infer_string", False):
+        with pd.option_context("string_storage", string_storage):
+            result = pandas_dtype("str")
+    assert result == np.dtype("U")
+
+    with pd.option_context("string_storage", string_storage):
+        result = pandas_dtype("string")
+    assert result == pd.StringDtype(string_storage, na_value=pd.NA)
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index b6c5becf49fa0..7c7da41124b83 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -1061,7 +1061,7 @@ def test_str_vs_repr(self, ordered, using_infer_string):
         c1 = CategoricalDtype(["a", "b"], ordered=ordered)
         assert str(c1) == "category"
         # Py2 will have unicode prefixes
-        dtype = "string" if using_infer_string else "object"
+        dtype = "str" if using_infer_string else "object"
         pat = (
             r"CategoricalDtype\(categories=\[.*\], ordered={ordered}, "
             rf"categories_dtype={dtype}\)"
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index 2ab248787a1cf..b59c10824c5c4 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -117,6 +117,20 @@ def test_is_not_string_type(self, dtype):
         # because StringDtype is a string type
         assert is_string_dtype(dtype)
 
+    def test_is_dtype_from_name(self, dtype, using_infer_string):
+        if dtype.na_value is np.nan and not using_infer_string:
+            result = type(dtype).is_dtype(dtype.name)
+            assert result is False
+        else:
+            super().test_is_dtype_from_name(dtype)
+
+    def test_construct_from_string_own_name(self, dtype, using_infer_string):
+        if dtype.na_value is np.nan and not using_infer_string:
+            with pytest.raises(TypeError, match="Cannot construct a 'StringDtype'"):
+                dtype.construct_from_string(dtype.name)
+        else:
+            super().test_construct_from_string_own_name(dtype)
+
     def test_view(self, data):
         if data.dtype.storage == "pyarrow":
             pytest.skip(reason="2D support not implemented for ArrowStringArray")
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index b0b33b4a565ec..826ac2be3339b 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -323,7 +323,7 @@ def test_setitem(self, float_frame, using_infer_string):
         smaller["col10"] = ["1", "2"]
 
         if using_infer_string:
-            assert smaller["col10"].dtype == "string"
+            assert smaller["col10"].dtype == "str"
         else:
             assert smaller["col10"].dtype == np.object_
         assert (smaller["col10"] == ["1", "2"]).all()
@@ -458,13 +458,13 @@ def test_setitem_corner(self, float_frame, using_infer_string):
         del dm["foo"]
         dm["foo"] = "bar"
         if using_infer_string:
-            assert dm["foo"].dtype == "string"
+            assert dm["foo"].dtype == "str"
         else:
             assert dm["foo"].dtype == np.object_
 
         dm["coercible"] = ["1", "2", "3"]
         if using_infer_string:
-            assert dm["coercible"].dtype == "string"
+            assert dm["coercible"].dtype == "str"
         else:
             assert dm["coercible"].dtype == np.object_
 
@@ -500,7 +500,7 @@ def test_setitem_ambig(self, using_infer_string):
         dm[2] = uncoercable_series
         assert len(dm.columns) == 3
         if using_infer_string:
-            assert dm[2].dtype == "string"
+            assert dm[2].dtype == "str"
         else:
             assert dm[2].dtype == np.object_
 
diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py
index aaf95daf232e2..124505f440e6c 100644
--- a/pandas/tests/frame/indexing/test_set_value.py
+++ b/pandas/tests/frame/indexing/test_set_value.py
@@ -28,7 +28,7 @@ def test_set_value_resize(self, float_frame, using_infer_string):
         res = float_frame.copy()
         res._set_value("foobar", "baz", "sam")
         if using_infer_string:
-            assert res["baz"].dtype == "string"
+            assert res["baz"].dtype == "str"
         else:
             assert res["baz"].dtype == np.object_
         res = float_frame.copy()
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index c6c702a1a0b1b..8647df0e8ad96 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -201,7 +201,7 @@ def test_astype_dict_like(self, dtype_class):
         expected = DataFrame(
             {
                 "a": a,
-                "b": Series(["0", "1", "2", "3", "4"], dtype="object"),
+                "b": Series(["0", "1", "2", "3", "4"], dtype="str"),
                 "c": c,
                 "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float32"),
             }
@@ -262,9 +262,9 @@ def test_astype_duplicate_col(self):
         a2 = Series([0, 1, 2, 3, 4], name="a")
         df = concat([a1, b, a2], axis=1)
 
-        result = df.astype(str)
+        result = df.astype("str")
         a1_str = Series(["1", "2", "3", "4", "5"], dtype="str", name="a")
-        b_str = Series(["0.1", "0.2", "0.4", "0.6", "0.8"], dtype=str, name="b")
+        b_str = Series(["0.1", "0.2", "0.4", "0.6", "0.8"], dtype="str", name="b")
         a2_str = Series(["0", "1", "2", "3", "4"], dtype="str", name="a")
         expected = concat([a1_str, b_str, a2_str], axis=1)
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py
index c5d32d56d03c1..6d097e75f6703 100644
--- a/pandas/tests/frame/methods/test_get_numeric_data.py
+++ b/pandas/tests/frame/methods/test_get_numeric_data.py
@@ -33,7 +33,9 @@ def test_get_numeric_data(self, using_infer_string):
             [
                 np.dtype("float64"),
                 np.dtype("int64"),
-                np.dtype(objectname) if not using_infer_string else "string",
+                np.dtype(objectname)
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan),
                 np.dtype(datetime64name),
             ],
             index=["a", "b", "c", "f"],
diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index 56bb3126455a5..52e871cc795b4 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -74,7 +74,7 @@ def test_nlargest_n(self, nselect_method, n, order):
         )
         if "b" in order:
             error_msg = (
-                f"Column 'b' has dtype (object|string), "
+                f"Column 'b' has dtype (object|str), "
                 f"cannot use method '{nselect_method}' with this dtype"
             )
             with pytest.raises(TypeError, match=error_msg):
diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py
index c487bc4cfb89a..88e43b678a7e4 100644
--- a/pandas/tests/frame/methods/test_reset_index.py
+++ b/pandas/tests/frame/methods/test_reset_index.py
@@ -662,7 +662,7 @@ def test_reset_index_dtypes_on_empty_frame_with_multiindex(
     idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
     result = DataFrame(index=idx)[:0].reset_index().dtypes
     if using_infer_string and dtype == object:
-        dtype = "string"
+        dtype = pd.StringDtype(na_value=np.nan)
     expected = Series({"level_0": np.int64, "level_1": np.float64, "level_2": dtype})
     tm.assert_series_equal(result, expected)
 
@@ -695,7 +695,7 @@ def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby(
     expected["c3"] = expected["c3"].astype("datetime64[ns]")
     expected["c1"] = expected["c1"].astype("float64")
     if using_infer_string:
-        expected["c2"] = expected["c2"].astype("string[pyarrow_numpy]")
+        expected["c2"] = expected["c2"].astype("str")
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
index d1bee6a3de613..875dca321635f 100644
--- a/pandas/tests/frame/methods/test_select_dtypes.py
+++ b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -50,7 +50,7 @@ def copy(self):
 
 
 class TestSelectDtypes:
-    def test_select_dtypes_include_using_list_like(self):
+    def test_select_dtypes_include_using_list_like(self, using_infer_string):
         df = DataFrame(
             {
                 "a": list("abc"),
@@ -94,6 +94,11 @@ def test_select_dtypes_include_using_list_like(self):
         with pytest.raises(NotImplementedError, match=r"^$"):
             df.select_dtypes(include=["period"])
 
+        if using_infer_string:
+            ri = df.select_dtypes(include=["str"])
+            ei = df[["a"]]
+            tm.assert_frame_equal(ri, ei)
+
     def test_select_dtypes_exclude_using_list_like(self):
         df = DataFrame(
             {
@@ -151,7 +156,7 @@ def test_select_dtypes_exclude_include_int(self, include):
         expected = df[["b", "c", "e"]]
         tm.assert_frame_equal(result, expected)
 
-    def test_select_dtypes_include_using_scalars(self):
+    def test_select_dtypes_include_using_scalars(self, using_infer_string):
         df = DataFrame(
             {
                 "a": list("abc"),
@@ -187,6 +192,11 @@ def test_select_dtypes_include_using_scalars(self):
         with pytest.raises(NotImplementedError, match=r"^$"):
             df.select_dtypes(include="period")
 
+        if using_infer_string:
+            ri = df.select_dtypes(include="str")
+            ei = df[["a"]]
+            tm.assert_frame_equal(ri, ei)
+
     def test_select_dtypes_exclude_using_scalars(self):
         df = DataFrame(
             {
@@ -347,7 +357,10 @@ def test_select_dtypes_datetime_with_tz(self):
 
     @pytest.mark.parametrize("dtype", [str, "str", np.bytes_, "S1", np.str_, "U1"])
     @pytest.mark.parametrize("arg", ["include", "exclude"])
-    def test_select_dtypes_str_raises(self, dtype, arg):
+    def test_select_dtypes_str_raises(self, dtype, arg, using_infer_string):
+        if using_infer_string and dtype == "str":
+            # this is tested below
+            pytest.skip("Selecting string columns works with future strings")
         df = DataFrame(
             {
                 "a": list("abc"),
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index 7fb1658394632..adb327e90bb76 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -714,10 +714,7 @@ def test_to_csv_interval_index(self, temp_file, using_infer_string):
 
         # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
         expected = df.copy()
-        if using_infer_string:
-            expected.index = expected.index.astype("string[pyarrow_numpy]")
-        else:
-            expected.index = expected.index.astype(str)
+        expected.index = expected.index.astype("str")
 
         tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
index c95c382bb5131..47eb387abc8e8 100644
--- a/pandas/tests/frame/test_block_internals.py
+++ b/pandas/tests/frame/test_block_internals.py
@@ -186,7 +186,9 @@ def test_construction_with_mixed(self, float_string_frame, using_infer_string):
         expected = Series(
             [np.dtype("float64")] * 4
             + [
-                np.dtype("object") if not using_infer_string else "string",
+                np.dtype("object")
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan),
                 np.dtype("datetime64[us]"),
                 np.dtype("timedelta64[us]"),
             ],
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 607e333d82823..a210af94561f9 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -256,7 +256,7 @@ def test_emptylike_constructor(self, emptylike, expected_index, expected_columns
         tm.assert_frame_equal(result, expected)
 
     def test_constructor_mixed(self, float_string_frame, using_infer_string):
-        dtype = "string" if using_infer_string else np.object_
+        dtype = "str" if using_infer_string else np.object_
         assert float_string_frame["foo"].dtype == dtype
 
     def test_constructor_cast_failure(self):
@@ -760,7 +760,7 @@ def test_constructor_dict_cast(self, using_infer_string):
 
         frame = DataFrame(test_data)
         assert len(frame) == 3
-        assert frame["B"].dtype == np.object_ if not using_infer_string else "string"
+        assert frame["B"].dtype == np.object_ if not using_infer_string else "str"
         assert frame["A"].dtype == np.float64
 
     def test_constructor_dict_cast2(self):
@@ -1182,7 +1182,7 @@ def test_constructor_scalar_inference(self, using_infer_string):
         assert df["bool"].dtype == np.bool_
         assert df["float"].dtype == np.float64
         assert df["complex"].dtype == np.complex128
-        assert df["object"].dtype == np.object_ if not using_infer_string else "string"
+        assert df["object"].dtype == np.object_ if not using_infer_string else "str"
 
     def test_constructor_arrays_and_scalars(self):
         df = DataFrame({"a": np.random.default_rng(2).standard_normal(10), "b": True})
@@ -1265,7 +1265,7 @@ def test_constructor_list_of_lists(self, using_infer_string):
         # GH #484
         df = DataFrame(data=[[1, "a"], [2, "b"]], columns=["num", "str"])
         assert is_integer_dtype(df["num"])
-        assert df["str"].dtype == np.object_ if not using_infer_string else "string"
+        assert df["str"].dtype == np.object_ if not using_infer_string else "str"
 
         # GH 4851
         # list of 0-dim ndarrays
@@ -1833,7 +1833,12 @@ def test_constructor_with_datetimes(self, using_infer_string):
         result = df.dtypes
         expected = Series(
             [np.dtype("int64")]
-            + [np.dtype(objectname) if not using_infer_string else "string"] * 2
+            + [
+                np.dtype(objectname)
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan)
+            ]
+            * 2
             + [np.dtype("M8[s]"), np.dtype("M8[us]")],
             index=list("ABCDE"),
         )
@@ -1855,7 +1860,11 @@ def test_constructor_with_datetimes(self, using_infer_string):
         expected = Series(
             [np.dtype("float64")]
             + [np.dtype("int64")]
-            + [np.dtype("object") if not using_infer_string else "string"]
+            + [
+                np.dtype("object")
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan)
+            ]
             + [np.dtype("float64")]
             + [np.dtype(intname)],
             index=["a", "b", "c", floatname, intname],
@@ -1877,7 +1886,11 @@ def test_constructor_with_datetimes(self, using_infer_string):
         expected = Series(
             [np.dtype("float64")]
             + [np.dtype("int64")]
-            + [np.dtype("object") if not using_infer_string else "string"]
+            + [
+                np.dtype("object")
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan)
+            ]
             + [np.dtype("float64")]
             + [np.dtype(intname)],
             index=["a", "b", "c", floatname, intname],
@@ -2103,7 +2116,9 @@ def test_constructor_for_list_with_dtypes(self, using_infer_string):
             [
                 np.dtype("int64"),
                 np.dtype("float64"),
-                np.dtype("object") if not using_infer_string else "string",
+                np.dtype("object")
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan),
                 np.dtype("datetime64[us]"),
                 np.dtype("float64"),
             ],
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index 92bcd6f0c7d0c..b4f02b6f81b6f 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -658,7 +658,11 @@ def test_unstack_dtypes(self, using_infer_string):
         df2["D"] = "foo"
         df3 = df2.unstack("B")
         result = df3.dtypes
-        dtype = "string" if using_infer_string else np.dtype("object")
+        dtype = (
+            pd.StringDtype(na_value=np.nan)
+            if using_infer_string
+            else np.dtype("object")
+        )
         expected = Series(
             [np.dtype("float64")] * 2 + [dtype] * 2,
             index=MultiIndex.from_arrays(
diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py
index d8401a8b2ae3f..9fe9bca8abdc9 100644
--- a/pandas/tests/generic/test_to_xarray.py
+++ b/pandas/tests/generic/test_to_xarray.py
@@ -52,7 +52,7 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
         # column names are lost
         expected = df.copy()
         expected["f"] = expected["f"].astype(
-            object if not using_infer_string else "string[pyarrow_numpy]"
+            object if not using_infer_string else "str"
         )
         expected.columns.name = None
         tm.assert_frame_equal(result.to_dataframe(), expected)
@@ -81,7 +81,7 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
         result = result.to_dataframe()
         expected = df.copy()
         expected["f"] = expected["f"].astype(
-            object if not using_infer_string else "string[pyarrow_numpy]"
+            object if not using_infer_string else "str"
         )
         expected.columns.name = None
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 644f93a37a3a3..1a4127ab49b0e 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -6,8 +6,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -79,7 +77,7 @@ def test_apply_index_date(using_infer_string):
     tm.assert_frame_equal(result, expected)
 
 
-def test_apply_index_date_object(using_infer_string):
+def test_apply_index_date_object():
     # GH 5789
     # don't auto coerce dates
     ts = [
@@ -111,10 +109,7 @@ def test_apply_index_date_object(using_infer_string):
         1.40750,
         1.40649,
     ]
-    dtype = "string[pyarrow_numpy]" if using_infer_string else object
-    exp_idx = Index(
-        ["2011-05-16", "2011-05-17", "2011-05-18"], dtype=dtype, name="date"
-    )
+    exp_idx = Index(["2011-05-16", "2011-05-17", "2011-05-18"], name="date")
     expected = Series(["00:00", "02:00", "02:00"], index=exp_idx)
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
     with tm.assert_produces_warning(DeprecationWarning, match=msg):
@@ -922,12 +917,11 @@ def test_func_returns_object():
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.parametrize(
     "group_column_dtlike",
     [datetime.today(), datetime.today().date(), datetime.today().time()],
 )
-def test_apply_datetime_issue(group_column_dtlike, using_infer_string):
+def test_apply_datetime_issue(group_column_dtlike):
     # GH-28247
     # groupby-apply throws an error if one of the columns in the DataFrame
     #   is a datetime object and the column labels are different from
@@ -938,8 +932,7 @@ def test_apply_datetime_issue(group_column_dtlike, using_infer_string):
     with tm.assert_produces_warning(DeprecationWarning, match=msg):
         result = df.groupby("a").apply(lambda x: Series(["spam"], index=[42]))
 
-    dtype = "string" if using_infer_string else "object"
-    expected = DataFrame(["spam"], Index(["foo"], dtype=dtype, name="a"), columns=[42])
+    expected = DataFrame(["spam"], Index(["foo"], dtype="str", name="a"), columns=[42])
     tm.assert_frame_equal(result, expected)
 
 
@@ -1020,7 +1013,7 @@ def test_groupby_apply_datetime_result_dtypes(using_infer_string):
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
     with tm.assert_produces_warning(DeprecationWarning, match=msg):
         result = data.groupby("color").apply(lambda g: g.iloc[0]).dtypes
-    dtype = "string" if using_infer_string else object
+    dtype = pd.StringDtype(na_value=np.nan) if using_infer_string else object
     expected = Series(
         [np.dtype("datetime64[us]"), dtype, dtype, np.int64, dtype],
         index=["observation", "color", "mood", "intensity", "score"],
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index c35f5d2bc26e8..1e86b5401ee09 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -134,7 +134,7 @@ def f(x):
         result = g.apply(f)
     expected = x.iloc[[0, 1]].copy()
     expected.index = Index([1, 2], name="person_id")
-    dtype = "string[pyarrow_numpy]" if using_infer_string else object
+    dtype = "str" if using_infer_string else object
     expected["person_name"] = expected["person_name"].astype(dtype)
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 5ac6dc990c092..791f279bffc94 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -889,7 +889,7 @@ def test_groupby_complex_mean():
     tm.assert_frame_equal(result, expected)
 
 
-def test_groupby_complex_numbers(using_infer_string):
+def test_groupby_complex_numbers():
     # GH 17927
     df = DataFrame(
         [
@@ -898,11 +898,10 @@ def test_groupby_complex_numbers(using_infer_string):
             {"a": 4, "b": 1},
         ]
     )
-    dtype = "string[pyarrow_numpy]" if using_infer_string else object
     expected = DataFrame(
         np.array([1, 1, 1], dtype=np.int64),
         index=Index([(1 + 1j), (1 + 2j), (1 + 0j)], name="b"),
-        columns=Index(["a"], dtype=dtype),
+        columns=Index(["a"]),
     )
     result = df.groupby("b", sort=False).count()
     tm.assert_frame_equal(result, expected)
@@ -1759,7 +1758,7 @@ def get_categorical_invalid_expected():
             idx = Index(lev, name=keys[0])
 
         if using_infer_string:
-            columns = Index([], dtype="string[pyarrow_numpy]")
+            columns = Index([], dtype="str")
         else:
             columns = []
         expected = DataFrame([], columns=columns, index=idx)
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
index 7e7c84fa2b390..41e00f8121b14 100644
--- a/pandas/tests/groupby/test_numeric_only.py
+++ b/pandas/tests/groupby/test_numeric_only.py
@@ -180,6 +180,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     "category type does not support sum operations",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     re.escape(f"agg function failed [how->{method},dtype->string]"),
+                    re.escape(f"agg function failed [how->{method},dtype->str]"),
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -197,6 +198,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     f"Cannot perform {method} with non-ordered Categorical",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     re.escape(f"agg function failed [how->{method},dtype->string]"),
+                    re.escape(f"agg function failed [how->{method},dtype->str]"),
                 ]
             )
             with pytest.raises(exception, match=msg):
diff --git a/pandas/tests/indexes/base_class/test_formats.py b/pandas/tests/indexes/base_class/test_formats.py
index dc4763d96bc71..260b4203a4f04 100644
--- a/pandas/tests/indexes/base_class/test_formats.py
+++ b/pandas/tests/indexes/base_class/test_formats.py
@@ -9,7 +9,6 @@
 
 
 class TestIndexRendering:
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_repr_is_valid_construction_code(self):
         # for the case of Index, where the repr is traditional rather than
         # stylized
diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py
index 38e0920b7004e..b2867d4ac8e68 100644
--- a/pandas/tests/indexes/multi/test_constructors.py
+++ b/pandas/tests/indexes/multi/test_constructors.py
@@ -850,7 +850,7 @@ def test_dtype_representation(using_infer_string):
     # GH#46900
     pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
     result = pmidx.dtypes
-    exp = "object" if not using_infer_string else "string"
+    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
     expected = Series(
         ["int64", exp],
         index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py
index dd4bba42eda6f..7f292aacc39ca 100644
--- a/pandas/tests/indexes/multi/test_get_set.py
+++ b/pandas/tests/indexes/multi/test_get_set.py
@@ -41,7 +41,7 @@ def test_get_dtypes(using_infer_string):
         names=["int", "string", "dt"],
     )
 
-    exp = "object" if not using_infer_string else "string"
+    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
     expected = pd.Series(
         {
             "int": np.dtype("int64"),
@@ -61,7 +61,7 @@ def test_get_dtypes_no_level_name(using_infer_string):
             pd.date_range("20200101", periods=2, tz="UTC"),
         ],
     )
-    exp = "object" if not using_infer_string else "string"
+    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
     expected = pd.Series(
         {
             "level_0": np.dtype("int64"),
@@ -82,7 +82,7 @@ def test_get_dtypes_duplicate_level_names(using_infer_string):
         ],
         names=["A", "A", "A"],
     ).dtypes
-    exp = "object" if not using_infer_string else "string"
+    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
     expected = pd.Series(
         [np.dtype("int64"), exp, DatetimeTZDtype(tz="utc")],
         index=["A", "A", "A"],
diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py
index 039836da75cd5..1eeeebd6b8ca9 100644
--- a/pandas/tests/indexes/object/test_indexing.py
+++ b/pandas/tests/indexes/object/test_indexing.py
@@ -171,6 +171,7 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
 
 
 class TestSliceLocs:
+    # TODO(infer_string) parametrize over multiple string dtypes
     @pytest.mark.parametrize(
         "dtype",
         [
@@ -209,6 +210,7 @@ def test_slice_locs_negative_step(self, in_slice, expected, dtype):
         expected = Index(list(expected), dtype=dtype)
         tm.assert_index_equal(result, expected)
 
+    # TODO(infer_string) parametrize over multiple string dtypes
     @td.skip_if_no("pyarrow")
     def test_slice_locs_negative_step_oob(self):
         index = Index(list("bcdxy"), dtype="string[pyarrow_numpy]")
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 8d2245d0d9978..0911f2aec74d6 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -79,7 +79,7 @@ def test_constructor_copy(self, using_infer_string):
         assert new_index.name == "name"
         if using_infer_string:
             tm.assert_extension_array_equal(
-                new_index.values, pd.array(arr, dtype="string[pyarrow_numpy]")
+                new_index.values, pd.array(arr, dtype="str")
             )
         else:
             tm.assert_numpy_array_equal(arr, new_index.values)
@@ -157,7 +157,7 @@ def test_constructor_from_frame_series_freq(self, using_infer_string):
         df = DataFrame(np.random.default_rng(2).random((5, 3)))
         df["date"] = dts
         result = DatetimeIndex(df["date"], freq="MS")
-        dtype = object if not using_infer_string else "string"
+        dtype = object if not using_infer_string else "str"
         assert df["date"].dtype == dtype
         expected.name = "date"
         tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py
index d482d20591a8a..ec9767aa4bab4 100644
--- a/pandas/tests/indexing/multiindex/test_loc.py
+++ b/pandas/tests/indexing/multiindex/test_loc.py
@@ -601,7 +601,7 @@ def test_loc_nan_multiindex(using_infer_string):
         np.ones((1, 4)),
         index=Index(
             [np.nan],
-            dtype="object" if not using_infer_string else "string[pyarrow_numpy]",
+            dtype="object" if not using_infer_string else "str",
             name="u3",
         ),
         columns=Index(["d1", "d2", "d3", "d4"]),
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index 6b072bc27ed81..ef8c0e432ca49 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -259,7 +259,7 @@ def test_dups_fancy_indexing_only_missing_label(self, using_infer_string):
             with pytest.raises(
                 KeyError,
                 match=re.escape(
-                    "\"None of [Index(['E'], dtype='string')] are in the [index]\""
+                    "\"None of [Index(['E'], dtype='str')] are in the [index]\""
                 ),
             ):
                 dfnu.loc[["E"]]
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 1b2dc0819006c..247501f1504e7 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -62,12 +62,17 @@ def test_not_change_nan_loc(series, new_series, expected_ser):
 
 
 class TestLoc:
-    def test_none_values_on_string_columns(self):
+    def test_none_values_on_string_columns(self, using_infer_string):
         # Issue #32218
-        df = DataFrame(["1", "2", None], columns=["a"], dtype="str")
-
+        df = DataFrame(["1", "2", None], columns=["a"], dtype=object)
         assert df.loc[2, "a"] is None
 
+        df = DataFrame(["1", "2", None], columns=["a"], dtype="str")
+        if using_infer_string:
+            assert np.isnan(df.loc[2, "a"])
+        else:
+            assert df.loc[2, "a"] is None
+
     def test_loc_getitem_int(self, frame_or_series):
         # int label
         obj = frame_or_series(range(3), index=Index(list("abc"), dtype=object))
@@ -1394,7 +1399,7 @@ def test_loc_setitem_single_row_categorical(self, using_infer_string):
 
         result = df["Alpha"]
         expected = Series(categories, index=df.index, name="Alpha").astype(
-            object if not using_infer_string else "string[pyarrow_numpy]"
+            object if not using_infer_string else "str"
         )
         tm.assert_series_equal(result, expected)
 
@@ -1563,7 +1568,7 @@ def test_loc_setitem_single_column_mixed(self, using_infer_string):
         df.loc[df.index[::2], "str"] = np.nan
         expected = Series(
             [np.nan, "qux", np.nan, "qux", np.nan],
-            dtype=object if not using_infer_string else "string[pyarrow_numpy]",
+            dtype=object if not using_infer_string else "str",
         ).values
         tm.assert_almost_equal(df["str"].values, expected)
 
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index 4d232d5ed1312..3dbdedbb94618 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -227,7 +227,7 @@ def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string):
             {
                 "x": Series(
                     ["1", "2"],
-                    dtype=object if not using_infer_string else "string[pyarrow_numpy]",
+                    dtype=object if not using_infer_string else "str",
                 ),
                 "y": Series([np.nan, np.nan], dtype=object),
             }
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 579d3fbfb3435..c1f71c6de92dd 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -621,7 +621,7 @@ def _compare(old_mgr, new_mgr):
         mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
         mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
         new_mgr = mgr.convert()
-        dtype = "string[pyarrow_numpy]" if using_infer_string else np.object_
+        dtype = "str" if using_infer_string else np.object_
         assert new_mgr.iget(0).dtype == dtype
         assert new_mgr.iget(1).dtype == dtype
         assert new_mgr.iget(2).dtype == dtype
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
index 0d753cb871c64..e1cdfb8bfa7e3 100644
--- a/pandas/tests/io/excel/test_writers.py
+++ b/pandas/tests/io/excel/test_writers.py
@@ -775,7 +775,7 @@ def test_to_excel_interval_no_labels(self, tmp_excel, using_infer_string):
 
         df["new"] = pd.cut(df[0], 10)
         expected["new"] = pd.cut(expected[0], 10).astype(
-            str if not using_infer_string else "string[pyarrow_numpy]"
+            str if not using_infer_string else "str"
         )
 
         df.to_excel(tmp_excel, sheet_name="test1")
diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py
index bddd71d2bd5f6..7f367ded39863 100644
--- a/pandas/tests/io/json/test_json_table_schema.py
+++ b/pandas/tests/io/json/test_json_table_schema.py
@@ -76,7 +76,7 @@ def test_build_table_schema(self, df_schema, using_infer_string):
             "primaryKey": ["idx"],
         }
         if using_infer_string:
-            expected["fields"][2] = {"name": "B", "type": "any", "extDtype": "string"}
+            expected["fields"][2] = {"name": "B", "type": "any", "extDtype": "str"}
         assert result == expected
         result = build_table_schema(df_schema)
         assert "pandas_version" in result
@@ -129,7 +129,7 @@ def test_multiindex(self, df_schema, using_infer_string):
                 "type": "any",
                 "extDtype": "string",
             }
-            expected["fields"][3] = {"name": "B", "type": "any", "extDtype": "string"}
+            expected["fields"][3] = {"name": "B", "type": "any", "extDtype": "str"}
         assert result == expected
 
         df.index.names = ["idx0", None]
@@ -309,7 +309,7 @@ def test_to_json(self, df_table, using_infer_string):
         ]
 
         if using_infer_string:
-            fields[2] = {"name": "B", "type": "any", "extDtype": "string"}
+            fields[2] = {"name": "B", "type": "any", "extDtype": "str"}
 
         schema = {"fields": fields, "primaryKey": ["idx"]}
         data = [
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index d281729e9704c..1bc227369a968 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -255,7 +255,7 @@ def test_roundtrip_categorical(
 
         expected = categorical_frame.copy()
         expected.index = expected.index.astype(
-            str if not using_infer_string else "string[pyarrow_numpy]"
+            str if not using_infer_string else "str"
         )  # Categorical not preserved
         expected.index.name = None  # index names aren't preserved in JSON
         assert_json_roundtrip_equal(result, expected, orient)
@@ -610,7 +610,7 @@ def test_blocks_compat_GH9037(self, using_infer_string):
 
         # JSON deserialisation always creates unicode strings
         df_mixed.columns = df_mixed.columns.astype(
-            np.str_ if not using_infer_string else "string[pyarrow_numpy]"
+            np.str_ if not using_infer_string else "str"
         )
         data = StringIO(df_mixed.to_json(orient="split"))
         df_roundtrip = read_json(data, orient="split")
@@ -695,7 +695,7 @@ def test_series_roundtrip_simple(self, orient, string_series, using_infer_string
         expected = string_series
         if using_infer_string and orient in ("split", "index", "columns"):
             # These schemas don't contain dtypes, so we infer string
-            expected.index = expected.index.astype("string[pyarrow_numpy]")
+            expected.index = expected.index.astype("str")
         if orient in ("values", "records"):
             expected = expected.reset_index(drop=True)
         if orient != "split":
@@ -1573,7 +1573,6 @@ def test_from_json_to_json_table_index_and_columns(self, index, columns):
         result = read_json(StringIO(dfjson), orient="table")
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_from_json_to_json_table_dtypes(self):
         # GH21345
         expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
diff --git a/pandas/tests/io/pytables/test_keys.py b/pandas/tests/io/pytables/test_keys.py
index 55bd3f0d5a03a..7d0802dcf2e47 100644
--- a/pandas/tests/io/pytables/test_keys.py
+++ b/pandas/tests/io/pytables/test_keys.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
 from pandas import (
     DataFrame,
     HDFStore,
@@ -13,7 +15,10 @@
     tables,
 )
 
-pytestmark = pytest.mark.single_cpu
+pytestmark = [
+    pytest.mark.single_cpu,
+    pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
+]
 
 
 def test_keys(setup_path):
diff --git a/pandas/tests/io/pytables/test_subclass.py b/pandas/tests/io/pytables/test_subclass.py
index 03622faa2b5a8..bbe1cd77e0d9f 100644
--- a/pandas/tests/io/pytables/test_subclass.py
+++ b/pandas/tests/io/pytables/test_subclass.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
 from pandas import (
     DataFrame,
     Series,
@@ -17,6 +19,7 @@
 
 class TestHDFStoreSubclass:
     # GH 33748
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_supported_for_subclass_dataframe(self, tmp_path):
         data = {"a": [1, 2], "b": [3, 4]}
         sdf = tm.SubclassedDataFrame(data, dtype=np.intp)
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index c583f9b2c4f99..10e3af601b7ef 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -364,6 +364,7 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
                     expected = f_path.read()
                     assert result == expected
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_write_fspath_hdf5(self):
         # Same test as write_fspath_all, except HDF5 files aren't
         # necessarily byte-for-byte identical for a given dataframe, so we'll
diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py
index 7ffee9ea78ddc..45e0cab2165a7 100644
--- a/pandas/tests/io/test_fsspec.py
+++ b/pandas/tests/io/test_fsspec.py
@@ -176,6 +176,7 @@ def test_excel_options(fsspectest):
     assert fsspectest.test[0] == "read"
 
 
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_to_parquet_new_file(cleared_fs, df1):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""
     pytest.importorskip("fastparquet")
@@ -283,7 +284,6 @@ def test_not_present_exception():
         read_csv("memory://test/test.csv")
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_feather_options(fsspectest):
     pytest.importorskip("pyarrow")
     df = DataFrame({"a": [0]})
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index e113fa25b2a3f..bf56a5781f7cd 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -208,6 +208,7 @@ def test_to_csv_compression_encoding_gcs(
     tm.assert_frame_equal(df, read_df)
 
 
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_to_parquet_gcs_new_file(monkeypatch, tmpdir):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""
     pytest.importorskip("fastparquet")
diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py
index 409aafee58e49..96ef50f9d7149 100644
--- a/pandas/tests/io/xml/test_xml_dtypes.py
+++ b/pandas/tests/io/xml/test_xml_dtypes.py
@@ -4,8 +4,6 @@
 
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.errors import ParserWarning
 import pandas.util._test_decorators as td
 
@@ -85,7 +83,6 @@ def read_xml_iterparse(data, **kwargs):
 # DTYPE
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_dtype_single_str(parser):
     df_result = read_xml(StringIO(xml_types), dtype={"degrees": "str"}, parser=parser)
     df_iter = read_xml_iterparse(
@@ -211,7 +208,6 @@ def test_wrong_dtype(xml_books, parser, iterparse):
         )
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_both_dtype_converters(parser):
     df_expected = DataFrame(
         {
diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py
index bbaaf0abecfbd..8e6a14e6bfb8f 100644
--- a/pandas/tests/reshape/concat/test_categorical.py
+++ b/pandas/tests/reshape/concat/test_categorical.py
@@ -59,9 +59,7 @@ def test_categorical_concat_dtypes(self, using_infer_string):
         num = Series([1, 2, 3])
         df = pd.concat([Series(cat), obj, num], axis=1, keys=index)
 
-        result = df.dtypes == (
-            object if not using_infer_string else "string[pyarrow_numpy]"
-        )
+        result = df.dtypes == (object if not using_infer_string else "str")
         expected = Series([False, True, False], index=index)
         tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py
index 06d57c48df817..4869cfbf3a556 100644
--- a/pandas/tests/reshape/concat/test_empty.py
+++ b/pandas/tests/reshape/concat/test_empty.py
@@ -27,7 +27,7 @@ def test_handle_empty_objects(self, sort, using_infer_string):
 
         expected = df.reindex(columns=["a", "b", "c", "d", "foo"])
         expected["foo"] = expected["foo"].astype(
-            object if not using_infer_string else "string[pyarrow_numpy]"
+            object if not using_infer_string else "str"
         )
         expected.loc[0:4, "foo"] = "bar"
 
@@ -282,7 +282,7 @@ def test_concat_empty_dataframe_different_dtypes(self, using_infer_string):
 
         result = concat([df1[:0], df2[:0]])
         assert result["a"].dtype == np.int64
-        assert result["b"].dtype == np.object_ if not using_infer_string else "string"
+        assert result["b"].dtype == np.object_ if not using_infer_string else "str"
 
     def test_concat_to_empty_ea(self):
         """48510 `concat` to an empty EA should maintain type EA dtype."""
diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py
index e13b042192fc6..17f1a9d4ecbf1 100644
--- a/pandas/tests/reshape/concat/test_index.py
+++ b/pandas/tests/reshape/concat/test_index.py
@@ -449,9 +449,7 @@ def test_concat_axis_1_sort_false_rangeindex(self, using_infer_string):
         s1 = Series(["a", "b", "c"])
         s2 = Series(["a", "b"])
         s3 = Series(["a", "b", "c", "d"])
-        s4 = Series(
-            [], dtype=object if not using_infer_string else "string[pyarrow_numpy]"
-        )
+        s4 = Series([], dtype=object if not using_infer_string else "str")
         result = concat(
             [s1, s2, s3, s4], sort=False, join="outer", ignore_index=False, axis=1
         )
@@ -462,7 +460,7 @@ def test_concat_axis_1_sort_false_rangeindex(self, using_infer_string):
                 ["c", np.nan] * 2,
                 [np.nan] * 2 + ["d"] + [np.nan],
             ],
-            dtype=object if not using_infer_string else "string[pyarrow_numpy]",
+            dtype=object if not using_infer_string else "str",
         )
         tm.assert_frame_equal(
             result, expected, check_index_type=True, check_column_type=True
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index 883a50a0c1399..f090ded06119a 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -155,7 +155,7 @@ def test_join_on(self, target_source, infer_string):
 
         # overlap
         msg = (
-            "You are trying to merge on float64 and object|string columns for key "
+            "You are trying to merge on float64 and object|str columns for key "
             "'A'. If you wish to proceed you should use pd.concat"
         )
         with pytest.raises(ValueError, match=msg):
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 4a6228e47eba0..ad704d87a491b 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -812,7 +812,7 @@ def test_overlapping_columns_error_message(self):
 
         # #2649, #10639
         df2.columns = ["key1", "foo", "foo"]
-        msg = r"Data columns not unique: Index\(\['foo'\], dtype='object|string'\)"
+        msg = r"Data columns not unique: Index\(\['foo'\], dtype='object|str'\)"
         with pytest.raises(MergeError, match=msg):
             merge(df, df2)
 
@@ -1859,7 +1859,7 @@ def test_identical(self, left, using_infer_string):
         # merging on the same, should preserve dtypes
         merged = merge(left, left, on="X")
         result = merged.dtypes.sort_index()
-        dtype = np.dtype("O") if not using_infer_string else "string"
+        dtype = np.dtype("O") if not using_infer_string else "str"
         expected = Series(
             [CategoricalDtype(categories=["foo", "bar"]), dtype, dtype],
             index=["X", "Y_x", "Y_y"],
@@ -1871,7 +1871,7 @@ def test_basic(self, left, right, using_infer_string):
         # so should preserve the merged column
         merged = merge(left, right, on="X")
         result = merged.dtypes.sort_index()
-        dtype = np.dtype("O") if not using_infer_string else "string"
+        dtype = np.dtype("O") if not using_infer_string else "str"
         expected = Series(
             [
                 CategoricalDtype(categories=["foo", "bar"]),
@@ -1985,7 +1985,7 @@ def test_other_columns(self, left, right, using_infer_string):
 
         merged = merge(left, right, on="X")
         result = merged.dtypes.sort_index()
-        dtype = np.dtype("O") if not using_infer_string else "string"
+        dtype = np.dtype("O") if not using_infer_string else "str"
         expected = Series(
             [
                 CategoricalDtype(categories=["foo", "bar"]),
@@ -2022,7 +2022,7 @@ def test_dtype_on_merged_different(
         merged = merge(left, right, on="X", how=join_type)
 
         result = merged.dtypes.sort_index()
-        dtype = np.dtype("O") if not using_infer_string else "string"
+        dtype = np.dtype("O") if not using_infer_string else "str"
         expected = Series([dtype, dtype, np.dtype("int64")], index=["X", "Y", "Z"])
         tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py
index 62fd8c5a7e231..8d972087b0dff 100644
--- a/pandas/tests/reshape/merge/test_merge_asof.py
+++ b/pandas/tests/reshape/merge/test_merge_asof.py
@@ -3164,7 +3164,7 @@ def test_by_nullable(self, any_numeric_ea_dtype, using_infer_string):
         )
         expected["value_y"] = np.array([np.nan, np.nan, np.nan], dtype=object)
         if using_infer_string:
-            expected["value_y"] = expected["value_y"].astype("string[pyarrow_numpy]")
+            expected["value_y"] = expected["value_y"].astype("str")
         tm.assert_frame_equal(result, expected)
 
     def test_merge_by_col_tz_aware(self):
@@ -3215,7 +3215,7 @@ def test_by_mixed_tz_aware(self, using_infer_string):
         )
         expected["value_y"] = np.array([np.nan], dtype=object)
         if using_infer_string:
-            expected["value_y"] = expected["value_y"].astype("string[pyarrow_numpy]")
+            expected["value_y"] = expected["value_y"].astype("str")
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("dtype", ["float64", "int16", "m8[ns]", "M8[us]"])
diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py
index bfb6a3c0167c8..da1930323f464 100644
--- a/pandas/tests/reshape/test_from_dummies.py
+++ b/pandas/tests/reshape/test_from_dummies.py
@@ -337,7 +337,7 @@ def test_no_prefix_string_cats_default_category(
     result = from_dummies(dummies, default_category=default_category)
     expected = DataFrame(expected)
     if using_infer_string:
-        expected[""] = expected[""].astype("string[pyarrow_numpy]")
+        expected[""] = expected[""].astype("str")
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/reshape/test_get_dummies.py b/pandas/tests/reshape/test_get_dummies.py
index 304ba65f38d3c..27a34decae7b0 100644
--- a/pandas/tests/reshape/test_get_dummies.py
+++ b/pandas/tests/reshape/test_get_dummies.py
@@ -122,7 +122,7 @@ def test_get_dummies_basic_types(self, sparse, dtype, using_infer_string):
 
         result = get_dummies(s_df, columns=["a"], sparse=sparse, dtype=dtype)
 
-        key = "string" if using_infer_string else "object"
+        key = "str" if using_infer_string else "object"
         expected_counts = {"int64": 1, key: 1}
         expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0)
 
diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py
index 03e823ce607fb..3e1ece6b7f59e 100644
--- a/pandas/tests/series/accessors/test_dt_accessor.py
+++ b/pandas/tests/series/accessors/test_dt_accessor.py
@@ -585,7 +585,7 @@ def test_strftime_period_days(self, using_infer_string):
             dtype="=U10",
         )
         if using_infer_string:
-            expected = expected.astype("string[pyarrow_numpy]")
+            expected = expected.astype("str")
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
diff --git a/pandas/tests/series/indexing/test_delitem.py b/pandas/tests/series/indexing/test_delitem.py
index 3d1082c3d040b..7440ef2692c47 100644
--- a/pandas/tests/series/indexing/test_delitem.py
+++ b/pandas/tests/series/indexing/test_delitem.py
@@ -31,16 +31,15 @@ def test_delitem(self):
         del s[0]
         tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))
 
-    def test_delitem_object_index(self, using_infer_string):
+    def test_delitem_object_index(self):
         # Index(dtype=object)
-        dtype = "string[pyarrow_numpy]" if using_infer_string else object
-        s = Series(1, index=Index(["a"], dtype=dtype))
+        s = Series(1, index=Index(["a"], dtype="str"))
         del s["a"]
-        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
+        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="str")))
         s["a"] = 1
-        tm.assert_series_equal(s, Series(1, index=Index(["a"], dtype=dtype)))
+        tm.assert_series_equal(s, Series(1, index=Index(["a"], dtype="str")))
         del s["a"]
-        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
+        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="str")))
 
     def test_delitem_missing_key(self):
         # empty
diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py
index ede39ba61dfeb..8ba5e8452711d 100644
--- a/pandas/tests/series/indexing/test_getitem.py
+++ b/pandas/tests/series/indexing/test_getitem.py
@@ -360,9 +360,7 @@ def test_getitem_no_matches(self, box):
         key = Series(["C"], dtype=object)
         key = box(key)
 
-        msg = (
-            r"None of \[Index\(\['C'\], dtype='object|string'\)\] are in the \[index\]"
-        )
+        msg = r"None of \[Index\(\['C'\], dtype='object|str'\)\] are in the \[index\]"
         with pytest.raises(KeyError, match=msg):
             ser[key]
 
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
index 3fcf664c3f01b..742091d761d62 100644
--- a/pandas/tests/series/indexing/test_setitem.py
+++ b/pandas/tests/series/indexing/test_setitem.py
@@ -594,7 +594,7 @@ def test_setitem_enlargement_object_none(self, nulls_fixture, using_infer_string
         ser = Series(["a", "b"])
         ser[3] = nulls_fixture
         dtype = (
-            "string[pyarrow_numpy]"
+            "str"
             if using_infer_string and not isinstance(nulls_fixture, Decimal)
             else object
         )
diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
index d2d92d7273d3d..579d41f964df0 100644
--- a/pandas/tests/series/methods/test_astype.py
+++ b/pandas/tests/series/methods/test_astype.py
@@ -532,12 +532,12 @@ def test_astype_categorical_to_other(self):
         expected = ser
         tm.assert_series_equal(ser.astype("category"), expected)
         tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
-        msg = r"Cannot cast object|string dtype to float64"
+        msg = r"Cannot cast object|str dtype to float64"
         with pytest.raises(ValueError, match=msg):
             ser.astype("float64")
 
         cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
-        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"], dtype=object)
+        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"], dtype="str")
         tm.assert_series_equal(cat.astype("str"), exp)
         s2 = Series(Categorical(["1", "2", "3", "4"]))
         exp2 = Series([1, 2, 3, 4]).astype("int")
diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py
index f4f72854e50d3..fe84ffafa70b4 100644
--- a/pandas/tests/series/methods/test_map.py
+++ b/pandas/tests/series/methods/test_map.py
@@ -101,16 +101,16 @@ def test_map_series_stringdtype(any_string_dtype, using_infer_string):
 
     expected = Series(data=["rabbit", "dog", "cat", item], dtype=any_string_dtype)
     if using_infer_string and any_string_dtype == "object":
-        expected = expected.astype("string[pyarrow_numpy]")
+        expected = expected.astype("str")
 
     tm.assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize(
     "data, expected_dtype",
-    [(["1-1", "1-1", np.nan], "category"), (["1-1", "1-2", np.nan], object)],
+    [(["1-1", "1-1", np.nan], "category"), (["1-1", "1-2", np.nan], "str")],
 )
-def test_map_categorical_with_nan_values(data, expected_dtype, using_infer_string):
+def test_map_categorical_with_nan_values(data, expected_dtype):
     # GH 20714 bug fixed in: GH 24275
     def func(val):
         return val.split("-")[0]
@@ -118,8 +118,6 @@ def func(val):
     s = Series(data, dtype="category")
 
     result = s.map(func, na_action="ignore")
-    if using_infer_string and expected_dtype == object:
-        expected_dtype = "string[pyarrow_numpy]"
     expected = Series(["1", "1", np.nan], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -145,9 +143,7 @@ def test_map_simple_str_callables_same_as_astype(
     # test that we are evaluating row-by-row first
     # before vectorized evaluation
     result = string_series.map(func)
-    expected = string_series.astype(
-        str if not using_infer_string else "string[pyarrow_numpy]"
-    )
+    expected = string_series.astype(str if not using_infer_string else "str")
     tm.assert_series_equal(result, expected)
 
 
@@ -493,7 +489,7 @@ def test_map_categorical(na_action, using_infer_string):
     result = s.map(lambda x: "A", na_action=na_action)
     exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
     tm.assert_series_equal(result, exp)
-    assert result.dtype == object if not using_infer_string else "string"
+    assert result.dtype == object if not using_infer_string else "str"
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/series/methods/test_rename.py b/pandas/tests/series/methods/test_rename.py
index 1da98b3a273be..f5a97d61990a4 100644
--- a/pandas/tests/series/methods/test_rename.py
+++ b/pandas/tests/series/methods/test_rename.py
@@ -64,7 +64,7 @@ def test_rename_set_name_inplace(self, using_infer_string):
             assert ser.name == name
             exp = np.array(["a", "b", "c"], dtype=np.object_)
             if using_infer_string:
-                exp = array(exp, dtype="string[pyarrow_numpy]")
+                exp = array(exp, dtype="str")
                 tm.assert_extension_array_equal(ser.index.values, exp)
             else:
                 tm.assert_numpy_array_equal(ser.index.values, exp)
diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py
index 32b6cbd5acce3..d42aafc001680 100644
--- a/pandas/tests/series/methods/test_reset_index.py
+++ b/pandas/tests/series/methods/test_reset_index.py
@@ -193,7 +193,7 @@ def test_reset_index_dtypes_on_empty_series_with_multiindex(
     # GH 19602 - Preserve dtype on empty Series with MultiIndex
     idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
     result = Series(dtype=object, index=idx)[:0].reset_index().dtypes
-    exp = "string" if using_infer_string else object
+    exp = "str" if using_infer_string else object
     expected = Series(
         {
             "level_0": np.int64,
diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py
index 0bcad49847291..6eb7c74d2eca0 100644
--- a/pandas/tests/series/methods/test_to_csv.py
+++ b/pandas/tests/series/methods/test_to_csv.py
@@ -176,8 +176,5 @@ def test_to_csv_interval_index(self, using_infer_string, temp_file):
 
         # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
         expected = s
-        if using_infer_string:
-            expected.index = expected.index.astype("string[pyarrow_numpy]")
-        else:
-            expected.index = expected.index.astype(str)
+        expected.index = expected.index.astype("str")
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index ff6ece4de9ec4..57b14d4b82a63 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -166,7 +166,7 @@ def test_constructor(self, datetime_series, using_infer_string):
 
         # Mixed type Series
         mixed = Series(["hello", np.nan], index=[0, 1])
-        assert mixed.dtype == np.object_ if not using_infer_string else "string"
+        assert mixed.dtype == np.object_ if not using_infer_string else "str"
         assert np.isnan(mixed[1])
 
         assert not empty_series.index._is_all_dates
@@ -1454,7 +1454,7 @@ def test_fromDict(self, using_infer_string):
 
         data = {"a": 0, "b": "1", "c": "2", "d": "3"}
         series = Series(data)
-        assert series.dtype == np.object_ if not using_infer_string else "string"
+        assert series.dtype == np.object_ if not using_infer_string else "str"
 
         data = {"a": "0", "b": "1"}
         series = Series(data, dtype=float)
@@ -1466,7 +1466,7 @@ def test_fromValue(self, datetime_series, using_infer_string):
         assert len(nans) == len(datetime_series)
 
         strings = Series("foo", index=datetime_series.index)
-        assert strings.dtype == np.object_ if not using_infer_string else "string"
+        assert strings.dtype == np.object_ if not using_infer_string else "str"
         assert len(strings) == len(datetime_series)
 
         d = datetime.now()
@@ -2121,6 +2121,11 @@ def test_series_string_inference_storage_definition(self):
             result = Series(["a", "b"], dtype="string")
         tm.assert_series_equal(result, expected)
 
+        expected = Series(["a", "b"], dtype=pd.StringDtype(na_value=np.nan))
+        with pd.option_context("future.infer_string", True):
+            result = Series(["a", "b"], dtype="str")
+        tm.assert_series_equal(result, expected)
+
     def test_series_constructor_infer_string_scalar(self):
         # GH#55537
         with pd.option_context("future.infer_string", True):
diff --git a/pandas/tests/series/test_formats.py b/pandas/tests/series/test_formats.py
index 1d95fbf8dccb8..ab083d5c58b35 100644
--- a/pandas/tests/series/test_formats.py
+++ b/pandas/tests/series/test_formats.py
@@ -314,7 +314,7 @@ def test_categorical_repr(self, using_infer_string):
                 "0     a\n1     b\n"
                 "     ..\n"
                 "48    a\n49    b\n"
-                "Length: 50, dtype: category\nCategories (2, string): [a, b]"
+                "Length: 50, dtype: category\nCategories (2, str): [a, b]"
             )
         else:
             exp = (
@@ -332,7 +332,7 @@ def test_categorical_repr(self, using_infer_string):
             exp = (
                 "0    a\n1    b\n"
                 "dtype: category\n"
-                "Categories (26, string): [a < b < c < d ... w < x < y < z]"
+                "Categories (26, str): [a < b < c < d ... w < x < y < z]"
             )
         else:
             exp = (
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
index 1e6538ca5a8fb..ee26fdae74960 100644
--- a/pandas/tests/test_downstream.py
+++ b/pandas/tests/test_downstream.py
@@ -10,8 +10,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.errors import IntCastingNaNError
 
 import pandas as pd
@@ -166,7 +164,6 @@ def test_pandas_datareader():
     pytest.importorskip("pandas_datareader")
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
 def test_pyarrow(df):
     pyarrow = pytest.importorskip("pyarrow")
diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py
index a3f4b091713f9..28d96ea25cba7 100644
--- a/pandas/tests/util/test_assert_frame_equal.py
+++ b/pandas/tests/util/test_assert_frame_equal.py
@@ -112,7 +112,7 @@ def test_empty_dtypes(check_dtype):
 @pytest.mark.parametrize("check_like", [True, False])
 def test_frame_equal_index_mismatch(check_like, frame_or_series, using_infer_string):
     if using_infer_string:
-        dtype = "string"
+        dtype = "str"
     else:
         dtype = "object"
     msg = f"""{frame_or_series.__name__}\\.index are different
@@ -134,7 +134,7 @@ def test_frame_equal_index_mismatch(check_like, frame_or_series, using_infer_str
 @pytest.mark.parametrize("check_like", [True, False])
 def test_frame_equal_columns_mismatch(check_like, frame_or_series, using_infer_string):
     if using_infer_string:
-        dtype = "string"
+        dtype = "str"
     else:
         dtype = "object"
     msg = f"""{frame_or_series.__name__}\\.columns are different
diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py
index dc6efdcec380e..ab52d6c8e9f39 100644
--- a/pandas/tests/util/test_assert_index_equal.py
+++ b/pandas/tests/util/test_assert_index_equal.py
@@ -207,7 +207,7 @@ def test_index_equal_names(name1, name2):
 
 def test_index_equal_category_mismatch(check_categorical, using_infer_string):
     if using_infer_string:
-        dtype = "string"
+        dtype = "str"
     else:
         dtype = "object"
     msg = f"""Index are different
diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py
index f75f48157aad2..a3b24c029fbac 100644
--- a/pandas/tests/util/test_assert_series_equal.py
+++ b/pandas/tests/util/test_assert_series_equal.py
@@ -220,9 +220,9 @@ def test_series_equal_categorical_values_mismatch(rtol, using_infer_string):
 Series values are different \\(66\\.66667 %\\)
 \\[index\\]: \\[0, 1, 2\\]
 \\[left\\]:  \\['a', 'b', 'c'\\]
-Categories \\(3, string\\): \\[a, b, c\\]
+Categories \\(3, str\\): \\[a, b, c\\]
 \\[right\\]: \\['a', 'c', 'b'\\]
-Categories \\(3, string\\): \\[a, b, c\\]"""
+Categories \\(3, str\\): \\[a, b, c\\]"""
     else:
         msg = """Series are different
 
@@ -257,7 +257,7 @@ def test_series_equal_datetime_values_mismatch(rtol):
 
 def test_series_equal_categorical_mismatch(check_categorical, using_infer_string):
     if using_infer_string:
-        dtype = "string"
+        dtype = "str"
     else:
         dtype = "object"
     msg = f"""Attributes of Series are different
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index 5ba08ac13fcee..15eaa8c167487 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -71,7 +71,7 @@ def test_sum_object_str_raises(step):
     df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
     r = df.rolling(window=3, step=step)
     with pytest.raises(
-        DataError, match="Cannot aggregate non-numeric type: object|string"
+        DataError, match="Cannot aggregate non-numeric type: object|str"
     ):
         # GH#42738, enforced in 2.0
         r.sum()