From 22995e95b5a58fa3e880655d69f1d4cc35a6d8b4 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Mon, 25 Mar 2024 01:11:36 -0400
Subject: [PATCH 01/64] test not creating indexes on concatenation

---
 xarray/tests/test_concat.py | 53 +++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index 0cf4cc03a09..ed5c8f287c1 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -978,6 +978,32 @@ def test_concat_str_dtype(self, dtype, dim) -> None:
 
         assert np.issubdtype(actual.x2.dtype, dtype)
 
+    def test_concat_avoids_index_auto_creation(self) -> None:
+        # TODO once passing indexes={} directly to DataArray constructor is allowed then no need to create coords first
+        coords = Coordinates({"x": np.array([1, 2, 3])}, indexes={})
+        datasets = [
+            Dataset(
+                {"a": (["x", "y"], np.zeros((3, 3)))},
+                coords=coords,
+            )
+            for _ in range(2)
+        ]
+        # should not raise on concat
+        combined = concat(datasets, dim="x")
+        assert combined["a"].shape == (6, 3)
+        assert combined["a"].dims == ("x", "y")
+
+        # nor have auto-created any indexes
+        assert combined.indexes == {}
+
+        # should not raise on stack
+        combined = concat(datasets, dim="z")
+        assert combined["a"].shape == (2, 3, 3)
+        assert combined["a"].dims == ("z", "x", "y")
+
+        # nor have auto-created any indexes
+        assert combined.indexes == {}
+
 
 class TestConcatDataArray:
     def test_concat(self) -> None:
@@ -1051,6 +1077,33 @@ def test_concat_lazy(self) -> None:
         assert combined.shape == (2, 3, 3)
         assert combined.dims == ("z", "x", "y")
 
+    def test_concat_avoids_index_auto_creation(self) -> None:
+        # TODO once passing indexes={} directly to DataArray constructor is allowed then no need to create coords first
+        coords = Coordinates({"x": np.array([1, 2, 3])}, indexes={})
+        arrays = [
+            DataArray(
+                np.zeros((3, 3)),
+                dims=["x", "y"],
+                coords=coords,
+            )
+            for _ in range(2)
+        ]
+        # should not raise on concat
+        combined = concat(arrays, dim="x")
+        assert combined.shape == (6, 3)
+        assert combined.dims == ("x", "y")
+
+        # nor have auto-created any indexes
+        assert combined.indexes == {}
+
+        # should not raise on stack
+        combined = concat(arrays, dim="z")
+        assert combined.shape == (2, 3, 3)
+        assert combined.dims == ("z", "x", "y")
+
+        # nor have auto-created any indexes
+        assert combined.indexes == {}
+
     @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0])
     def test_concat_fill_value(self, fill_value) -> None:
         foo = DataArray([1, 2], coords=[("x", [1, 2])])

From 7142c9f2cf227abe7eee20b181d0f398a1644789 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Mon, 25 Mar 2024 01:12:03 -0400
Subject: [PATCH 02/64] construct result dataset using Coordinates object with
 indexes passed explicitly

---
 xarray/core/concat.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index d95cbccd36a..708a395c66b 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -8,6 +8,7 @@
 
 from xarray.core import dtypes, utils
 from xarray.core.alignment import align, reindex_variables
+from xarray.core.coordinates import Coordinates
 from xarray.core.duck_array_ops import lazy_array_equiv
 from xarray.core.indexes import Index, PandasIndex
 from xarray.core.merge import (
@@ -646,14 +647,26 @@ def get_indexes(name):
             # preserves original variable order
             result_vars[name] = result_vars.pop(name)
 
-    result = type(datasets[0])(result_vars, attrs=result_attrs)
-
-    absent_coord_names = coord_names - set(result.variables)
+    absent_coord_names = coord_names - set(result_vars)
     if absent_coord_names:
         raise ValueError(
             f"Variables {absent_coord_names!r} are coordinates in some datasets but not others."
         )
-    result = result.set_coords(coord_names)
+    coord_vars = {
+        name: result_var
+        for name, result_var in result_vars.items()
+        if name in coord_names
+    }
+    coords = Coordinates(coord_vars, indexes=result_indexes)
+
+    # TODO: this is just the complement of the set of coord_vars
+    result_data_vars = {
+        name: result_var
+        for name, result_var in result_vars.items()
+        if name not in coord_names
+    }
+
+    result = type(datasets[0])(result_data_vars, coords=coords, attrs=result_attrs)
     result.encoding = result_encoding
 
     result = result.drop_vars(unlabeled_dims, errors="ignore")
@@ -668,6 +681,7 @@ def get_indexes(name):
         result_indexes[dim] = index
 
     # TODO: add indexes at Dataset creation (when it is supported)
+    # TODO: do we actually need this step anymore?
     result = result._overwrite_indexes(result_indexes)
 
     return result

From 7fb075a8821065bdec76b4e837858a0bc978f845 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Mon, 25 Mar 2024 01:28:47 -0400
Subject: [PATCH 03/64] remove unnecessary overwriting of indexes

---
 xarray/core/concat.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 708a395c66b..468c0595b0a 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -678,11 +678,6 @@ def get_indexes(name):
         else:
             index_vars = index.create_variables()
         result[dim] = index_vars[dim]
-        result_indexes[dim] = index
-
-    # TODO: add indexes at Dataset creation (when it is supported)
-    # TODO: do we actually need this step anymore?
-    result = result._overwrite_indexes(result_indexes)
 
     return result
 

From 285c1dee688503c21c233a290b3801bfd6c64a84 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Sun, 24 Mar 2024 21:24:10 -0400
Subject: [PATCH 04/64] ConcatenatableArray class

---
 xarray/tests/__init__.py | 101 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)

diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index 5007db9eeb2..81beba80cf1 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -4,7 +4,9 @@
 import platform
 import string
 import warnings
+from collections.abc import Iterable
 from contextlib import contextmanager, nullcontext
+from typing import Any, Callable
 from unittest import mock  # noqa: F401
 
 import numpy as np
@@ -208,6 +210,105 @@ def __getitem__(self, key):
         raise UnexpectedDataAccess("Tried accessing data.")
 
 
+HANDLED_ARRAY_FUNCTIONS: dict[str, Callable] = {}
+
+
+def implements(numpy_function):
+    """Register an __array_function__ implementation for ManifestArray objects."""
+
+    def decorator(func):
+        HANDLED_ARRAY_FUNCTIONS[numpy_function] = func
+        return func
+
+    return decorator
+
+
+@implements(np.concatenate)
+def concatenate(
+    arrays: Iterable[ConcatenatableArray], /, *, axis=0
+) -> ConcatenatableArray:
+    if any(not isinstance(arr, ConcatenatableArray) for arr in arrays):
+        raise TypeError
+
+    result = np.concatenate([arr.array for arr in arrays], axis=axis)
+    return ConcatenatableArray(result)
+
+
+@implements(np.stack)
+def stack(arrays: Iterable[ConcatenatableArray], /, *, axis=0) -> ConcatenatableArray:
+    if any(not isinstance(arr, ConcatenatableArray) for arr in arrays):
+        raise TypeError
+
+    result = np.stack([arr.array for arr in arrays], axis=axis)
+    return ConcatenatableArray(result)
+
+
+@implements(np.result_type)
+def result_type(*arrays_and_dtypes) -> np.dtype:
+    """Called by xarray to ensure all arguments to concat have the same dtype."""
+    first_dtype, *other_dtypes = (np.dtype(obj) for obj in arrays_and_dtypes)
+    for other_dtype in other_dtypes:
+        if other_dtype != first_dtype:
+            raise ValueError("dtypes not all consistent")
+    return first_dtype
+
+
+@implements(np.broadcast_to)
+def broadcast_to(
+    x: ConcatenatableArray, /, shape: tuple[int, ...]
+) -> ConcatenatableArray:
+    """
+    Broadcasts an array to a specified shape, by either manipulating chunk keys or copying chunk manifest entries.
+    """
+    if not isinstance(x, ConcatenatableArray):
+        raise TypeError
+
+    result = np.broadcast_to(x.array, shape=shape)
+    return ConcatenatableArray(result)
+
+
+class ConcatenatableArray(utils.NDArrayMixin):
+    """Disallows loading or coercing to an index but does support concatenation / stacking."""
+
+    # TODO only reason this is different from InaccessibleArray is to avoid it being a subclass of ExplicitlyIndexed
+
+    HANDLED_ARRAY_FUNCTIONS = [concatenate, stack, result_type]
+
+    def __init__(self, array):
+        self.array = array
+
+    def get_duck_array(self):
+        raise UnexpectedDataAccess("Tried accessing data")
+
+    def __array__(self, dtype: np.typing.DTypeLike = None):
+        raise UnexpectedDataAccess("Tried accessing data")
+
+    def __getitem__(self, key):
+        raise UnexpectedDataAccess("Tried accessing data.")
+
+    def __array_function__(self, func, types, args, kwargs) -> Any:
+        if func not in HANDLED_ARRAY_FUNCTIONS:
+            return NotImplemented
+
+        # Note: this allows subclasses that don't override
+        # __array_function__ to handle ManifestArray objects
+        if not all(issubclass(t, ConcatenatableArray) for t in types):
+            return NotImplemented
+
+        return HANDLED_ARRAY_FUNCTIONS[func](*args, **kwargs)
+
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> Any:
+        """We have to define this in order to convince xarray that this class is a duckarray, even though we will never support ufuncs."""
+        return NotImplemented
+
+    def astype(self, dtype: np.dtype, /, *, copy: bool = True) -> ConcatenatableArray:
+        """Needed because xarray will call this even when it's a no-op"""
+        if dtype != self.dtype:
+            raise NotImplementedError()
+        else:
+            return self
+
+
 class FirstElementAccessibleArray(InaccessibleArray):
     def __getitem__(self, key):
         tuple_idxr = key.tuple

From cc2475703d02063345767627db7fb60f643dbd98 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 27 Mar 2024 20:50:18 -0400
Subject: [PATCH 05/64] use ConcatenableArray in tests

---
 xarray/tests/test_concat.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index ed5c8f287c1..a9ed15658f0 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -12,6 +12,7 @@
 from xarray.core.coordinates import Coordinates
 from xarray.core.indexes import PandasIndex
 from xarray.tests import (
+    ConcatenatableArray,
     InaccessibleArray,
     assert_array_equal,
     assert_equal,
@@ -979,11 +980,13 @@ def test_concat_str_dtype(self, dtype, dim) -> None:
         assert np.issubdtype(actual.x2.dtype, dtype)
 
     def test_concat_avoids_index_auto_creation(self) -> None:
-        # TODO once passing indexes={} directly to DataArray constructor is allowed then no need to create coords first
-        coords = Coordinates({"x": np.array([1, 2, 3])}, indexes={})
+        # TODO once passing indexes={} directly to Dataset constructor is allowed then no need to create coords first
+        coords = Coordinates(
+            {"x": ConcatenatableArray(np.array([1, 2, 3]))}, indexes={}
+        )
         datasets = [
             Dataset(
-                {"a": (["x", "y"], np.zeros((3, 3)))},
+                {"a": (["x", "y"], ConcatenatableArray(np.zeros((3, 3))))},
                 coords=coords,
             )
             for _ in range(2)
@@ -1079,10 +1082,12 @@ def test_concat_lazy(self) -> None:
 
     def test_concat_avoids_index_auto_creation(self) -> None:
         # TODO once passing indexes={} directly to DataArray constructor is allowed then no need to create coords first
-        coords = Coordinates({"x": np.array([1, 2, 3])}, indexes={})
+        coords = Coordinates(
+            {"x": ConcatenatableArray(np.array([1, 2, 3]))}, indexes={}
+        )
         arrays = [
             DataArray(
-                np.zeros((3, 3)),
+                ConcatenatableArray(np.zeros((3, 3))),
                 dims=["x", "y"],
                 coords=coords,
             )

From beb665a7109bdb627aa66ee277fd87edc195356d Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 27 Mar 2024 22:05:54 -0400
Subject: [PATCH 06/64] add regression tests

---
 xarray/tests/test_coordinates.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py
index 40743194ce6..aed2f37700e 100644
--- a/xarray/tests/test_coordinates.py
+++ b/xarray/tests/test_coordinates.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import numpy as np
 import pandas as pd
 import pytest
 
@@ -8,7 +9,7 @@
 from xarray.core.dataarray import DataArray
 from xarray.core.dataset import Dataset
 from xarray.core.indexes import PandasIndex, PandasMultiIndex
-from xarray.core.variable import IndexVariable
+from xarray.core.variable import IndexVariable, Variable
 from xarray.tests import assert_identical, source_ndarray
 
 
@@ -69,6 +70,12 @@ def test_init_dim_sizes_conflict(self) -> None:
         with pytest.raises(ValueError):
             Coordinates(coords={"foo": ("x", [1, 2]), "bar": ("x", [1, 2, 3, 4])})
 
+    def test_init_var_shares_name_with_dim(self) -> None:
+        # regression test for GH #8883
+        var = Variable(data=np.arange(6).reshape(2, 3), dims=["x", "y"])
+        with pytest.raises(ValueError):
+            Coordinates(coords={"x": var}, indexes={})
+
     def test_from_pandas_multiindex(self) -> None:
         midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two"))
         coords = Coordinates.from_pandas_multiindex(midx, "x")

From 22f361dc590a83b2b3660539175a8a7cb1cba051 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 27 Mar 2024 22:08:57 -0400
Subject: [PATCH 07/64] fix by performing check

---
 xarray/core/coordinates.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py
index 251edd1fc6f..e85aafc4479 100644
--- a/xarray/core/coordinates.py
+++ b/xarray/core/coordinates.py
@@ -22,7 +22,11 @@
     assert_no_index_corrupted,
     create_default_index_implicit,
 )
-from xarray.core.merge import merge_coordinates_without_align, merge_coords
+from xarray.core.merge import (
+    assert_valid_explicit_coords,
+    merge_coordinates_without_align,
+    merge_coords,
+)
 from xarray.core.types import DataVars, Self, T_DataArray, T_Xarray
 from xarray.core.utils import (
     Frozen,
@@ -306,6 +310,14 @@ def __init__(
                 else:
                     variables[name] = var
 
+            dims = set(d for var in variables.values() for d in var.dims)
+            sizes = {
+                d: -1 for d in dims
+            }  # TODO the lengths here are never used, so assert_valid_explicit_coords should be refactored
+            assert_valid_explicit_coords(
+                variables, dims=sizes, explicit_coords=list(variables)
+            )
+
         if indexes is None:
             indexes = {}
         else:

From 55166fc7e002fa07d7a84f8d7fc460ddaad9674f Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 27 Mar 2024 22:12:15 -0400
Subject: [PATCH 08/64] refactor assert_valid_explicit_coords and rename
 dims->sizes

---
 xarray/core/coordinates.py |  5 +----
 xarray/core/merge.py       | 10 +++++-----
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py
index e85aafc4479..653e6be0ca2 100644
--- a/xarray/core/coordinates.py
+++ b/xarray/core/coordinates.py
@@ -311,11 +311,8 @@ def __init__(
                     variables[name] = var
 
             dims = set(d for var in variables.values() for d in var.dims)
-            sizes = {
-                d: -1 for d in dims
-            }  # TODO the lengths here are never used, so assert_valid_explicit_coords should be refactored
             assert_valid_explicit_coords(
-                variables, dims=sizes, explicit_coords=list(variables)
+                variables, dims=dims, explicit_coords=list(variables)
             )
 
         if indexes is None:
diff --git a/xarray/core/merge.py b/xarray/core/merge.py
index cbd06c8fdc5..b8afdecec6e 100644
--- a/xarray/core/merge.py
+++ b/xarray/core/merge.py
@@ -564,7 +564,7 @@ def merge_coords(
 
 def assert_valid_explicit_coords(
     variables: Mapping[Any, Any],
-    dims: Mapping[Any, int],
+    dims: Iterable[Any],
     explicit_coords: Iterable[Hashable],
 ) -> None:
     """Validate explicit coordinate names/dims.
@@ -721,16 +721,16 @@ def merge_core(
         collected, prioritized, compat=compat, combine_attrs=combine_attrs
     )
 
-    dims = calculate_dimensions(variables)
+    sizes = calculate_dimensions(variables)
 
     coord_names, noncoord_names = determine_coords(coerced)
     if compat == "minimal":
         # coordinates may be dropped in merged results
         coord_names.intersection_update(variables)
     if explicit_coords is not None:
-        assert_valid_explicit_coords(variables, dims, explicit_coords)
+        assert_valid_explicit_coords(variables, sizes.keys(), explicit_coords)
         coord_names.update(explicit_coords)
-    for dim, size in dims.items():
+    for dim, size in sizes.items():
         if dim in variables:
             coord_names.add(dim)
     ambiguous_coords = coord_names.intersection(noncoord_names)
@@ -745,7 +745,7 @@ def merge_core(
         combine_attrs,
     )
 
-    return _MergeResult(variables, coord_names, dims, out_indexes, attrs)
+    return _MergeResult(variables, coord_names, sizes, out_indexes, attrs)
 
 
 def merge(

From da6692b3a856af636fd3c447321759abfb366e56 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Thu, 28 Mar 2024 10:48:59 -0400
Subject: [PATCH 09/64] Revert "add regression tests"

This reverts commit beb665a7109bdb627aa66ee277fd87edc195356d.
---
 xarray/tests/test_coordinates.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py
index aed2f37700e..40743194ce6 100644
--- a/xarray/tests/test_coordinates.py
+++ b/xarray/tests/test_coordinates.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import numpy as np
 import pandas as pd
 import pytest
 
@@ -9,7 +8,7 @@
 from xarray.core.dataarray import DataArray
 from xarray.core.dataset import Dataset
 from xarray.core.indexes import PandasIndex, PandasMultiIndex
-from xarray.core.variable import IndexVariable, Variable
+from xarray.core.variable import IndexVariable
 from xarray.tests import assert_identical, source_ndarray
 
 
@@ -70,12 +69,6 @@ def test_init_dim_sizes_conflict(self) -> None:
         with pytest.raises(ValueError):
             Coordinates(coords={"foo": ("x", [1, 2]), "bar": ("x", [1, 2, 3, 4])})
 
-    def test_init_var_shares_name_with_dim(self) -> None:
-        # regression test for GH #8883
-        var = Variable(data=np.arange(6).reshape(2, 3), dims=["x", "y"])
-        with pytest.raises(ValueError):
-            Coordinates(coords={"x": var}, indexes={})
-
     def test_from_pandas_multiindex(self) -> None:
         midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two"))
         coords = Coordinates.from_pandas_multiindex(midx, "x")

From 35dfb67beb0b0c4de1cf83d7894f0594d5392d50 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Thu, 28 Mar 2024 10:49:53 -0400
Subject: [PATCH 10/64] Revert "fix by performing check"

This reverts commit 22f361dc590a83b2b3660539175a8a7cb1cba051.
---
 xarray/core/coordinates.py | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py
index 653e6be0ca2..251edd1fc6f 100644
--- a/xarray/core/coordinates.py
+++ b/xarray/core/coordinates.py
@@ -22,11 +22,7 @@
     assert_no_index_corrupted,
     create_default_index_implicit,
 )
-from xarray.core.merge import (
-    assert_valid_explicit_coords,
-    merge_coordinates_without_align,
-    merge_coords,
-)
+from xarray.core.merge import merge_coordinates_without_align, merge_coords
 from xarray.core.types import DataVars, Self, T_DataArray, T_Xarray
 from xarray.core.utils import (
     Frozen,
@@ -310,11 +306,6 @@ def __init__(
                 else:
                     variables[name] = var
 
-            dims = set(d for var in variables.values() for d in var.dims)
-            assert_valid_explicit_coords(
-                variables, dims=dims, explicit_coords=list(variables)
-            )
-
         if indexes is None:
             indexes = {}
         else:

From fd3de2be6b5b18f37fad7470e399e12044b22844 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Thu, 28 Mar 2024 10:50:34 -0400
Subject: [PATCH 11/64] Revert "refactor assert_valid_explicit_coords and
 rename dims->sizes"

This reverts commit 55166fc7e002fa07d7a84f8d7fc460ddaad9674f.
---
 xarray/core/merge.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/xarray/core/merge.py b/xarray/core/merge.py
index b8afdecec6e..cbd06c8fdc5 100644
--- a/xarray/core/merge.py
+++ b/xarray/core/merge.py
@@ -564,7 +564,7 @@ def merge_coords(
 
 def assert_valid_explicit_coords(
     variables: Mapping[Any, Any],
-    dims: Iterable[Any],
+    dims: Mapping[Any, int],
     explicit_coords: Iterable[Hashable],
 ) -> None:
     """Validate explicit coordinate names/dims.
@@ -721,16 +721,16 @@ def merge_core(
         collected, prioritized, compat=compat, combine_attrs=combine_attrs
     )
 
-    sizes = calculate_dimensions(variables)
+    dims = calculate_dimensions(variables)
 
     coord_names, noncoord_names = determine_coords(coerced)
     if compat == "minimal":
         # coordinates may be dropped in merged results
         coord_names.intersection_update(variables)
     if explicit_coords is not None:
-        assert_valid_explicit_coords(variables, sizes.keys(), explicit_coords)
+        assert_valid_explicit_coords(variables, dims, explicit_coords)
         coord_names.update(explicit_coords)
-    for dim, size in sizes.items():
+    for dim, size in dims.items():
         if dim in variables:
             coord_names.add(dim)
     ambiguous_coords = coord_names.intersection(noncoord_names)
@@ -745,7 +745,7 @@ def merge_core(
         combine_attrs,
     )
 
-    return _MergeResult(variables, coord_names, sizes, out_indexes, attrs)
+    return _MergeResult(variables, coord_names, dims, out_indexes, attrs)
 
 
 def merge(

From 21afbb1ee88d44e0193f0ec384dac0c3d3b90f05 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Thu, 28 Mar 2024 16:06:23 -0400
Subject: [PATCH 12/64] fix failing test

---
 xarray/core/concat.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 468c0595b0a..5ef3c01d427 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -652,14 +652,26 @@ def get_indexes(name):
         raise ValueError(
             f"Variables {absent_coord_names!r} are coordinates in some datasets but not others."
         )
+
     coord_vars = {
         name: result_var
         for name, result_var in result_vars.items()
         if name in coord_names
     }
+
+    if index is not None:
+        if dim_var is not None:
+            index_vars = index.create_variables({dim: dim_var})
+        else:
+            index_vars = index.create_variables()
+
+        coord_vars[dim] = index_vars[dim]
+        result_indexes[dim] = index
+        unlabeled_dims = unlabeled_dims - set([dim])
+
+    # TODO: add indexes at Dataset creation (when it is supported)
     coords = Coordinates(coord_vars, indexes=result_indexes)
 
-    # TODO: this is just the complement of the set of coord_vars
     result_data_vars = {
         name: result_var
         for name, result_var in result_vars.items()
@@ -671,14 +683,6 @@ def get_indexes(name):
 
     result = result.drop_vars(unlabeled_dims, errors="ignore")
 
-    if index is not None:
-        # add concat index / coordinate last to ensure that its in the final Dataset
-        if dim_var is not None:
-            index_vars = index.create_variables({dim: dim_var})
-        else:
-            index_vars = index.create_variables()
-        result[dim] = index_vars[dim]
-
     return result
 
 

From 6e9ead6603de73c5ea6bd8f76d973525bb70b417 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Thu, 28 Mar 2024 16:50:22 -0400
Subject: [PATCH 13/64] possible fix for failing groupby test

---
 xarray/core/concat.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 5ef3c01d427..8cd75502fa3 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -658,6 +658,11 @@ def get_indexes(name):
         for name, result_var in result_vars.items()
         if name in coord_names
     }
+    result_data_vars = {
+        name: result_var
+        for name, result_var in result_vars.items()
+        if name not in coord_names
+    }
 
     if index is not None:
         if dim_var is not None:
@@ -668,16 +673,14 @@ def get_indexes(name):
         coord_vars[dim] = index_vars[dim]
         result_indexes[dim] = index
         unlabeled_dims = unlabeled_dims - set([dim])
+    else:
+        if dim in result_data_vars:
+            coord_vars[dim] = result_data_vars[dim]
+            result_data_vars.pop(dim)
 
     # TODO: add indexes at Dataset creation (when it is supported)
     coords = Coordinates(coord_vars, indexes=result_indexes)
 
-    result_data_vars = {
-        name: result_var
-        for name, result_var in result_vars.items()
-        if name not in coord_names
-    }
-
     result = type(datasets[0])(result_data_vars, coords=coords, attrs=result_attrs)
     result.encoding = result_encoding
 

From 2534712041377b7ef934f0f0f2191c27bf0e99c0 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 29 Mar 2024 11:00:10 -0400
Subject: [PATCH 14/64] Revert "possible fix for failing groupby test"

This reverts commit 6e9ead6603de73c5ea6bd8f76d973525bb70b417.
---
 xarray/core/concat.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 8cd75502fa3..5ef3c01d427 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -658,11 +658,6 @@ def get_indexes(name):
         for name, result_var in result_vars.items()
         if name in coord_names
     }
-    result_data_vars = {
-        name: result_var
-        for name, result_var in result_vars.items()
-        if name not in coord_names
-    }
 
     if index is not None:
         if dim_var is not None:
@@ -673,14 +668,16 @@ def get_indexes(name):
         coord_vars[dim] = index_vars[dim]
         result_indexes[dim] = index
         unlabeled_dims = unlabeled_dims - set([dim])
-    else:
-        if dim in result_data_vars:
-            coord_vars[dim] = result_data_vars[dim]
-            result_data_vars.pop(dim)
 
     # TODO: add indexes at Dataset creation (when it is supported)
     coords = Coordinates(coord_vars, indexes=result_indexes)
 
+    result_data_vars = {
+        name: result_var
+        for name, result_var in result_vars.items()
+        if name not in coord_names
+    }
+
     result = type(datasets[0])(result_data_vars, coords=coords, attrs=result_attrs)
     result.encoding = result_encoding
 

From 3e848ebdcca5cc641be3ff9b1b9791f74208d487 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 19 Apr 2024 11:11:25 -0400
Subject: [PATCH 15/64] test expand_dims doesn't create Index

---
 xarray/tests/test_dataset.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index e2a64964775..020af51a5f8 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3431,6 +3431,23 @@ def test_expand_dims_kwargs_python36plus(self) -> None:
         )
         assert_identical(other_way_expected, other_way)
 
+    def test_expand_dims_creates_indexvariable(self):
+        # data variables should not gain an index ever
+        ds = Dataset({"a": 0})
+        for flag in [True, False]:
+            expanded = ds.expand_dims("x", create_1d_index=flag)
+            expected = Dataset({"a": ("x", [0])})
+            assert_identical(expanded, expected)
+            assert expanded.indexes == {}
+
+        # coordinate variables should gain an index only if create_1d_index is True (the default)
+        ds = Dataset(coords={"x": 0})
+        expanded = ds.expand_dims("x")
+        expected = Dataset({"x": ("x", [0])})
+        assert_identical(expanded, expected)
+        expanded_no_index = ds.expand_dims("x", create_1d_index=False)
+        assert expanded_no_index.indexes == {}
+
     @requires_pandas_version_two
     def test_expand_dims_non_nanosecond_conversion(self) -> None:
         # Regression test for https://github.com/pydata/xarray/issues/7493#issuecomment-1953091000

From 95d453ccff1d2e2746c1970c0157f2de0b582105 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 19 Apr 2024 11:40:06 -0400
Subject: [PATCH 16/64] add option to not create 1D index in expand_dims

---
 xarray/core/dataset.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 10bf1466156..861356f3d19 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4479,6 +4479,7 @@ def expand_dims(
         self,
         dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None,
         axis: None | int | Sequence[int] = None,
+        create_1d_index: bool = True,
         **dim_kwargs: Any,
     ) -> Self:
         """Return a new object with an additional axis (or axes) inserted at
@@ -4488,6 +4489,8 @@ def expand_dims(
         If dim is already a scalar coordinate, it will be promoted to a 1D
         coordinate consisting of a single value.
 
+        The 
+        
         Parameters
         ----------
         dim : hashable, sequence of hashable, mapping, or None
@@ -4503,6 +4506,8 @@ def expand_dims(
             multiple axes are inserted. In this case, dim arguments should be
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
+        create_1d_index : bool, default is True
+            Whether to create new PandasIndex objects for new 1D coordinate variables.
         **dim_kwargs : int or sequence or ndarray
             The keywords are arbitrary dimensions being inserted and the values
             are either the lengths of the new dims (if int is given), or their
@@ -4622,6 +4627,8 @@ def expand_dims(
                 # save the coordinates to the variables dict, and set the
                 # value within the dim dict to the length of the iterable
                 # for later use.
+
+                # TODO should we have an option to not create a variable here?
                 index = PandasIndex(v, k)
                 indexes[k] = index
                 variables.update(index.create_variables())
@@ -4660,11 +4667,16 @@ def expand_dims(
                     variables[k] = v.set_dims(dict(all_dims))
             else:
                 if k not in variables:
-                    # If dims includes a label of a non-dimension coordinate,
-                    # it will be promoted to a 1D coordinate with a single value.
-                    index, index_vars = create_default_index_implicit(v.set_dims(k))
-                    indexes[k] = index
-                    variables.update(index_vars)
+                    if create_1d_index:
+                        # If dims includes a label of a non-dimension coordinate,
+                        # it will be promoted to a 1D coordinate with a single value.
+                        index, index_vars = create_default_index_implicit(v.set_dims(k))
+                        indexes[k] = index
+                        variables.update(index_vars)
+                    else:
+                        # create 1D variable without creating a new index
+                        new_1d_var = v.set_dims(k)
+                        variables.update({k: new_1d_var})
 
         return self._replace_with_new_dims(
             variables, coord_names=coord_names, indexes=indexes

From ba5627eebf7b580d0a0b9a171f1f94d7412662e3 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 19 Apr 2024 22:47:07 -0400
Subject: [PATCH 17/64] refactor tests to consider data variables and
 coordinate variables separately

---
 xarray/tests/test_dataset.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 020af51a5f8..8c76e1ffd24 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3431,21 +3431,34 @@ def test_expand_dims_kwargs_python36plus(self) -> None:
         )
         assert_identical(other_way_expected, other_way)
 
-    def test_expand_dims_creates_indexvariable(self):
+    @pytest.mark.parametrize("create_1d_index_flag", [True, False])
+    def test_expand_dims_create_index_data_variable(self, create_1d_index_flag):
         # data variables should not gain an index ever
-        ds = Dataset({"a": 0})
-        for flag in [True, False]:
-            expanded = ds.expand_dims("x", create_1d_index=flag)
-            expected = Dataset({"a": ("x", [0])})
-            assert_identical(expanded, expected)
-            assert expanded.indexes == {}
+        ds = Dataset({"x": 0})
+        expanded = ds.expand_dims("x", create_1d_index=create_1d_index_flag)
+
+        # TODO I can't just create the expected dataset directly using constructor because of GH issue 8959
+        # expected = Dataset(data_vars={"x": ("x", [0])})
+        expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x")
+
+        # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks
+        # assert_identical(expanded, expected)
+        assert expected.data_vars == {"x": Variable(data=[0], dims=["x"])}
+        assert expanded.indexes == {}
 
+    def test_expand_dims_create_index_coordinate_variable(self):
         # coordinate variables should gain an index only if create_1d_index is True (the default)
         ds = Dataset(coords={"x": 0})
         expanded = ds.expand_dims("x")
         expected = Dataset({"x": ("x", [0])})
         assert_identical(expanded, expected)
+
         expanded_no_index = ds.expand_dims("x", create_1d_index=False)
+        expected = Dataset(coords={"x": ("x", [0])}).drop_indexes("x")
+
+        # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks
+        # assert_identical(expanded, expected)
+        assert expanded_no_index.coords == {"x": Variable(data=[0], dims=["x"])}
         assert expanded_no_index.indexes == {}
 
     @requires_pandas_version_two

From 3719ba7ca6d66dbb1c7a105907beb13e8d826e37 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 19 Apr 2024 11:11:25 -0400
Subject: [PATCH 18/64] test expand_dims doesn't create Index

---
 xarray/tests/test_dataset.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index a948fafc815..54a7b0a6f87 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3431,6 +3431,23 @@ def test_expand_dims_kwargs_python36plus(self) -> None:
         )
         assert_identical(other_way_expected, other_way)
 
+    def test_expand_dims_creates_indexvariable(self):
+        # data variables should not gain an index ever
+        ds = Dataset({"a": 0})
+        for flag in [True, False]:
+            expanded = ds.expand_dims("x", create_1d_index=flag)
+            expected = Dataset({"a": ("x", [0])})
+            assert_identical(expanded, expected)
+            assert expanded.indexes == {}
+
+        # coordinate variables should gain an index only if create_1d_index is True (the default)
+        ds = Dataset(coords={"x": 0})
+        expanded = ds.expand_dims("x")
+        expected = Dataset({"x": ("x", [0])})
+        assert_identical(expanded, expected)
+        expanded_no_index = ds.expand_dims("x", create_1d_index=False)
+        assert expanded_no_index.indexes == {}
+
     @requires_pandas_version_two
     def test_expand_dims_non_nanosecond_conversion(self) -> None:
         # Regression test for https://github.com/pydata/xarray/issues/7493#issuecomment-1953091000

From 018e74bb1c94141cd86502bfa2ac964a44f28860 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 19 Apr 2024 11:40:06 -0400
Subject: [PATCH 19/64] add option to not create 1D index in expand_dims

---
 xarray/core/dataset.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 96f3be00995..233b7833785 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4497,6 +4497,7 @@ def expand_dims(
         self,
         dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None,
         axis: None | int | Sequence[int] = None,
+        create_1d_index: bool = True,
         **dim_kwargs: Any,
     ) -> Self:
         """Return a new object with an additional axis (or axes) inserted at
@@ -4506,6 +4507,8 @@ def expand_dims(
         If dim is already a scalar coordinate, it will be promoted to a 1D
         coordinate consisting of a single value.
 
+        The 
+        
         Parameters
         ----------
         dim : hashable, sequence of hashable, mapping, or None
@@ -4521,6 +4524,8 @@ def expand_dims(
             multiple axes are inserted. In this case, dim arguments should be
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
+        create_1d_index : bool, default is True
+            Whether to create new PandasIndex objects for new 1D coordinate variables.
         **dim_kwargs : int or sequence or ndarray
             The keywords are arbitrary dimensions being inserted and the values
             are either the lengths of the new dims (if int is given), or their
@@ -4640,6 +4645,8 @@ def expand_dims(
                 # save the coordinates to the variables dict, and set the
                 # value within the dim dict to the length of the iterable
                 # for later use.
+
+                # TODO should we have an option to not create a variable here?
                 index = PandasIndex(v, k)
                 indexes[k] = index
                 variables.update(index.create_variables())
@@ -4678,11 +4685,16 @@ def expand_dims(
                     variables[k] = v.set_dims(dict(all_dims))
             else:
                 if k not in variables:
-                    # If dims includes a label of a non-dimension coordinate,
-                    # it will be promoted to a 1D coordinate with a single value.
-                    index, index_vars = create_default_index_implicit(v.set_dims(k))
-                    indexes[k] = index
-                    variables.update(index_vars)
+                    if create_1d_index:
+                        # If dims includes a label of a non-dimension coordinate,
+                        # it will be promoted to a 1D coordinate with a single value.
+                        index, index_vars = create_default_index_implicit(v.set_dims(k))
+                        indexes[k] = index
+                        variables.update(index_vars)
+                    else:
+                        # create 1D variable without creating a new index
+                        new_1d_var = v.set_dims(k)
+                        variables.update({k: new_1d_var})
 
         return self._replace_with_new_dims(
             variables, coord_names=coord_names, indexes=indexes

From f680505d3dd630bc8fa0a200e5d9efde7cf8ada9 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 19 Apr 2024 22:47:07 -0400
Subject: [PATCH 20/64] refactor tests to consider data variables and
 coordinate variables separately

---
 xarray/tests/test_dataset.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 54a7b0a6f87..74c4af1790d 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3431,21 +3431,34 @@ def test_expand_dims_kwargs_python36plus(self) -> None:
         )
         assert_identical(other_way_expected, other_way)
 
-    def test_expand_dims_creates_indexvariable(self):
+    @pytest.mark.parametrize("create_1d_index_flag", [True, False])
+    def test_expand_dims_create_index_data_variable(self, create_1d_index_flag):
         # data variables should not gain an index ever
-        ds = Dataset({"a": 0})
-        for flag in [True, False]:
-            expanded = ds.expand_dims("x", create_1d_index=flag)
-            expected = Dataset({"a": ("x", [0])})
-            assert_identical(expanded, expected)
-            assert expanded.indexes == {}
+        ds = Dataset({"x": 0})
+        expanded = ds.expand_dims("x", create_1d_index=create_1d_index_flag)
+
+        # TODO I can't just create the expected dataset directly using constructor because of GH issue 8959
+        # expected = Dataset(data_vars={"x": ("x", [0])})
+        expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x")
+
+        # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks
+        # assert_identical(expanded, expected)
+        assert expected.data_vars == {"x": Variable(data=[0], dims=["x"])}
+        assert expanded.indexes == {}
 
+    def test_expand_dims_create_index_coordinate_variable(self):
         # coordinate variables should gain an index only if create_1d_index is True (the default)
         ds = Dataset(coords={"x": 0})
         expanded = ds.expand_dims("x")
         expected = Dataset({"x": ("x", [0])})
         assert_identical(expanded, expected)
+
         expanded_no_index = ds.expand_dims("x", create_1d_index=False)
+        expected = Dataset(coords={"x": ("x", [0])}).drop_indexes("x")
+
+        # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks
+        # assert_identical(expanded, expected)
+        assert expanded_no_index.coords == {"x": Variable(data=[0], dims=["x"])}
         assert expanded_no_index.indexes == {}
 
     @requires_pandas_version_two

From f10509aa983a4baae66163030b26827ef6c11344 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 19 Apr 2024 23:22:11 -0400
Subject: [PATCH 21/64] fix bug causing new test to fail

---
 xarray/core/dataset.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 233b7833785..9752afbd749 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4507,8 +4507,9 @@ def expand_dims(
         If dim is already a scalar coordinate, it will be promoted to a 1D
         coordinate consisting of a single value.
 
-        The 
-        
+        The automatic creation of indexes to back new 1D coordinate variables
+        controlled by the create_1d_index kwarg.
+
         Parameters
         ----------
         dim : hashable, sequence of hashable, mapping, or None
@@ -4525,7 +4526,7 @@ def expand_dims(
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
         create_1d_index : bool, default is True
-            Whether to create new PandasIndex objects for new 1D coordinate variables.
+            Whether to create new PandasIndex objects for any new 1D coordinate variables.
         **dim_kwargs : int or sequence or ndarray
             The keywords are arbitrary dimensions being inserted and the values
             are either the lengths of the new dims (if int is given), or their
@@ -4685,7 +4686,7 @@ def expand_dims(
                     variables[k] = v.set_dims(dict(all_dims))
             else:
                 if k not in variables:
-                    if create_1d_index:
+                    if k in coord_names and create_1d_index:
                         # If dims includes a label of a non-dimension coordinate,
                         # it will be promoted to a 1D coordinate with a single value.
                         index, index_vars = create_default_index_implicit(v.set_dims(k))

From 8152c0a40c4386eb01cd1974769527ae5c85a570 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 19 Apr 2024 23:51:28 -0400
Subject: [PATCH 22/64] test index auto-creation when iterable passed as new
 coordinate values

---
 xarray/tests/test_dataset.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 74c4af1790d..feee69bf49a 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -11,6 +11,7 @@
 from typing import Any, Literal
 
 import numpy as np
+import numpy.testing as npt
 import pandas as pd
 import pytest
 from pandas.core.indexes.datetimes import DatetimeIndex
@@ -3461,6 +3462,22 @@ def test_expand_dims_create_index_coordinate_variable(self):
         assert expanded_no_index.coords == {"x": Variable(data=[0], dims=["x"])}
         assert expanded_no_index.indexes == {}
 
+    def test_expand_dims_create_index_from_iterable(self):
+        ds = Dataset(coords={"x": 0})
+        expanded = ds.expand_dims(x=[0, 1])
+        expected = Dataset({"x": ("x", [0, 1])})
+        assert_identical(expanded, expected)
+
+        expanded_no_index = ds.expand_dims(x=[0, 1], create_1d_index=False)
+        expected = Dataset(coords={"x": ("x", [0, 1])}).drop_indexes("x")
+
+        # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks
+        # assert_identical(expanded, expected)
+        assert list(expanded_no_index.coords) == ["x"]
+        assert isinstance(expanded_no_index.coords["x"].variable, Variable)
+        npt.assert_array_equal(expanded_no_index.coords["x"].data, np.array([0, 1]))
+        assert expanded_no_index.indexes == {}
+
     @requires_pandas_version_two
     def test_expand_dims_non_nanosecond_conversion(self) -> None:
         # Regression test for https://github.com/pydata/xarray/issues/7493#issuecomment-1953091000

From aa813cff0d6011af81cb4a15c87e9857f35c3d26 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Sat, 20 Apr 2024 00:06:27 -0400
Subject: [PATCH 23/64] make test for iterable pass

---
 xarray/core/dataset.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 9752afbd749..25cf8263bf5 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4647,10 +4647,13 @@ def expand_dims(
                 # value within the dim dict to the length of the iterable
                 # for later use.
 
-                # TODO should we have an option to not create a variable here?
-                index = PandasIndex(v, k)
-                indexes[k] = index
-                variables.update(index.create_variables())
+                if create_1d_index:
+                    index = PandasIndex(v, k)
+                    indexes[k] = index
+                    name_and_new_1d_var = index.create_variables()
+                else:
+                    name_and_new_1d_var = {k: Variable(data=v, dims=k)}
+                variables.update(name_and_new_1d_var)
                 coord_names.add(k)
                 dim[k] = variables[k].size
             elif isinstance(v, int):

From e78de7d5e93649114d9aaa916bf0f4185882be9c Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Sat, 20 Apr 2024 00:36:37 -0400
Subject: [PATCH 24/64] added kwarg to dataarray

---
 xarray/core/dataarray.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 509962ff80d..818275dedd3 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -2557,6 +2557,7 @@ def expand_dims(
         self,
         dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None,
         axis: None | int | Sequence[int] = None,
+        create_1d_index: bool = True,
         **dim_kwargs: Any,
     ) -> Self:
         """Return a new object with an additional axis (or axes) inserted at
@@ -2566,6 +2567,9 @@ def expand_dims(
         If dim is already a scalar coordinate, it will be promoted to a 1D
         coordinate consisting of a single value.
 
+        The automatic creation of indexes to back new 1D coordinate variables
+        controlled by the create_1d_index kwarg.
+
         Parameters
         ----------
         dim : Hashable, sequence of Hashable, dict, or None, optional
@@ -2581,6 +2585,8 @@ def expand_dims(
             multiple axes are inserted. In this case, dim arguments should be
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
+        create_1d_index : bool, default is True
+            Whether to create new PandasIndex objects for any new 1D coordinate variables.
         **dim_kwargs : int or sequence or ndarray
             The keywords are arbitrary dimensions being inserted and the values
             are either the lengths of the new dims (if int is given), or their
@@ -2644,7 +2650,9 @@ def expand_dims(
             dim = {dim: 1}
 
         dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims")
-        ds = self._to_temp_dataset().expand_dims(dim, axis)
+        ds = self._to_temp_dataset().expand_dims(
+            dim, axis, create_1d_index=create_1d_index
+        )
         return self._from_temp_dataset(ds)
 
     def set_index(

From b1329cc08c2b08cf032c6a6ddb4da8ff99bbbee7 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Sat, 20 Apr 2024 01:22:51 -0400
Subject: [PATCH 25/64] whatsnew

---
 doc/whats-new.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 2332f7f236b..ffa6f562e41 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -29,6 +29,9 @@ New Features
   for example, will retain the object.  However, one cannot do operations that are not possible on the `ExtensionArray`
   then, such as broadcasting.
   By `Ilan Gold <https://github.com/ilan-gold>`_.
+- Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg
+  `create_1d_index=False`. (:pull:`8960`) 
+  By `Tom Nicholas <https://github.com/TomNicholas>`_.
 
 Breaking changes
 ~~~~~~~~~~~~~~~~

From a9f7e0c7c177f16371819963f16388537fead51d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 20 Apr 2024 05:23:22 +0000
Subject: [PATCH 26/64] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 doc/whats-new.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index ffa6f562e41..7ecbc8fc3e6 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -30,7 +30,7 @@ New Features
   then, such as broadcasting.
   By `Ilan Gold <https://github.com/ilan-gold>`_.
 - Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg
-  `create_1d_index=False`. (:pull:`8960`) 
+  `create_1d_index=False`. (:pull:`8960`)
   By `Tom Nicholas <https://github.com/TomNicholas>`_.
 
 Breaking changes

From 2ce3decebb0248ff9b3fe92de38f14a293ef08b7 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Sat, 20 Apr 2024 01:32:05 -0400
Subject: [PATCH 27/64] Revert "refactor tests to consider data variables and
 coordinate variables separately"

This reverts commit ba5627eebf7b580d0a0b9a171f1f94d7412662e3.
---
 xarray/tests/test_dataset.py | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 8c76e1ffd24..020af51a5f8 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3431,34 +3431,21 @@ def test_expand_dims_kwargs_python36plus(self) -> None:
         )
         assert_identical(other_way_expected, other_way)
 
-    @pytest.mark.parametrize("create_1d_index_flag", [True, False])
-    def test_expand_dims_create_index_data_variable(self, create_1d_index_flag):
+    def test_expand_dims_creates_indexvariable(self):
         # data variables should not gain an index ever
-        ds = Dataset({"x": 0})
-        expanded = ds.expand_dims("x", create_1d_index=create_1d_index_flag)
-
-        # TODO I can't just create the expected dataset directly using constructor because of GH issue 8959
-        # expected = Dataset(data_vars={"x": ("x", [0])})
-        expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x")
-
-        # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks
-        # assert_identical(expanded, expected)
-        assert expected.data_vars == {"x": Variable(data=[0], dims=["x"])}
-        assert expanded.indexes == {}
+        ds = Dataset({"a": 0})
+        for flag in [True, False]:
+            expanded = ds.expand_dims("x", create_1d_index=flag)
+            expected = Dataset({"a": ("x", [0])})
+            assert_identical(expanded, expected)
+            assert expanded.indexes == {}
 
-    def test_expand_dims_create_index_coordinate_variable(self):
         # coordinate variables should gain an index only if create_1d_index is True (the default)
         ds = Dataset(coords={"x": 0})
         expanded = ds.expand_dims("x")
         expected = Dataset({"x": ("x", [0])})
         assert_identical(expanded, expected)
-
         expanded_no_index = ds.expand_dims("x", create_1d_index=False)
-        expected = Dataset(coords={"x": ("x", [0])}).drop_indexes("x")
-
-        # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks
-        # assert_identical(expanded, expected)
-        assert expanded_no_index.coords == {"x": Variable(data=[0], dims=["x"])}
         assert expanded_no_index.indexes == {}
 
     @requires_pandas_version_two

From 87a08b4c36ebd0b4cc5c613d65685e00d1030f11 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Sat, 20 Apr 2024 01:32:06 -0400
Subject: [PATCH 28/64] Revert "add option to not create 1D index in
 expand_dims"

This reverts commit 95d453ccff1d2e2746c1970c0157f2de0b582105.
---
 xarray/core/dataset.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 861356f3d19..10bf1466156 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4479,7 +4479,6 @@ def expand_dims(
         self,
         dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None,
         axis: None | int | Sequence[int] = None,
-        create_1d_index: bool = True,
         **dim_kwargs: Any,
     ) -> Self:
         """Return a new object with an additional axis (or axes) inserted at
@@ -4489,8 +4488,6 @@ def expand_dims(
         If dim is already a scalar coordinate, it will be promoted to a 1D
         coordinate consisting of a single value.
 
-        The 
-        
         Parameters
         ----------
         dim : hashable, sequence of hashable, mapping, or None
@@ -4506,8 +4503,6 @@ def expand_dims(
             multiple axes are inserted. In this case, dim arguments should be
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
-        create_1d_index : bool, default is True
-            Whether to create new PandasIndex objects for new 1D coordinate variables.
         **dim_kwargs : int or sequence or ndarray
             The keywords are arbitrary dimensions being inserted and the values
             are either the lengths of the new dims (if int is given), or their
@@ -4627,8 +4622,6 @@ def expand_dims(
                 # save the coordinates to the variables dict, and set the
                 # value within the dim dict to the length of the iterable
                 # for later use.
-
-                # TODO should we have an option to not create a variable here?
                 index = PandasIndex(v, k)
                 indexes[k] = index
                 variables.update(index.create_variables())
@@ -4667,16 +4660,11 @@ def expand_dims(
                     variables[k] = v.set_dims(dict(all_dims))
             else:
                 if k not in variables:
-                    if create_1d_index:
-                        # If dims includes a label of a non-dimension coordinate,
-                        # it will be promoted to a 1D coordinate with a single value.
-                        index, index_vars = create_default_index_implicit(v.set_dims(k))
-                        indexes[k] = index
-                        variables.update(index_vars)
-                    else:
-                        # create 1D variable without creating a new index
-                        new_1d_var = v.set_dims(k)
-                        variables.update({k: new_1d_var})
+                    # If dims includes a label of a non-dimension coordinate,
+                    # it will be promoted to a 1D coordinate with a single value.
+                    index, index_vars = create_default_index_implicit(v.set_dims(k))
+                    indexes[k] = index
+                    variables.update(index_vars)
 
         return self._replace_with_new_dims(
             variables, coord_names=coord_names, indexes=indexes

From 214ed7d22e9a1c06331c9d5be61c9dbee83ee467 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Sat, 20 Apr 2024 02:34:54 -0400
Subject: [PATCH 29/64] test that concat doesn't raise if create_1d_index=False

---
 xarray/tests/test_concat.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index 7295941568e..87b7c5093bf 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -14,6 +14,7 @@
 from xarray.tests import (
     ConcatenatableArray,
     InaccessibleArray,
+    UnexpectedDataAccess,
     assert_array_equal,
     assert_equal,
     assert_identical,
@@ -1028,6 +1029,22 @@ def test_concat_avoids_index_auto_creation(self) -> None:
         # nor have auto-created any indexes
         assert combined.indexes == {}
 
+    def test_concat_avoids_index_auto_creation_new_1d_coord(self) -> None:
+        # create 0D coordinates (without indexes)
+        datasets = [
+            Dataset(
+                coords={"x": ConcatenatableArray(np.array(0))},
+            )
+            for _ in range(2)
+        ]
+        
+        # should not raise on concat iff create_1d_index=False
+        combined = concat(datasets, dim="x", create_1d_index=False)
+        assert combined["x"].shape == (2,)
+        assert combined["x"].dims == ("x",)
+
+        # nor have auto-created any indexes
+        assert combined.indexes == {}
 
 class TestConcatDataArray:
     def test_concat(self) -> None:

From 78d279830c79b7a8ddfb8de09666ed242b4b6292 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Sat, 20 Apr 2024 02:40:56 -0400
Subject: [PATCH 30/64] make test pass by passing create_1d_index down through
 concat

---
 xarray/core/concat.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 5ef3c01d427..bc41ee9327d 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -70,6 +70,7 @@ def concat(
     fill_value=dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
+    create_1d_index: bool = True,
 ):
     """Concatenate xarray objects along a new or existing dimension.
 
@@ -163,6 +164,8 @@ def concat(
 
         If a callable, it must expect a sequence of ``attrs`` dicts and a context object
         as its only parameters.
+    create_1d_index : bool, default is True
+        Whether to create new PandasIndex objects for any new 1D coordinate variables.
 
     Returns
     -------
@@ -246,6 +249,7 @@ def concat(
             fill_value=fill_value,
             join=join,
             combine_attrs=combine_attrs,
+            create_1d_index=create_1d_index,
         )
     elif isinstance(first_obj, Dataset):
         return _dataset_concat(
@@ -258,6 +262,7 @@ def concat(
             fill_value=fill_value,
             join=join,
             combine_attrs=combine_attrs,
+            create_1d_index=create_1d_index,
         )
     else:
         raise TypeError(
@@ -457,6 +462,7 @@ def _dataset_concat(
     fill_value: Any = dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
+    create_1d_index: bool = True,
 ) -> T_Dataset:
     """
     Concatenate a sequence of datasets along a new or existing dimension
@@ -503,7 +509,9 @@ def _dataset_concat(
 
     # case where concat dimension is a coordinate or data_var but not a dimension
     if (dim in coord_names or dim in data_names) and dim not in dim_names:
-        datasets = [ds.expand_dims(dim) for ds in datasets]
+        datasets = [
+            ds.expand_dims(dim, create_1d_index=create_1d_index) for ds in datasets
+        ]
 
     # determine which variables to concatenate
     concat_over, equals, concat_dim_lengths = _calc_concat_over(
@@ -696,6 +704,7 @@ def _dataarray_concat(
     fill_value: object = dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
+    create_1d_index: bool = True,
 ) -> T_DataArray:
     from xarray.core.dataarray import DataArray
 
@@ -732,6 +741,7 @@ def _dataarray_concat(
         fill_value=fill_value,
         join=join,
         combine_attrs=combine_attrs,
+        create_1d_index=create_1d_index,
     )
 
     merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs)

From fc206b093a9951bb167621561d6342ef18b69adf Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Sat, 20 Apr 2024 03:23:48 -0400
Subject: [PATCH 31/64] assert that an UnexpectedDataAccess error is raised
 when create_1d_index=True

---
 xarray/tests/test_concat.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index 87b7c5093bf..dad6e9e965f 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -1033,11 +1033,16 @@ def test_concat_avoids_index_auto_creation_new_1d_coord(self) -> None:
         # create 0D coordinates (without indexes)
         datasets = [
             Dataset(
-                coords={"x": ConcatenatableArray(np.array(0))},
+                coords={"x": ConcatenatableArray(np.array(10))},
             )
             for _ in range(2)
         ]
-        
+
+        # TODO this should raise an error, but it doesn't
+        # It's somehow cheating it's way around the UnexpectedDataAccess error that should be raised
+        with pytest.raises(UnexpectedDataAccess):
+            combined = concat(datasets, dim="x", create_1d_index=True)
+
         # should not raise on concat iff create_1d_index=False
         combined = concat(datasets, dim="x", create_1d_index=False)
         assert combined["x"].shape == (2,)
@@ -1046,6 +1051,7 @@ def test_concat_avoids_index_auto_creation_new_1d_coord(self) -> None:
         # nor have auto-created any indexes
         assert combined.indexes == {}
 
+
 class TestConcatDataArray:
     def test_concat(self) -> None:
         ds = Dataset(

From ce797f126da5edd718f6b2a3b1b43b79c257c95e Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Sat, 20 Apr 2024 03:25:20 -0400
Subject: [PATCH 32/64] eliminate possibility of xarray internals bypassing
 UnexpectedDataAccess error by accessing .array

---
 xarray/tests/__init__.py | 42 +++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index 8ce2fb44174..e336252d4e1 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -234,7 +234,7 @@ def concatenate(
     if any(not isinstance(arr, ConcatenatableArray) for arr in arrays):
         raise TypeError
 
-    result = np.concatenate([arr.array for arr in arrays], axis=axis)
+    result = np.concatenate([arr._array for arr in arrays], axis=axis)
     return ConcatenatableArray(result)
 
 
@@ -243,7 +243,7 @@ def stack(arrays: Iterable[ConcatenatableArray], /, *, axis=0) -> Concatenatable
     if any(not isinstance(arr, ConcatenatableArray) for arr in arrays):
         raise TypeError
 
-    result = np.stack([arr.array for arr in arrays], axis=axis)
+    result = np.stack([arr._array for arr in arrays], axis=axis)
     return ConcatenatableArray(result)
 
 
@@ -267,19 +267,33 @@ def broadcast_to(
     if not isinstance(x, ConcatenatableArray):
         raise TypeError
 
-    result = np.broadcast_to(x.array, shape=shape)
+    result = np.broadcast_to(x._array, shape=shape)
     return ConcatenatableArray(result)
 
 
-class ConcatenatableArray(utils.NDArrayMixin):
+class ConcatenatableArray:
     """Disallows loading or coercing to an index but does support concatenation / stacking."""
 
-    # TODO only reason this is different from InaccessibleArray is to avoid it being a subclass of ExplicitlyIndexed
-
     HANDLED_ARRAY_FUNCTIONS = [concatenate, stack, result_type]
 
     def __init__(self, array):
-        self.array = array
+        # use ._array instead of .array because we don't want this to be accessible even to xarray's internals (e.g. create_default_index_implicit)
+        self._array = array
+
+    @property
+    def dtype(self: Any) -> np.dtype:
+        return self._array.dtype
+
+    @property
+    def shape(self: Any) -> tuple[int, ...]:
+        return self._array.shape
+
+    @property
+    def ndim(self: Any) -> int:
+        return self._array.ndim
+
+    def __repr__(self: Any) -> str:
+        return f"{type(self).__name__}(array={self._array!r})"
 
     def get_duck_array(self):
         raise UnexpectedDataAccess("Tried accessing data")
@@ -287,8 +301,18 @@ def get_duck_array(self):
     def __array__(self, dtype: np.typing.DTypeLike = None):
         raise UnexpectedDataAccess("Tried accessing data")
 
-    def __getitem__(self, key):
-        raise UnexpectedDataAccess("Tried accessing data.")
+    def __getitem__(self, key) -> ConcatenatableArray:
+        """Some cases of concat require supporting expanding dims by dimensions of size 1"""
+        # see https://data-apis.org/array-api/2022.12/API_specification/indexing.html#multi-axis-indexing
+        arr = self._array
+        for axis, indexer_1d in enumerate(key):
+            if indexer_1d is None:
+                arr = np.expand_dims(arr, axis)
+            elif indexer_1d is Ellipsis:
+                pass
+            else:
+                raise UnexpectedDataAccess("Tried accessing data.")
+        return arr
 
     def __array_function__(self, func, types, args, kwargs) -> Any:
         if func not in HANDLED_ARRAY_FUNCTIONS:

From 62e750f46c55209b904cc14ece174fa58cce546a Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 11:31:59 -0400
Subject: [PATCH 33/64] update tests to use private versions of assertions

---
 xarray/tests/test_dataset.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index feee69bf49a..4b1e2141750 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -11,7 +11,6 @@
 from typing import Any, Literal
 
 import numpy as np
-import numpy.testing as npt
 import pandas as pd
 import pytest
 from pandas.core.indexes.datetimes import DatetimeIndex
@@ -3438,13 +3437,10 @@ def test_expand_dims_create_index_data_variable(self, create_1d_index_flag):
         ds = Dataset({"x": 0})
         expanded = ds.expand_dims("x", create_1d_index=create_1d_index_flag)
 
-        # TODO I can't just create the expected dataset directly using constructor because of GH issue 8959
-        # expected = Dataset(data_vars={"x": ("x", [0])})
+        # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
         expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x")
 
-        # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks
-        # assert_identical(expanded, expected)
-        assert expected.data_vars == {"x": Variable(data=[0], dims=["x"])}
+        assert_identical(expanded, expected, check_default_indexes=False)
         assert expanded.indexes == {}
 
     def test_expand_dims_create_index_coordinate_variable(self):
@@ -3455,11 +3451,11 @@ def test_expand_dims_create_index_coordinate_variable(self):
         assert_identical(expanded, expected)
 
         expanded_no_index = ds.expand_dims("x", create_1d_index=False)
+
+        # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
         expected = Dataset(coords={"x": ("x", [0])}).drop_indexes("x")
 
-        # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks
-        # assert_identical(expanded, expected)
-        assert expanded_no_index.coords == {"x": Variable(data=[0], dims=["x"])}
+        assert_identical(expanded_no_index, expected, check_default_indexes=False)
         assert expanded_no_index.indexes == {}
 
     def test_expand_dims_create_index_from_iterable(self):
@@ -3469,13 +3465,11 @@ def test_expand_dims_create_index_from_iterable(self):
         assert_identical(expanded, expected)
 
         expanded_no_index = ds.expand_dims(x=[0, 1], create_1d_index=False)
+
+        # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
         expected = Dataset(coords={"x": ("x", [0, 1])}).drop_indexes("x")
 
-        # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks
-        # assert_identical(expanded, expected)
-        assert list(expanded_no_index.coords) == ["x"]
-        assert isinstance(expanded_no_index.coords["x"].variable, Variable)
-        npt.assert_array_equal(expanded_no_index.coords["x"].data, np.array([0, 1]))
+        assert_identical(expanded, expected, check_default_indexes=False)
         assert expanded_no_index.indexes == {}
 
     @requires_pandas_version_two

From f86c82f1c9efdf0b207f1fb537aea2cefe287c82 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 11:34:37 -0400
Subject: [PATCH 34/64] create_1d_index->create_index

---
 xarray/core/dataarray.py     | 10 ++++------
 xarray/core/dataset.py       | 10 +++++-----
 xarray/tests/test_dataset.py | 12 ++++++------
 3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 818275dedd3..41c9af1bb10 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -2557,7 +2557,7 @@ def expand_dims(
         self,
         dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None,
         axis: None | int | Sequence[int] = None,
-        create_1d_index: bool = True,
+        create_index: bool = True,
         **dim_kwargs: Any,
     ) -> Self:
         """Return a new object with an additional axis (or axes) inserted at
@@ -2568,7 +2568,7 @@ def expand_dims(
         coordinate consisting of a single value.
 
         The automatic creation of indexes to back new 1D coordinate variables
-        controlled by the create_1d_index kwarg.
+        controlled by the create_index kwarg.
 
         Parameters
         ----------
@@ -2585,7 +2585,7 @@ def expand_dims(
             multiple axes are inserted. In this case, dim arguments should be
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
-        create_1d_index : bool, default is True
+        create_index : bool, default is True
             Whether to create new PandasIndex objects for any new 1D coordinate variables.
         **dim_kwargs : int or sequence or ndarray
             The keywords are arbitrary dimensions being inserted and the values
@@ -2650,9 +2650,7 @@ def expand_dims(
             dim = {dim: 1}
 
         dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims")
-        ds = self._to_temp_dataset().expand_dims(
-            dim, axis, create_1d_index=create_1d_index
-        )
+        ds = self._to_temp_dataset().expand_dims(dim, axis, create_index=create_index)
         return self._from_temp_dataset(ds)
 
     def set_index(
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 25cf8263bf5..a2bc3505366 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4497,7 +4497,7 @@ def expand_dims(
         self,
         dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None,
         axis: None | int | Sequence[int] = None,
-        create_1d_index: bool = True,
+        create_index: bool = True,
         **dim_kwargs: Any,
     ) -> Self:
         """Return a new object with an additional axis (or axes) inserted at
@@ -4508,7 +4508,7 @@ def expand_dims(
         coordinate consisting of a single value.
 
         The automatic creation of indexes to back new 1D coordinate variables
-        controlled by the create_1d_index kwarg.
+        controlled by the create_index kwarg.
 
         Parameters
         ----------
@@ -4525,7 +4525,7 @@ def expand_dims(
             multiple axes are inserted. In this case, dim arguments should be
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
-        create_1d_index : bool, default is True
+        create_index : bool, default is True
             Whether to create new PandasIndex objects for any new 1D coordinate variables.
         **dim_kwargs : int or sequence or ndarray
             The keywords are arbitrary dimensions being inserted and the values
@@ -4647,7 +4647,7 @@ def expand_dims(
                 # value within the dim dict to the length of the iterable
                 # for later use.
 
-                if create_1d_index:
+                if create_index:
                     index = PandasIndex(v, k)
                     indexes[k] = index
                     name_and_new_1d_var = index.create_variables()
@@ -4689,7 +4689,7 @@ def expand_dims(
                     variables[k] = v.set_dims(dict(all_dims))
             else:
                 if k not in variables:
-                    if k in coord_names and create_1d_index:
+                    if k in coord_names and create_index:
                         # If dims includes a label of a non-dimension coordinate,
                         # it will be promoted to a 1D coordinate with a single value.
                         index, index_vars = create_default_index_implicit(v.set_dims(k))
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 4b1e2141750..e4251ced7a4 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3431,11 +3431,11 @@ def test_expand_dims_kwargs_python36plus(self) -> None:
         )
         assert_identical(other_way_expected, other_way)
 
-    @pytest.mark.parametrize("create_1d_index_flag", [True, False])
-    def test_expand_dims_create_index_data_variable(self, create_1d_index_flag):
+    @pytest.mark.parametrize("create_index_flag", [True, False])
+    def test_expand_dims_create_index_data_variable(self, create_index_flag):
         # data variables should not gain an index ever
         ds = Dataset({"x": 0})
-        expanded = ds.expand_dims("x", create_1d_index=create_1d_index_flag)
+        expanded = ds.expand_dims("x", create_index=create_index_flag)
 
         # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
         expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x")
@@ -3444,13 +3444,13 @@ def test_expand_dims_create_index_data_variable(self, create_1d_index_flag):
         assert expanded.indexes == {}
 
     def test_expand_dims_create_index_coordinate_variable(self):
-        # coordinate variables should gain an index only if create_1d_index is True (the default)
+        # coordinate variables should gain an index only if create_index is True (the default)
         ds = Dataset(coords={"x": 0})
         expanded = ds.expand_dims("x")
         expected = Dataset({"x": ("x", [0])})
         assert_identical(expanded, expected)
 
-        expanded_no_index = ds.expand_dims("x", create_1d_index=False)
+        expanded_no_index = ds.expand_dims("x", create_index=False)
 
         # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
         expected = Dataset(coords={"x": ("x", [0])}).drop_indexes("x")
@@ -3464,7 +3464,7 @@ def test_expand_dims_create_index_from_iterable(self):
         expected = Dataset({"x": ("x", [0, 1])})
         assert_identical(expanded, expected)
 
-        expanded_no_index = ds.expand_dims(x=[0, 1], create_1d_index=False)
+        expanded_no_index = ds.expand_dims(x=[0, 1], create_index=False)
 
         # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
         expected = Dataset(coords={"x": ("x", [0, 1])}).drop_indexes("x")

From d5d90fdd27673537d47d7acae883571e0604ee91 Mon Sep 17 00:00:00 2001
From: Tom Nicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 09:38:47 -0600
Subject: [PATCH 35/64] Update doc/whats-new.rst

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
---
 doc/whats-new.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index ce05e41243a..2231f0a7d4c 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -30,7 +30,7 @@ New Features
   then, such as broadcasting.
   By `Ilan Gold <https://github.com/ilan-gold>`_.
 - Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg
-  `create_1d_index=False`. (:pull:`8960`)
+  `create_index=False`. (:pull:`8960`)
   By `Tom Nicholas <https://github.com/TomNicholas>`_.
 
 Breaking changes

From 5bb88b81d7541ee581e3eff192bfdfb8502cc97e Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 13:19:28 -0400
Subject: [PATCH 36/64] Rename create_1d_index -> create_index

---
 xarray/core/concat.py       | 16 ++++++++--------
 xarray/tests/test_concat.py |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index bc41ee9327d..b05a524ff01 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -70,7 +70,7 @@ def concat(
     fill_value=dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
-    create_1d_index: bool = True,
+    create_index: bool = True,
 ):
     """Concatenate xarray objects along a new or existing dimension.
 
@@ -164,7 +164,7 @@ def concat(
 
         If a callable, it must expect a sequence of ``attrs`` dicts and a context object
         as its only parameters.
-    create_1d_index : bool, default is True
+    create_index : bool, default is True
         Whether to create new PandasIndex objects for any new 1D coordinate variables.
 
     Returns
@@ -249,7 +249,7 @@ def concat(
             fill_value=fill_value,
             join=join,
             combine_attrs=combine_attrs,
-            create_1d_index=create_1d_index,
+            create_index=create_index,
         )
     elif isinstance(first_obj, Dataset):
         return _dataset_concat(
@@ -262,7 +262,7 @@ def concat(
             fill_value=fill_value,
             join=join,
             combine_attrs=combine_attrs,
-            create_1d_index=create_1d_index,
+            create_index=create_index,
         )
     else:
         raise TypeError(
@@ -462,7 +462,7 @@ def _dataset_concat(
     fill_value: Any = dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
-    create_1d_index: bool = True,
+    create_index: bool = True,
 ) -> T_Dataset:
     """
     Concatenate a sequence of datasets along a new or existing dimension
@@ -510,7 +510,7 @@ def _dataset_concat(
     # case where concat dimension is a coordinate or data_var but not a dimension
     if (dim in coord_names or dim in data_names) and dim not in dim_names:
         datasets = [
-            ds.expand_dims(dim, create_1d_index=create_1d_index) for ds in datasets
+            ds.expand_dims(dim, create_index=create_index) for ds in datasets
         ]
 
     # determine which variables to concatenate
@@ -704,7 +704,7 @@ def _dataarray_concat(
     fill_value: object = dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
-    create_1d_index: bool = True,
+    create_index: bool = True,
 ) -> T_DataArray:
     from xarray.core.dataarray import DataArray
 
@@ -741,7 +741,7 @@ def _dataarray_concat(
         fill_value=fill_value,
         join=join,
         combine_attrs=combine_attrs,
-        create_1d_index=create_1d_index,
+        create_index=create_index,
     )
 
     merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs)
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index dad6e9e965f..e4af4f29358 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -1041,10 +1041,10 @@ def test_concat_avoids_index_auto_creation_new_1d_coord(self) -> None:
         # TODO this should raise an error, but it doesn't
         # It's somehow cheating it's way around the UnexpectedDataAccess error that should be raised
         with pytest.raises(UnexpectedDataAccess):
-            combined = concat(datasets, dim="x", create_1d_index=True)
+            combined = concat(datasets, dim="x", create_index=True)
 
-        # should not raise on concat iff create_1d_index=False
-        combined = concat(datasets, dim="x", create_1d_index=False)
+        # should not raise on concat iff create_index=False
+        combined = concat(datasets, dim="x", create_index=False)
         assert combined["x"].shape == (2,)
         assert combined["x"].dims == ("x",)
 

From 1d471b1197802a0ff6748c04d657936ad9358c45 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 13:41:17 -0400
Subject: [PATCH 37/64] fix ConcatenatableArray

---
 xarray/tests/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index f6756c3b21f..6a31c753e22 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -311,7 +311,7 @@ def __getitem__(self, key) -> ConcatenatableArray:
                 pass
             else:
                 raise UnexpectedDataAccess("Tried accessing data.")
-        return arr
+        return ConcatenatableArray(arr)
 
     def __array_function__(self, func, types, args, kwargs) -> Any:
         if func not in HANDLED_ARRAY_FUNCTIONS:

From 766605d855443f8c302bc130d0fe47b91db77092 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 13:41:50 -0400
Subject: [PATCH 38/64] formatting

---
 xarray/core/concat.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index b05a524ff01..8cb6218687c 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -509,9 +509,7 @@ def _dataset_concat(
 
     # case where concat dimension is a coordinate or data_var but not a dimension
     if (dim in coord_names or dim in data_names) and dim not in dim_names:
-        datasets = [
-            ds.expand_dims(dim, create_index=create_index) for ds in datasets
-        ]
+        datasets = [ds.expand_dims(dim, create_index=create_index) for ds in datasets]
 
     # determine which variables to concatenate
     concat_over, equals, concat_dim_lengths = _calc_concat_over(

From 10c0ed59c6e64f6adfa53e7bf25fdf3e79f1ce91 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 13:48:18 -0400
Subject: [PATCH 39/64] whatsnew

---
 doc/whats-new.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index e71bda46803..4df03becb18 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -32,6 +32,9 @@ New Features
 - Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg
   `create_index=False`. (:pull:`8960`)
   By `Tom Nicholas <https://github.com/TomNicholas>`_.
+- Avoid automatically re-creating 1D pandas indexes in :py:func:`concat()`. Also optionally avoid creating 1D indexes for
+  new dimension coordinates by the new kwarg `create_index=False`. (:pull:`8872`)
+  By `Tom Nicholas <https://github.com/TomNicholas>`_.
 
 Breaking changes
 ~~~~~~~~~~~~~~~~

From 51eea5d37f3fc5e856eeff3be087d0d26e0c9962 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 13:50:52 -0400
Subject: [PATCH 40/64] add new create_index kwarg to overloads

---
 xarray/core/concat.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 8cb6218687c..c615a3c192e 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -43,6 +43,7 @@ def concat(
     fill_value: object = dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
+    create_index: bool = True,
 ) -> T_Dataset: ...
 
 
@@ -57,6 +58,7 @@ def concat(
     fill_value: object = dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
+    create_index: bool = True,
 ) -> T_DataArray: ...
 
 

From bde9f2bd4a0831166b2cf08604e689a5a2c9184d Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 13:58:53 -0400
Subject: [PATCH 41/64] split vars into data_vars and coord_vars in one loop

---
 xarray/core/concat.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index c615a3c192e..f8c8e94c427 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -661,11 +661,13 @@ def get_indexes(name):
             f"Variables {absent_coord_names!r} are coordinates in some datasets but not others."
         )
 
-    coord_vars = {
-        name: result_var
-        for name, result_var in result_vars.items()
-        if name in coord_names
-    }
+    result_data_vars = {}
+    coord_vars = {}
+    for name, result_var in result_vars.items():
+        if name in coord_names:
+            coord_vars[name] = result_var
+        else:
+            result_data_vars[name] = result_var
 
     if index is not None:
         if dim_var is not None:
@@ -680,12 +682,6 @@ def get_indexes(name):
     # TODO: add indexes at Dataset creation (when it is supported)
     coords = Coordinates(coord_vars, indexes=result_indexes)
 
-    result_data_vars = {
-        name: result_var
-        for name, result_var in result_vars.items()
-        if name not in coord_names
-    }
-
     result = type(datasets[0])(result_data_vars, coords=coords, attrs=result_attrs)
     result.encoding = result_encoding
 

From d5241ce7994625fb647b92f4afd34d2caaeb8948 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 14:13:35 -0400
Subject: [PATCH 42/64] avoid mypy error by using new variable name

---
 xarray/core/concat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index f8c8e94c427..7bbb0283d19 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -680,9 +680,9 @@ def get_indexes(name):
         unlabeled_dims = unlabeled_dims - set([dim])
 
     # TODO: add indexes at Dataset creation (when it is supported)
-    coords = Coordinates(coord_vars, indexes=result_indexes)
+    coords_obj = Coordinates(coord_vars, indexes=result_indexes)
 
-    result = type(datasets[0])(result_data_vars, coords=coords, attrs=result_attrs)
+    result = type(datasets[0])(result_data_vars, coords=coords_obj, attrs=result_attrs)
     result.encoding = result_encoding
 
     result = result.drop_vars(unlabeled_dims, errors="ignore")

From 7e8f895c2746e2a75979b5675967b434aef09ee6 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 23:31:08 -0400
Subject: [PATCH 43/64] warn if create_index=True but no index created because
 dimension variable was a data var not a coord

---
 xarray/core/dataset.py       | 7 +++++++
 xarray/tests/test_dataset.py | 7 ++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index a2bc3505366..3985d2cad1a 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4696,6 +4696,13 @@ def expand_dims(
                         indexes[k] = index
                         variables.update(index_vars)
                     else:
+                        if create_index:
+                            warnings.warn(
+                                f"No index created for dimension {k} because variable {k} is not a coordinate. "
+                                f"To create an index for {k}, please first call `.set_coords({k})` on this object.",
+                                UserWarning,
+                            )
+
                         # create 1D variable without creating a new index
                         new_1d_var = v.set_dims(k)
                         variables.update({k: new_1d_var})
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index e4251ced7a4..17ef63d826e 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3435,7 +3435,12 @@ def test_expand_dims_kwargs_python36plus(self) -> None:
     def test_expand_dims_create_index_data_variable(self, create_index_flag):
         # data variables should not gain an index ever
         ds = Dataset({"x": 0})
-        expanded = ds.expand_dims("x", create_index=create_index_flag)
+
+        if create_index_flag:
+            with pytest.warns(UserWarning, match="No index created"):
+                expanded = ds.expand_dims("x", create_index=create_index_flag)
+        else:
+            expanded = ds.expand_dims("x", create_index=create_index_flag)
 
         # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
         expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x")

From ed85446d54ea8791f9224b71da833fd0f5d9c1eb Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Fri, 26 Apr 2024 23:32:53 -0400
Subject: [PATCH 44/64] add string marks in warning message

---
 xarray/core/dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 3985d2cad1a..4f9125a1ab0 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4699,7 +4699,7 @@ def expand_dims(
                         if create_index:
                             warnings.warn(
                                 f"No index created for dimension {k} because variable {k} is not a coordinate. "
-                                f"To create an index for {k}, please first call `.set_coords({k})` on this object.",
+                                f"To create an index for {k}, please first call `.set_coords('{k}')` on this object.",
                                 UserWarning,
                             )
 

From 5894724df6bf3412396acca05e47cd2aa7dd3fad Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Mon, 29 Apr 2024 11:33:35 -0400
Subject: [PATCH 45/64] regression test for dtype changing in to_stacked_array

---
 xarray/tests/test_dataset.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 40cf85484da..006a876d37f 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3970,6 +3970,25 @@ def test_to_stacked_array_to_unstacked_dataset_different_dimension(self) -> None
         x = y.to_unstacked_dataset("features")
         assert_identical(D, x)
 
+    def test_to_stacked_array_preserves_dtype(self) -> None:
+        # regression test for bug found in https://github.com/pydata/xarray/pull/8872#issuecomment-2081218616
+        ds = xr.Dataset(
+            data_vars={
+                "a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]),
+                "b": ("x", [6, 7]),
+            },
+            coords={"y": ["u", "v", "w"]},
+        )
+        stacked = ds.to_stacked_array("z", sample_dims=["x"])
+
+        # coordinate created from variables names should be of string dtype
+        data = np.array(["a", "a", "a", "b"], dtype="<U1")
+        expected_stacked_variable = DataArray(name="variable", data=data, dims="z")
+        assert_identical(
+            stacked.coords["variable"].drop_vars(["z", "variable", "y"]),
+            expected_stacked_variable,
+        )
+
     def test_update(self) -> None:
         data = create_test_data(seed=0)
         expected = data.copy()

From dad94332f3083a17ec57b5dfea4d93aba3513abc Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Mon, 29 Apr 2024 11:34:56 -0400
Subject: [PATCH 46/64] correct doctest

---
 xarray/core/dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 4f9125a1ab0..7e6741257a4 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -5383,7 +5383,7 @@ def to_stacked_array(
                [3, 4, 5, 7]])
         Coordinates:
           * z         (z) object 32B MultiIndex
-          * variable  (z) object 32B 'a' 'a' 'a' 'b'
+          * variable  (z) <U1 16B 'a' 'a' 'a' 'b'
           * y         (z) object 32B 'u' 'v' 'w' nan
         Dimensions without coordinates: x
 

From 36a2223d3828cc257ef6f8659de11ea99610cb0a Mon Sep 17 00:00:00 2001
From: Tom Nicholas <tom@cworthy.org>
Date: Mon, 29 Apr 2024 10:27:09 -0600
Subject: [PATCH 47/64] Remove outdated comment

---
 xarray/tests/test_concat.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index e4af4f29358..24a17ceebc1 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -1038,10 +1038,8 @@ def test_concat_avoids_index_auto_creation_new_1d_coord(self) -> None:
             for _ in range(2)
         ]
 
-        # TODO this should raise an error, but it doesn't
-        # It's somehow cheating it's way around the UnexpectedDataAccess error that should be raised
         with pytest.raises(UnexpectedDataAccess):
-            combined = concat(datasets, dim="x", create_index=True)
+            concat(datasets, dim="x", create_index=True)
 
         # should not raise on concat iff create_index=False
         combined = concat(datasets, dim="x", create_index=False)

From e17c13f6cb4954a94c037a4e2fb4c73d04ebdec3 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Mon, 29 Apr 2024 12:34:14 -0400
Subject: [PATCH 48/64] test we can skip creation of indexes during shape
 promotion

---
 xarray/tests/test_concat.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index e4af4f29358..3c11db00739 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -1051,6 +1051,15 @@ def test_concat_avoids_index_auto_creation_new_1d_coord(self) -> None:
         # nor have auto-created any indexes
         assert combined.indexes == {}
 
+    def test_concat_promote_shape_without_creating_new_index(self) -> None:
+        # different shapes but neither have indexes
+        ds1 = Dataset(coords={"x": 0})
+        ds2 = Dataset(data_vars={"x": [1]}).drop_indexes("x")
+        actual = concat([ds1, ds2], dim="x", create_index=False)
+        expected = Dataset(data_vars={"x": [0, 1]}).drop_indexes("x")
+        assert_identical(actual, expected, check_default_indexes=False)
+        assert actual.indexes == {}
+
 
 class TestConcatDataArray:
     def test_concat(self) -> None:

From e8fa857c0b4bf9ddd7909b6d202eddacc4e21173 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Mon, 29 Apr 2024 12:35:44 -0400
Subject: [PATCH 49/64] make shape promotion test pass

---
 xarray/core/concat.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 7bbb0283d19..b530a1e2ca2 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -576,7 +576,8 @@ def get_indexes(name):
                 var = ds._variables[name]
                 if not var.dims:
                     data = var.set_dims(dim).values
-                    yield PandasIndex(data, dim, coord_dtype=var.dtype)
+                    if create_index:
+                        yield PandasIndex(data, dim, coord_dtype=var.dtype)
 
     # create concatenation index, needed for later reindexing
     file_start_indexes = np.append(0, np.cumsum(concat_dim_lengths))

From 6dd57a976f6d7d98dc5dfccd816ab29fc68dfde0 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Mon, 29 Apr 2024 12:48:31 -0400
Subject: [PATCH 50/64] point to issue in whatsnew

---
 doc/whats-new.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 0ca4253ee2b..c0bea023639 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -32,8 +32,8 @@ New Features
 - Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg
   `create_index=False`. (:pull:`8960`)
   By `Tom Nicholas <https://github.com/TomNicholas>`_.
-- Avoid automatically re-creating 1D pandas indexes in :py:func:`concat()`. Also optionally avoid creating 1D indexes for
-  new dimension coordinates by the new kwarg `create_index=False`. (:pull:`8872`)
+- Avoid automatically re-creating 1D pandas indexes in :py:func:`concat()`. Also added option to avoid creating 1D indexes for
+  new dimension coordinates by passing the new kwarg `create_index=False`. (:issue:`8871`, :pull:`8872`)
   By `Tom Nicholas <https://github.com/TomNicholas>`_.
 
 Breaking changes

From b0e36124c04255378ee3f7928fb5d0276f628f89 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 1 May 2024 14:49:07 -0400
Subject: [PATCH 51/64] don't create dimension coordinates just to drop them at
 the end

---
 xarray/core/concat.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index b530a1e2ca2..d4dae1a8657 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -447,7 +447,7 @@ def _parse_datasets(
             if dim in dims:
                 continue
 
-            if dim not in dim_coords:
+            if dim in ds.coords and dim not in dim_coords:
                 dim_coords[dim] = ds.coords[dim].variable
         dims = dims | set(ds.dims)
 
@@ -686,8 +686,6 @@ def get_indexes(name):
     result = type(datasets[0])(result_data_vars, coords=coords_obj, attrs=result_attrs)
     result.encoding = result_encoding
 
-    result = result.drop_vars(unlabeled_dims, errors="ignore")
-
     return result
 
 

From ff70fc7b1b52f225cba199c838a738d46d71f0cc Mon Sep 17 00:00:00 2001
From: Tom Nicholas <tom@cworthy.org>
Date: Wed, 1 May 2024 12:57:45 -0600
Subject: [PATCH 52/64] Remove ToDo about not using Coordinates object to pass
 indexes

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
---
 xarray/core/concat.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index d4dae1a8657..d67ceba9f0c 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -680,7 +680,6 @@ def get_indexes(name):
         result_indexes[dim] = index
         unlabeled_dims = unlabeled_dims - set([dim])
 
-    # TODO: add indexes at Dataset creation (when it is supported)
     coords_obj = Coordinates(coord_vars, indexes=result_indexes)
 
     result = type(datasets[0])(result_data_vars, coords=coords_obj, attrs=result_attrs)

From 2f97a5cfe45677300711fa6e690856e1bbb81768 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 1 May 2024 15:06:57 -0400
Subject: [PATCH 53/64] get rid of unlabeled_dims variable entirely

---
 xarray/core/concat.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index d67ceba9f0c..79dc906c099 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -498,7 +498,6 @@ def _dataset_concat(
         datasets
     )
     dim_names = set(dim_coords)
-    unlabeled_dims = dim_names - coord_names
 
     both_data_and_coords = coord_names & data_names
     if both_data_and_coords:
@@ -519,7 +518,7 @@ def _dataset_concat(
     )
 
     # determine which variables to merge, and then merge them according to compat
-    variables_to_merge = (coord_names | data_names) - concat_over - unlabeled_dims
+    variables_to_merge = (coord_names | data_names) - concat_over
 
     result_vars = {}
     result_indexes = {}
@@ -678,7 +677,6 @@ def get_indexes(name):
 
         coord_vars[dim] = index_vars[dim]
         result_indexes[dim] = index
-        unlabeled_dims = unlabeled_dims - set([dim])
 
     coords_obj = Coordinates(coord_vars, indexes=result_indexes)
 

From 6d825e5d97088037933c3242d136b9c41488fd07 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 8 May 2024 14:11:38 -0400
Subject: [PATCH 54/64] move ConcatenatableArray and similar to new file

---
 xarray/tests/arrays.py | 176 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 176 insertions(+)
 create mode 100644 xarray/tests/arrays.py

diff --git a/xarray/tests/arrays.py b/xarray/tests/arrays.py
new file mode 100644
index 00000000000..2df945d2b22
--- /dev/null
+++ b/xarray/tests/arrays.py
@@ -0,0 +1,176 @@
+from typing import Callable, Any
+from collections.abc import Iterable
+
+import numpy as np
+from xarray.core import utils
+from xarray.core.indexing import ExplicitlyIndexed
+
+"""
+This module contains various lazy array classes which can be wrapped and manipulated by xarray objects but will raise on data access.
+"""
+
+
+class UnexpectedDataAccess(Exception):
+    pass
+
+
+class InaccessibleArray(utils.NDArrayMixin, ExplicitlyIndexed):
+    """Disallows any loading."""
+
+    def __init__(self, array):
+        self.array = array
+
+    def get_duck_array(self):
+        raise UnexpectedDataAccess("Tried accessing data")
+
+    def __array__(self, dtype: np.typing.DTypeLike = None):
+        raise UnexpectedDataAccess("Tried accessing data")
+
+    def __getitem__(self, key):
+        raise UnexpectedDataAccess("Tried accessing data.")
+
+
+class FirstElementAccessibleArray(InaccessibleArray):
+    def __getitem__(self, key):
+        tuple_idxr = key.tuple
+        if len(tuple_idxr) > 1:
+            raise UnexpectedDataAccess("Tried accessing more than one element.")
+        return self.array[tuple_idxr]
+
+
+class DuckArrayWrapper(utils.NDArrayMixin):
+    """Array-like that prevents casting to array.
+    Modeled after cupy."""
+
+    def __init__(self, array: np.ndarray):
+        self.array = array
+
+    def __getitem__(self, key):
+        return type(self)(self.array[key])
+
+    def __array__(self, dtype: np.typing.DTypeLike = None):
+        raise UnexpectedDataAccess("Tried accessing data")
+
+    def __array_namespace__(self):
+        """Present to satisfy is_duck_array test."""
+
+
+CONCATENATABLEARRAY_HANDLED_ARRAY_FUNCTIONS: dict[str, Callable] = {}
+
+
+def implements(numpy_function):
+    """Register an __array_function__ implementation for ConcatenatableArray objects."""
+
+    def decorator(func):
+        CONCATENATABLEARRAY_HANDLED_ARRAY_FUNCTIONS[numpy_function] = func
+        return func
+
+    return decorator
+
+
+@implements(np.concatenate)
+def concatenate(
+    arrays: Iterable["ConcatenatableArray"], /, *, axis=0
+) -> "ConcatenatableArray":
+    if any(not isinstance(arr, ConcatenatableArray) for arr in arrays):
+        raise TypeError
+
+    result = np.concatenate([arr._array for arr in arrays], axis=axis)
+    return ConcatenatableArray(result)
+
+
+@implements(np.stack)
+def stack(arrays: Iterable["ConcatenatableArray"], /, *, axis=0) -> "ConcatenatableArray":
+    if any(not isinstance(arr, ConcatenatableArray) for arr in arrays):
+        raise TypeError
+
+    result = np.stack([arr._array for arr in arrays], axis=axis)
+    return ConcatenatableArray(result)
+
+
+@implements(np.result_type)
+def result_type(*arrays_and_dtypes) -> np.dtype:
+    """Called by xarray to ensure all arguments to concat have the same dtype."""
+    first_dtype, *other_dtypes = (np.dtype(obj) for obj in arrays_and_dtypes)
+    for other_dtype in other_dtypes:
+        if other_dtype != first_dtype:
+            raise ValueError("dtypes not all consistent")
+    return first_dtype
+
+
+@implements(np.broadcast_to)
+def broadcast_to(
+    x: "ConcatenatableArray", /, shape: tuple[int, ...]
+) -> "ConcatenatableArray":
+    """
+    Broadcasts an array to a specified shape, by either manipulating chunk keys or copying chunk manifest entries.
+    """
+    if not isinstance(x, ConcatenatableArray):
+        raise TypeError
+
+    result = np.broadcast_to(x._array, shape=shape)
+    return ConcatenatableArray(result)
+
+
+class ConcatenatableArray:
+    """Disallows loading or coercing to an index but does support concatenation / stacking."""
+
+    def __init__(self, array):
+        # use ._array instead of .array because we don't want this to be accessible even to xarray's internals (e.g. create_default_index_implicit)
+        self._array = array
+
+    @property
+    def dtype(self: Any) -> np.dtype:
+        return self._array.dtype
+
+    @property
+    def shape(self: Any) -> tuple[int, ...]:
+        return self._array.shape
+
+    @property
+    def ndim(self: Any) -> int:
+        return self._array.ndim
+
+    def __repr__(self: Any) -> str:
+        return f"{type(self).__name__}(array={self._array!r})"
+
+    def get_duck_array(self):
+        raise UnexpectedDataAccess("Tried accessing data")
+
+    def __array__(self, dtype: np.typing.DTypeLike = None):
+        raise UnexpectedDataAccess("Tried accessing data")
+
+    def __getitem__(self, key) -> "ConcatenatableArray":
+        """Some cases of concat require supporting expanding dims by dimensions of size 1"""
+        # see https://data-apis.org/array-api/2022.12/API_specification/indexing.html#multi-axis-indexing
+        arr = self._array
+        for axis, indexer_1d in enumerate(key):
+            if indexer_1d is None:
+                arr = np.expand_dims(arr, axis)
+            elif indexer_1d is Ellipsis:
+                pass
+            else:
+                raise UnexpectedDataAccess("Tried accessing data.")
+        return ConcatenatableArray(arr)
+
+    def __array_function__(self, func, types, args, kwargs) -> Any:
+        if func not in CONCATENATABLEARRAY_HANDLED_ARRAY_FUNCTIONS:
+            return NotImplemented
+
+        # Note: this allows subclasses that don't override
+        # __array_function__ to handle ManifestArray objects
+        if not all(issubclass(t, ConcatenatableArray) for t in types):
+            return NotImplemented
+
+        return CONCATENATABLEARRAY_HANDLED_ARRAY_FUNCTIONS[func](*args, **kwargs)
+
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> Any:
+        """We have to define this in order to convince xarray that this class is a duckarray, even though we will never support ufuncs."""
+        return NotImplemented
+
+    def astype(self, dtype: np.dtype, /, *, copy: bool = True) -> "ConcatenatableArray":
+        """Needed because xarray will call this even when it's a no-op"""
+        if dtype != self.dtype:
+            raise NotImplementedError()
+        else:
+            return self

From b88b5a6b6be486b2ecff1d1301cf513f8417e0cd Mon Sep 17 00:00:00 2001
From: Tom Nicholas <tom@cworthy.org>
Date: Wed, 8 May 2024 12:13:07 -0600
Subject: [PATCH 55/64] formatting nit

Co-authored-by: Justus Magin <keewis@users.noreply.github.com>
---
 xarray/core/concat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 79dc906c099..3b981fa1c61 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -166,7 +166,7 @@ def concat(
 
         If a callable, it must expect a sequence of ``attrs`` dicts and a context object
         as its only parameters.
-    create_index : bool, default is True
+    create_index : bool, default: True
         Whether to create new PandasIndex objects for any new 1D coordinate variables.
 
     Returns

From b243150372bc4969e1f663c74c14424f01f847b7 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 8 May 2024 14:25:19 -0400
Subject: [PATCH 56/64] renamed create_index -> create_index_for_new_dim in
 concat

---
 doc/whats-new.rst           |  2 +-
 xarray/core/concat.py       | 29 +++++++++++++++++------------
 xarray/tests/test_concat.py |  8 ++++----
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index ac172137ad3..2e035e5dd07 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -33,7 +33,7 @@ New Features
   `create_index=False`. (:pull:`8960`)
   By `Tom Nicholas <https://github.com/TomNicholas>`_.
 - Avoid automatically re-creating 1D pandas indexes in :py:func:`concat()`. Also added option to avoid creating 1D indexes for
-  new dimension coordinates by passing the new kwarg `create_index=False`. (:issue:`8871`, :pull:`8872`)
+  new dimension coordinates by passing the new kwarg `create_index_for_new_dim=False`. (:issue:`8871`, :pull:`8872`)
   By `Tom Nicholas <https://github.com/TomNicholas>`_.
 
 Breaking changes
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 3b981fa1c61..28d214881c5 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -43,7 +43,7 @@ def concat(
     fill_value: object = dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
-    create_index: bool = True,
+    create_index_for_new_dim: bool = True,
 ) -> T_Dataset: ...
 
 
@@ -58,7 +58,7 @@ def concat(
     fill_value: object = dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
-    create_index: bool = True,
+    create_index_for_new_dim: bool = True,
 ) -> T_DataArray: ...
 
 
@@ -72,7 +72,7 @@ def concat(
     fill_value=dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
-    create_index: bool = True,
+    create_index_for_new_dim: bool = True,
 ):
     """Concatenate xarray objects along a new or existing dimension.
 
@@ -166,8 +166,8 @@ def concat(
 
         If a callable, it must expect a sequence of ``attrs`` dicts and a context object
         as its only parameters.
-    create_index : bool, default: True
-        Whether to create new PandasIndex objects for any new 1D coordinate variables.
+    create_index_for_new_dim : bool, default is True
+        Whether to create a new ``PandasIndex`` object when the objects being concatenated contain scalar variables named ``dim``.
 
     Returns
     -------
@@ -223,6 +223,8 @@ def concat(
         x        (new_dim) <U1 8B 'a' 'b'
       * y        (y) int64 24B 10 20 30
       * new_dim  (new_dim) int64 16B -90 -100
+
+
     """
     # TODO: add ignore_index arguments copied from pandas.concat
     # TODO: support concatenating scalar coordinates even if the concatenated
@@ -251,7 +253,7 @@ def concat(
             fill_value=fill_value,
             join=join,
             combine_attrs=combine_attrs,
-            create_index=create_index,
+            create_index_for_new_dim=create_index_for_new_dim,
         )
     elif isinstance(first_obj, Dataset):
         return _dataset_concat(
@@ -264,7 +266,7 @@ def concat(
             fill_value=fill_value,
             join=join,
             combine_attrs=combine_attrs,
-            create_index=create_index,
+            create_index_for_new_dim=create_index_for_new_dim,
         )
     else:
         raise TypeError(
@@ -464,7 +466,7 @@ def _dataset_concat(
     fill_value: Any = dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
-    create_index: bool = True,
+    create_index_for_new_dim: bool = True,
 ) -> T_Dataset:
     """
     Concatenate a sequence of datasets along a new or existing dimension
@@ -510,7 +512,10 @@ def _dataset_concat(
 
     # case where concat dimension is a coordinate or data_var but not a dimension
     if (dim in coord_names or dim in data_names) and dim not in dim_names:
-        datasets = [ds.expand_dims(dim, create_index=create_index) for ds in datasets]
+        datasets = [
+            ds.expand_dims(dim, create_index=create_index_for_new_dim)
+            for ds in datasets
+        ]
 
     # determine which variables to concatenate
     concat_over, equals, concat_dim_lengths = _calc_concat_over(
@@ -575,7 +580,7 @@ def get_indexes(name):
                 var = ds._variables[name]
                 if not var.dims:
                     data = var.set_dims(dim).values
-                    if create_index:
+                    if create_index_for_new_dim:
                         yield PandasIndex(data, dim, coord_dtype=var.dtype)
 
     # create concatenation index, needed for later reindexing
@@ -696,7 +701,7 @@ def _dataarray_concat(
     fill_value: object = dtypes.NA,
     join: JoinOptions = "outer",
     combine_attrs: CombineAttrsOptions = "override",
-    create_index: bool = True,
+    create_index_for_new_dim: bool = True,
 ) -> T_DataArray:
     from xarray.core.dataarray import DataArray
 
@@ -733,7 +738,7 @@ def _dataarray_concat(
         fill_value=fill_value,
         join=join,
         combine_attrs=combine_attrs,
-        create_index=create_index,
+        create_index_for_new_dim=create_index_for_new_dim,
     )
 
     merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs)
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index e44e6150be9..0c570de3b52 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -1039,10 +1039,10 @@ def test_concat_avoids_index_auto_creation_new_1d_coord(self) -> None:
         ]
 
         with pytest.raises(UnexpectedDataAccess):
-            concat(datasets, dim="x", create_index=True)
+            concat(datasets, dim="x", create_index_for_new_dim=True)
 
-        # should not raise on concat iff create_index=False
-        combined = concat(datasets, dim="x", create_index=False)
+        # should not raise on concat iff create_index_for_new_dim=False
+        combined = concat(datasets, dim="x", create_index_for_new_dim=False)
         assert combined["x"].shape == (2,)
         assert combined["x"].dims == ("x",)
 
@@ -1053,7 +1053,7 @@ def test_concat_promote_shape_without_creating_new_index(self) -> None:
         # different shapes but neither have indexes
         ds1 = Dataset(coords={"x": 0})
         ds2 = Dataset(data_vars={"x": [1]}).drop_indexes("x")
-        actual = concat([ds1, ds2], dim="x", create_index=False)
+        actual = concat([ds1, ds2], dim="x", create_index_for_new_dim=False)
         expected = Dataset(data_vars={"x": [0, 1]}).drop_indexes("x")
         assert_identical(actual, expected, check_default_indexes=False)
         assert actual.indexes == {}

From 9e9e1683591309567565d940d038c9fd7a65ad5a Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 8 May 2024 14:29:39 -0400
Subject: [PATCH 57/64] renamed create_index -> create_index_for_new_dim in
 expand_dims

---
 doc/whats-new.rst            |  2 +-
 xarray/core/dataarray.py     | 12 +++++++-----
 xarray/core/dataset.py       | 14 +++++++-------
 xarray/tests/test_dataset.py | 22 ++++++++++++++--------
 4 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 2e035e5dd07..11657045d83 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -30,7 +30,7 @@ New Features
   then, such as broadcasting.
   By `Ilan Gold <https://github.com/ilan-gold>`_.
 - Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg
-  `create_index=False`. (:pull:`8960`)
+  `create_index_for_new_dim=False`. (:pull:`8960`)
   By `Tom Nicholas <https://github.com/TomNicholas>`_.
 - Avoid automatically re-creating 1D pandas indexes in :py:func:`concat()`. Also added option to avoid creating 1D indexes for
   new dimension coordinates by passing the new kwarg `create_index_for_new_dim=False`. (:issue:`8871`, :pull:`8872`)
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index d5aa4688ce1..9ef5e56152b 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -2558,7 +2558,7 @@ def expand_dims(
         self,
         dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None,
         axis: None | int | Sequence[int] = None,
-        create_index: bool = True,
+        create_index_for_new_dim: bool = True,
         **dim_kwargs: Any,
     ) -> Self:
         """Return a new object with an additional axis (or axes) inserted at
@@ -2569,7 +2569,7 @@ def expand_dims(
         coordinate consisting of a single value.
 
         The automatic creation of indexes to back new 1D coordinate variables
-        controlled by the create_index kwarg.
+        controlled by the create_index_for_new_dim kwarg.
 
         Parameters
         ----------
@@ -2586,8 +2586,8 @@ def expand_dims(
             multiple axes are inserted. In this case, dim arguments should be
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
-        create_index : bool, default is True
-            Whether to create new PandasIndex objects for any new 1D coordinate variables.
+        create_index_for_new_dim : bool, default is True
+            Whether to create new ``PandasIndex`` objects when the object being expanded contains scalar variables with names in ``dim``.
         **dim_kwargs : int or sequence or ndarray
             The keywords are arbitrary dimensions being inserted and the values
             are either the lengths of the new dims (if int is given), or their
@@ -2651,7 +2651,9 @@ def expand_dims(
             dim = {dim: 1}
 
         dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims")
-        ds = self._to_temp_dataset().expand_dims(dim, axis, create_index=create_index)
+        ds = self._to_temp_dataset().expand_dims(
+            dim, axis, create_index_for_new_dim=create_index_for_new_dim
+        )
         return self._from_temp_dataset(ds)
 
     def set_index(
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 0ff7ce6c7ee..b0c5215570a 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4513,7 +4513,7 @@ def expand_dims(
         self,
         dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None,
         axis: None | int | Sequence[int] = None,
-        create_index: bool = True,
+        create_index_for_new_dim: bool = True,
         **dim_kwargs: Any,
     ) -> Self:
         """Return a new object with an additional axis (or axes) inserted at
@@ -4524,7 +4524,7 @@ def expand_dims(
         coordinate consisting of a single value.
 
         The automatic creation of indexes to back new 1D coordinate variables
-        controlled by the create_index kwarg.
+        controlled by the create_index_for_new_dim kwarg.
 
         Parameters
         ----------
@@ -4541,8 +4541,8 @@ def expand_dims(
             multiple axes are inserted. In this case, dim arguments should be
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
-        create_index : bool, default is True
-            Whether to create new PandasIndex objects for any new 1D coordinate variables.
+        create_index_for_new_dim : bool, default is True
+            Whether to create new ``PandasIndex`` objects when the object being expanded contains scalar variables with names in ``dim``.
         **dim_kwargs : int or sequence or ndarray
             The keywords are arbitrary dimensions being inserted and the values
             are either the lengths of the new dims (if int is given), or their
@@ -4663,7 +4663,7 @@ def expand_dims(
                 # value within the dim dict to the length of the iterable
                 # for later use.
 
-                if create_index:
+                if create_index_for_new_dim:
                     index = PandasIndex(v, k)
                     indexes[k] = index
                     name_and_new_1d_var = index.create_variables()
@@ -4705,14 +4705,14 @@ def expand_dims(
                     variables[k] = v.set_dims(dict(all_dims))
             else:
                 if k not in variables:
-                    if k in coord_names and create_index:
+                    if k in coord_names and create_index_for_new_dim:
                         # If dims includes a label of a non-dimension coordinate,
                         # it will be promoted to a 1D coordinate with a single value.
                         index, index_vars = create_default_index_implicit(v.set_dims(k))
                         indexes[k] = index
                         variables.update(index_vars)
                     else:
-                        if create_index:
+                        if create_index_for_new_dim:
                             warnings.warn(
                                 f"No index created for dimension {k} because variable {k} is not a coordinate. "
                                 f"To create an index for {k}, please first call `.set_coords('{k}')` on this object.",
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 6cb4ecc537b..1aec179a4ad 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3430,16 +3430,22 @@ def test_expand_dims_kwargs_python36plus(self) -> None:
         )
         assert_identical(other_way_expected, other_way)
 
-    @pytest.mark.parametrize("create_index_flag", [True, False])
-    def test_expand_dims_create_index_data_variable(self, create_index_flag):
+    @pytest.mark.parametrize("create_index_for_new_dim_flag", [True, False])
+    def test_expand_dims_create_index_data_variable(
+        self, create_index_for_new_dim_flag
+    ):
         # data variables should not gain an index ever
         ds = Dataset({"x": 0})
 
-        if create_index_flag:
+        if create_index_for_new_dim_flag:
             with pytest.warns(UserWarning, match="No index created"):
-                expanded = ds.expand_dims("x", create_index=create_index_flag)
+                expanded = ds.expand_dims(
+                    "x", create_index_for_new_dim=create_index_for_new_dim_flag
+                )
         else:
-            expanded = ds.expand_dims("x", create_index=create_index_flag)
+            expanded = ds.expand_dims(
+                "x", create_index_for_new_dim=create_index_for_new_dim_flag
+            )
 
         # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
         expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x")
@@ -3448,13 +3454,13 @@ def test_expand_dims_create_index_data_variable(self, create_index_flag):
         assert expanded.indexes == {}
 
     def test_expand_dims_create_index_coordinate_variable(self):
-        # coordinate variables should gain an index only if create_index is True (the default)
+        # coordinate variables should gain an index only if create_index_for_new_dim is True (the default)
         ds = Dataset(coords={"x": 0})
         expanded = ds.expand_dims("x")
         expected = Dataset({"x": ("x", [0])})
         assert_identical(expanded, expected)
 
-        expanded_no_index = ds.expand_dims("x", create_index=False)
+        expanded_no_index = ds.expand_dims("x", create_index_for_new_dim=False)
 
         # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
         expected = Dataset(coords={"x": ("x", [0])}).drop_indexes("x")
@@ -3468,7 +3474,7 @@ def test_expand_dims_create_index_from_iterable(self):
         expected = Dataset({"x": ("x", [0, 1])})
         assert_identical(expanded, expected)
 
-        expanded_no_index = ds.expand_dims(x=[0, 1], create_index=False)
+        expanded_no_index = ds.expand_dims(x=[0, 1], create_index_for_new_dim=False)
 
         # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
         expected = Dataset(coords={"x": ("x", [0, 1])}).drop_indexes("x")

From dca2fb9dcd453391bf672b04929b02e97c18bcf6 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 8 May 2024 14:52:24 -0400
Subject: [PATCH 58/64] fix incorrect arg name

---
 xarray/core/concat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 28d214881c5..51506a55395 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -513,7 +513,7 @@ def _dataset_concat(
     # case where concat dimension is a coordinate or data_var but not a dimension
     if (dim in coord_names or dim in data_names) and dim not in dim_names:
         datasets = [
-            ds.expand_dims(dim, create_index=create_index_for_new_dim)
+            ds.expand_dims(dim, create_index_for_new_dim=create_index_for_new_dim)
             for ds in datasets
         ]
 

From c9796726bcb6413ed690dc8e2d55e572c7bc99d0 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 8 May 2024 14:52:37 -0400
Subject: [PATCH 59/64] add example to docstring

---
 xarray/core/dataset.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index b0c5215570a..ab7ab990789 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4612,6 +4612,33 @@ def expand_dims(
         Data variables:
             temperature  (y, x, time) float64 96B 0.5488 0.7152 0.6028 ... 0.7917 0.5289
 
+        # Expand a scalar variable along a new dimension of the same name with and without creating a new index
+
+        >>> ds = xr.Dataset(coords={"x": 0})
+        >>> ds
+        <xarray.Dataset> Size: 8B
+        Dimensions:  ()
+        Coordinates:
+            x        int64 8B 0
+        Data variables:
+            *empty*
+
+        >>> ds.expand_dims('x')
+        <xarray.Dataset> Size: 8B
+        Dimensions:  (x: 1)
+        Coordinates:
+        * x        (x) int64 8B 0
+        Data variables:
+            *empty*
+
+        >>> ds.expand_dims("x").indexes
+        Indexes:
+            x        Index([0], dtype='int64', name='x')
+
+        >>> ds.expand_dims("x", create_index_for_new_dim=False).indexes
+        Indexes:
+            *empty*
+
         See Also
         --------
         DataArray.expand_dims

From ac27ce0a0fc7b3db4577240079343d6db78a7d2a Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 8 May 2024 14:56:02 -0400
Subject: [PATCH 60/64] add example of using new kwarg to docstring of
 expand_dims

---
 xarray/core/dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index ab7ab990789..40866dff72a 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4623,7 +4623,7 @@ def expand_dims(
         Data variables:
             *empty*
 
-        >>> ds.expand_dims('x')
+        >>> ds.expand_dims("x")
         <xarray.Dataset> Size: 8B
         Dimensions:  (x: 1)
         Coordinates:

From d73ac4802c312a9d95c6f07636c7975a16216ab1 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 8 May 2024 14:56:11 -0400
Subject: [PATCH 61/64] add example of using new kwarg to docstring of concat

---
 xarray/core/concat.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 51506a55395..92a7768bfb9 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -224,7 +224,24 @@ def concat(
       * y        (y) int64 24B 10 20 30
       * new_dim  (new_dim) int64 16B -90 -100
 
+    # Concatenate a scalar variable along a new dimension of the same name with and without creating a new index
 
+    >>> ds = xr.Dataset(coords={"x": 0})
+    >>> xr.concat([ds, ds], dim="x")
+    <xarray.Dataset> Size: 16B
+    Dimensions:  (x: 2)
+    Coordinates:
+    * x        (x) int64 16B 0 0
+    Data variables:
+        *empty*
+
+    >>> xr.concat([ds, ds], dim="x").indexes
+    Indexes:
+        x        Index([0, 0], dtype='int64', name='x')
+
+    >>> xr.concat([ds, ds], dim="x", create_index_for_new_dim=False).indexes
+    Indexes:
+        *empty*
     """
     # TODO: add ignore_index arguments copied from pandas.concat
     # TODO: support concatenating scalar coordinates even if the concatenated

From d1b656d74e3f2b78e620b536b74fd4a2c2fd721f Mon Sep 17 00:00:00 2001
From: Tom Nicholas <tom@cworthy.org>
Date: Wed, 8 May 2024 12:59:10 -0600
Subject: [PATCH 62/64] re-nit the nit

Co-authored-by: Justus Magin <keewis@users.noreply.github.com>
---
 xarray/core/concat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 92a7768bfb9..7f7f6d91650 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -166,7 +166,7 @@ def concat(
 
         If a callable, it must expect a sequence of ``attrs`` dicts and a context object
         as its only parameters.
-    create_index_for_new_dim : bool, default is True
+    create_index_for_new_dim : bool, default: True
         Whether to create a new ``PandasIndex`` object when the objects being concatenated contain scalar variables named ``dim``.
 
     Returns

From ac998e9ca72d11f33c285ed818b72a4b093f5b2a Mon Sep 17 00:00:00 2001
From: Justus Magin <keewis@users.noreply.github.com>
Date: Wed, 8 May 2024 21:05:00 +0200
Subject: [PATCH 63/64] more instances of the nit

---
 xarray/core/dataarray.py | 2 +-
 xarray/core/dataset.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 3363b2e6de8..4dc897c1878 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -2586,7 +2586,7 @@ def expand_dims(
             multiple axes are inserted. In this case, dim arguments should be
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
-        create_index_for_new_dim : bool, default is True
+        create_index_for_new_dim : bool, default: True
             Whether to create new ``PandasIndex`` objects when the object being expanded contains scalar variables with names in ``dim``.
         **dim_kwargs : int or sequence or ndarray
             The keywords are arbitrary dimensions being inserted and the values
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index b87682bb158..4315c0a7ce0 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4541,7 +4541,7 @@ def expand_dims(
             multiple axes are inserted. In this case, dim arguments should be
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
-        create_index_for_new_dim : bool, default is True
+        create_index_for_new_dim : bool, default: True
             Whether to create new ``PandasIndex`` objects when the object being expanded contains scalar variables with names in ``dim``.
         **dim_kwargs : int or sequence or ndarray
             The keywords are arbitrary dimensions being inserted and the values

From 0849b94525fcd1fbfec54e0321f9bda9182814b7 Mon Sep 17 00:00:00 2001
From: TomNicholas <tom@cworthy.org>
Date: Wed, 8 May 2024 15:05:06 -0400
Subject: [PATCH 64/64] fix docstring doctest formatting nit

---
 xarray/core/concat.py  | 2 +-
 xarray/core/dataset.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 92a7768bfb9..4612b140e6c 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -231,7 +231,7 @@ def concat(
     <xarray.Dataset> Size: 16B
     Dimensions:  (x: 2)
     Coordinates:
-    * x        (x) int64 16B 0 0
+      * x        (x) int64 16B 0 0
     Data variables:
         *empty*
 
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index b87682bb158..8395236065b 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4627,7 +4627,7 @@ def expand_dims(
         <xarray.Dataset> Size: 8B
         Dimensions:  (x: 1)
         Coordinates:
-        * x        (x) int64 8B 0
+          * x        (x) int64 8B 0
         Data variables:
             *empty*