From ee28c1623adf815032507330a033b60decb0a19d Mon Sep 17 00:00:00 2001
From: Mathias Hauser <mathias.hauser@env.ethz.ch>
Date: Mon, 4 Jan 2021 12:03:00 +0100
Subject: [PATCH 1/7] coords: retain str dtype

---
 doc/whats-new.rst              |  3 +++
 xarray/core/alignment.py       | 12 ++++++----
 xarray/core/concat.py          |  2 +-
 xarray/core/merge.py           |  4 +++-
 xarray/core/utils.py           | 19 +++++++++++++++
 xarray/core/variable.py        |  4 ++++
 xarray/tests/test_concat.py    | 44 ++++++++++++++++++++++++++++++++++
 xarray/tests/test_dataarray.py | 33 +++++++++++++++++++++++++
 xarray/tests/test_dataset.py   | 34 ++++++++++++++++++++++++++
 xarray/tests/test_utils.py     | 15 ++++++++++++
 xarray/tests/test_variable.py  | 11 +++++++++
 11 files changed, 175 insertions(+), 6 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 151af2de66c..7de93641c18 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -42,6 +42,9 @@ Bug fixes
   By `Anderson Banihirwe <https://github.com/andersy005>`_
 - Fix a crash in orthogonal indexing on geographic coordinates with ``engine='cfgrib'`` (:issue:`4733` :pull:`4737`).
   By `Alessandro Amici <https://github.com/alexamici>`_
+- Coordinates with dtype ``str`` or ``bytes`` now retain their dtype on ``reindex``, ``align``,
+  ``concat``, and ``assign``, previously they were cast to an object dtype 
+  (:issue:`2658` and :issue:`4543`) by `Mathias Hauser <https://github.com/mathause>`_. 
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py
index 21bda8ef8d7..ef309d5b1b4 100644
--- a/xarray/core/alignment.py
+++ b/xarray/core/alignment.py
@@ -19,7 +19,7 @@
 
 from . import dtypes, utils
 from .indexing import get_indexer_nd
-from .utils import is_dict_like, is_full_slice
+from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str_type
 from .variable import IndexVariable, Variable
 
 if TYPE_CHECKING:
@@ -278,10 +278,12 @@ def align(
         return (obj.copy(deep=copy),)
 
     all_indexes = defaultdict(list)
+    all_coords = defaultdict(list)
     unlabeled_dim_sizes = defaultdict(set)
     for obj in objects:
         for dim in obj.dims:
             if dim not in exclude:
+                all_coords[dim].append(obj.coords[dim])
                 try:
                     index = obj.indexes[dim]
                 except KeyError:
@@ -306,7 +308,7 @@ def align(
                 any(not index.equals(other) for other in matching_indexes)
                 or dim in unlabeled_dim_sizes
             ):
-                joined_indexes[dim] = index
+                joined_indexes[dim] = indexes[dim]
         else:
             if (
                 any(
@@ -318,9 +320,11 @@ def align(
                 if join == "exact":
                     raise ValueError(f"indexes along dimension {dim!r} are not equal")
                 index = joiner(matching_indexes)
+                # make sure str coords are not cast to object
+                index = maybe_coerce_to_str_type(index, all_coords[dim])
                 joined_indexes[dim] = index
             else:
-                index = matching_indexes[0]
+                index = all_coords[dim][0]
 
         if dim in unlabeled_dim_sizes:
             unlabeled_sizes = unlabeled_dim_sizes[dim]
@@ -583,7 +587,7 @@ def reindex_variables(
             args: tuple = (var.attrs, var.encoding)
         else:
             args = ()
-        reindexed[dim] = IndexVariable((dim,), target, *args)
+        reindexed[dim] = IndexVariable((dim,), indexers[dim], *args)
 
     for dim in sizes:
         if dim not in indexes and dim in indexers:
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 1275d002cd3..ba8058ab02b 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -503,7 +503,7 @@ def ensure_common_dims(vars):
     for k in datasets[0].variables:
         if k in concat_over:
             try:
-                vars = ensure_common_dims([ds.variables[k] for ds in datasets])
+                vars = ensure_common_dims([ds[k].variable for ds in datasets])
             except KeyError:
                 raise ValueError("%r is not present in all datasets." % k)
             combined = concat_vars(vars, dim, positions)
diff --git a/xarray/core/merge.py b/xarray/core/merge.py
index dff00804f8f..d29a9e1ff02 100644
--- a/xarray/core/merge.py
+++ b/xarray/core/merge.py
@@ -930,9 +930,11 @@ def dataset_update_method(
                 if coord_names:
                     other[key] = value.drop_vars(coord_names)
 
+    # use ds.coords and not ds.indexes, else str coords are cast to object
+    indexes = {key: dataset.coords[key] for key in dataset.indexes.keys()}
     return merge_core(
         [dataset, other],
         priority_arg=1,
-        indexes=dataset.indexes,
+        indexes=indexes,
         combine_attrs="override",
     )
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 093b30d088d..acb7677301c 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -32,6 +32,8 @@
 import numpy as np
 import pandas as pd
 
+from . import dtypes
+
 K = TypeVar("K")
 V = TypeVar("V")
 T = TypeVar("T")
@@ -77,6 +79,23 @@ def maybe_cast_to_coords_dtype(label, coords_dtype):
     return label
 
 
+def maybe_coerce_to_str_type(index, original_coords):
+    """maybe coerce a pandas Index back to a nunpy array of type str
+
+    pd.Index uses object-dtype to store str - try to avoid this for coords
+    """
+
+    try:
+        result_type = dtypes.result_type(*original_coords)
+    except TypeError:
+        pass
+    else:
+        if result_type.kind in "SU":
+            index = np.asarray(index, dtype=result_type)
+
+    return index
+
+
 def safe_cast_to_index(array: Any) -> pd.Index:
     """Given an array, safely cast it to a pandas.Index.
 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 0a6eef44c90..bd503f17e2e 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -48,6 +48,7 @@
     ensure_us_time_resolution,
     infix_dims,
     is_duck_array,
+    maybe_coerce_to_str_type,
 )
 
 NON_NUMPY_SUPPORTED_ARRAY_TYPES = (
@@ -2523,6 +2524,9 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False):
                 indices = nputils.inverse_permutation(np.concatenate(positions))
                 data = data.take(indices)
 
+        # keep as str if possible as pandas.Index uses object (converts to numpy array)
+        data = maybe_coerce_to_str_type(data, variables)
+
         attrs = dict(first_var.attrs)
         if not shortcut:
             for var in variables:
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index 0d5507b6879..7416cab13ed 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -376,6 +376,30 @@ def test_concat_fill_value(self, fill_value):
         actual = concat(datasets, dim="t", fill_value=fill_value)
         assert_identical(actual, expected)
 
+    @pytest.mark.parametrize("dtype", [str, bytes])
+    @pytest.mark.parametrize("dim", ["x1", "x2"])
+    def test_concat_str_dtype(self, dtype, dim):
+
+        data = np.arange(4).reshape([2, 2])
+
+        da1 = Dataset(
+            {
+                "data": (["x1", "x2"], data),
+                "x1": [0, 1],
+                "x2": np.array(["a", "b"], dtype=dtype),
+            }
+        )
+        da2 = Dataset(
+            {
+                "data": (["x1", "x2"], data),
+                "x1": np.array([1, 2]),
+                "x2": np.array(["c", "d"], dtype=dtype),
+            }
+        )
+        actual = concat([da1, da2], dim=dim)
+
+        assert np.issubdtype(actual.x2.dtype, dtype)
+
 
 class TestConcatDataArray:
     def test_concat(self):
@@ -525,6 +549,26 @@ def test_concat_combine_attrs_kwarg(self):
             actual = concat([da1, da2], dim="x", combine_attrs=combine_attrs)
             assert_identical(actual, expected[combine_attrs])
 
+    @pytest.mark.parametrize("dtype", [str, bytes])
+    @pytest.mark.parametrize("dim", ["x1", "x2"])
+    def test_concat_str_dtype(self, dtype, dim):
+
+        data = np.arange(4).reshape([2, 2])
+
+        da1 = DataArray(
+            data=data,
+            dims=["x1", "x2"],
+            coords={"x1": [0, 1], "x2": np.array(["a", "b"], dtype=dtype)},
+        )
+        da2 = DataArray(
+            data=data,
+            dims=["x1", "x2"],
+            coords={"x1": np.array([1, 2]), "x2": np.array(["c", "d"], dtype=dtype)},
+        )
+        actual = concat([da1, da2], dim=dim)
+
+        assert np.issubdtype(actual.x2.dtype, dtype)
+
 
 @pytest.mark.parametrize("attr1", ({"a": {"meta": [10, 20, 30]}}, {"a": [1, 2, 3]}, {}))
 @pytest.mark.parametrize("attr2", ({"a": [1, 2, 3]}, {}))
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 1ddb97e5419..646b5c5e643 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -1568,6 +1568,19 @@ def test_reindex_fill_value(self, fill_value):
         )
         assert_identical(expected, actual)
 
+    @pytest.mark.parametrize("dtype", [str, bytes])
+    def test_reindex_str_dtype(self, dtype):
+        """Reindexing must not cast a str/bytes index coordinate to object."""
+        data = DataArray(
+            [1, 2], dims="x", coords={"x": np.array(["a", "b"], dtype=dtype)}
+        )
+
+        actual = data.reindex(x=data.x)
+        expected = data
+
+        assert_identical(expected, actual)
+        assert actual.x.dtype == expected.x.dtype
+
     def test_rename(self):
         renamed = self.dv.rename("bar")
         assert_identical(renamed.to_dataset(), self.ds.rename({"foo": "bar"}))
@@ -3423,6 +3436,26 @@ def test_align_without_indexes_errors(self):
                 DataArray([1, 2], coords=[("x", [0, 1])]),
             )
 
+    def test_align_str_dtype(self):
+
+        a = DataArray([0, 1], dims=["x"], coords={"x": ["a", "b"]})
+        b = DataArray([1, 2], dims=["x"], coords={"x": ["b", "c"]})
+
+        expected_a = DataArray(
+            [0, 1, np.NaN], dims=["x"], coords={"x": ["a", "b", "c"]}
+        )
+        expected_b = DataArray(
+            [np.NaN, 1, 2], dims=["x"], coords={"x": ["a", "b", "c"]}
+        )
+
+        actual_a, actual_b = xr.align(a, b, join="outer")
+
+        assert_identical(expected_a, actual_a)
+        assert expected_a.x.dtype == actual_a.x.dtype
+
+        assert_identical(expected_b, actual_b)
+        assert expected_b.x.dtype == actual_b.x.dtype
+
     def test_broadcast_arrays(self):
         x = DataArray([1, 2], coords=[("a", [-1, -2])], name="x")
         y = DataArray([1, 2], coords=[("b", [3, 4])], name="y")
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 099fb5c0515..3c6e4791c40 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -1949,6 +1949,16 @@ def test_reindex_like_fill_value(self, fill_value):
         )
         assert_identical(expected, actual)
 
+    @pytest.mark.parametrize("dtype", [str, bytes])
+    def test_reindex_str_dtype(self, dtype):
+        data = Dataset({"data": ("x", [1, 2]), "x": np.array(["a", "b"], dtype=dtype)})
+
+        actual = data.reindex(x=data.x)
+        expected = data
+
+        assert_identical(expected, actual)
+        assert actual.x.dtype == expected.x.dtype
+
     @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"foo": 2, "bar": 1}])
     def test_align_fill_value(self, fill_value):
         x = Dataset({"foo": DataArray([1, 2], dims=["x"], coords={"x": [1, 2]})})
@@ -2133,6 +2143,22 @@ def test_align_non_unique(self):
         with raises_regex(ValueError, "cannot reindex or align"):
             align(x, y)
 
+    def test_align_str_dtype(self):
+
+        a = Dataset({"foo": ("x", [0, 1]), "x": ["a", "b"]})
+        b = Dataset({"foo": ("x", [1, 2]), "x": ["b", "c"]})
+
+        expected_a = Dataset({"foo": ("x", [0, 1, np.NaN]), "x": ["a", "b", "c"]})
+        expected_b = Dataset({"foo": ("x", [np.NaN, 1, 2]), "x": ["a", "b", "c"]})
+
+        actual_a, actual_b = xr.align(a, b, join="outer")
+
+        assert_identical(expected_a, actual_a)
+        assert expected_a.x.dtype == actual_a.x.dtype
+
+        assert_identical(expected_b, actual_b)
+        assert expected_b.x.dtype == actual_b.x.dtype
+
     def test_broadcast(self):
         ds = Dataset(
             {"foo": 0, "bar": ("x", [1]), "baz": ("y", [2, 3])}, {"c": ("x", [4])}
@@ -3419,6 +3445,14 @@ def test_setitem_align_new_indexes(self):
         )
         assert_identical(ds, expected)
 
+    @pytest.mark.parametrize("dtype", [str, bytes])
+    def test_setitem_str_dtype(self, dtype):
+
+        ds = xr.Dataset(coords={"x": np.array(["x", "y"], dtype=dtype)})
+        ds["foo"] = xr.DataArray(np.array([0, 0]), dims=["x"])
+
+        assert np.issubdtype(ds.x.dtype, dtype)
+
     def test_assign(self):
         ds = Dataset()
         actual = ds.assign(x=[0, 1, 2], y=2)
diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py
index 5f8b1770bd3..bfe7a45ab2c 100644
--- a/xarray/tests/test_utils.py
+++ b/xarray/tests/test_utils.py
@@ -39,6 +39,21 @@ def test_safe_cast_to_index():
         assert expected.dtype == actual.dtype
 
 
+@pytest.mark.parametrize(
+    "a, b, expected", [["a", "b", np.array(["a", "b"])], [1, 2, pd.Index([1, 2])]]
+)
+def test_maybe_coerce_to_str_type(a, b, expected):
+
+    a = np.array([a])
+    b = np.array([b])
+    index = pd.Index(a).append(pd.Index(b))
+
+    actual = utils.maybe_coerce_to_str_type(index, [a, b])
+
+    assert_array_equal(expected, actual)
+    assert expected.dtype == actual.dtype
+
+
 @requires_cftime
 def test_safe_cast_to_index_cftimeindex():
     date_types = _all_cftime_date_types()
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index ecd53aa9bb1..167a78e0820 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -2091,6 +2091,17 @@ def test_concat_multiindex(self):
         assert actual.identical(expected)
         assert isinstance(actual.to_index(), pd.MultiIndex)
 
+    @pytest.mark.parametrize("dtype", [str, bytes])
+    def test_concat_str_dtype(self, dtype):
+
+        a = IndexVariable("x", np.array(["a"], dtype=dtype))
+        b = IndexVariable("x", np.array(["b"], dtype=dtype))
+        expected = IndexVariable("x", np.array(["a", "b"], dtype=dtype))
+
+        actual = IndexVariable.concat([a, b])
+        assert actual.identical(expected)
+        assert np.issubdtype(actual.dtype, dtype)
+
     def test_coordinate_alias(self):
         with pytest.warns(Warning, match="deprecated"):
             x = Coordinate("x", [1, 2, 3])

From 1fb085b919896ee3feee3553bc6f2a2cad6b9f7a Mon Sep 17 00:00:00 2001
From: Mathias Hauser <mathias.hauser@env.ethz.ch>
Date: Mon, 4 Jan 2021 13:52:45 +0100
Subject: [PATCH 2/7] fix doctests

---
 xarray/core/concat.py          | 2 +-
 xarray/core/dataarray.py       | 4 ++--
 xarray/core/dataset.py         | 6 +++---
 xarray/tests/test_dataarray.py | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index ba8058ab02b..5cda5aa903c 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -187,7 +187,7 @@ def concat(
     array([[0, 1, 2],
            [3, 4, 5]])
     Coordinates:
-      * x        (x) object 'a' 'b'
+      * x        (x) <U1 'a' 'b'
       * y        (y) int64 10 20 30
 
     >>> xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim")
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 489642d03f7..6e49c950db0 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -1318,8 +1318,8 @@ def broadcast_like(
                [ 2.2408932 ,  1.86755799, -0.97727788],
                [        nan,         nan,         nan]])
         Coordinates:
-          * x        (x) object 'a' 'b' 'c'
-          * y        (y) object 'a' 'b' 'c'
+          * x        (x) <U1 'a' 'b' 'c'
+          * y        (y) <U1 'a' 'b' 'c'
         """
         if exclude is None:
             exclude = set()
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 3bb5cd8b586..55e836728bb 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -2561,7 +2561,7 @@ def reindex(
         <xarray.Dataset>
         Dimensions:      (station: 4)
         Coordinates:
-          * station      (station) object 'boston' 'austin' 'seattle' 'lincoln'
+          * station      (station) <U7 'boston' 'austin' 'seattle' 'lincoln'
         Data variables:
             temperature  (station) float64 10.98 nan 12.06 nan
             pressure     (station) float64 211.8 nan 218.8 nan
@@ -2572,7 +2572,7 @@ def reindex(
         <xarray.Dataset>
         Dimensions:      (station: 4)
         Coordinates:
-          * station      (station) object 'boston' 'austin' 'seattle' 'lincoln'
+          * station      (station) <U7 'boston' 'austin' 'seattle' 'lincoln'
         Data variables:
             temperature  (station) float64 10.98 0.0 12.06 0.0
             pressure     (station) float64 211.8 0.0 218.8 0.0
@@ -2585,7 +2585,7 @@ def reindex(
         <xarray.Dataset>
         Dimensions:      (station: 4)
         Coordinates:
-          * station      (station) object 'boston' 'austin' 'seattle' 'lincoln'
+          * station      (station) <U7 'boston' 'austin' 'seattle' 'lincoln'
         Data variables:
             temperature  (station) float64 10.98 0.0 12.06 0.0
             pressure     (station) float64 211.8 100.0 218.8 100.0
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 96abedb9a1a..f1cd47f3617 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -89,7 +89,7 @@ def test_repr_multiindex(self):
             array([0, 1, 2, 3])
             Coordinates:
               * x        (x) MultiIndex
-              - level_1  (x) object 'a' 'a' 'b' 'b'
+              - level_1  (x) <U1 'a' 'a' 'b' 'b'
               - level_2  (x) int64 1 2 1 2"""
         )
         assert expected == repr(self.mda)

From b93e0adcdb213fd320515d8df0f3986fd725d363 Mon Sep 17 00:00:00 2001
From: Mathias Hauser <mathias.hauser@env.ethz.ch>
Date: Mon, 4 Jan 2021 14:19:04 +0100
Subject: [PATCH 3/7] update what's new

---
 doc/whats-new.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 260bfe515b6..3c3546df66f 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -54,8 +54,8 @@ Bug fixes
   By `Anderson Banihirwe <https://github.com/andersy005>`_
 - Fix a crash in orthogonal indexing on geographic coordinates with ``engine='cfgrib'`` (:issue:`4733` :pull:`4737`).
   By `Alessandro Amici <https://github.com/alexamici>`_
-- Coordinates with dtype ``str`` or ``bytes`` now retain their dtype on ``reindex``, ``align``,
-  ``concat``, and ``assign``, previously they were cast to an object dtype 
+- Coordinates with dtype ``str`` or ``bytes`` now retain their dtype on many operations, 
+  e.g. ``reindex``, ``align``, ``concat``, ``assign``, previously they were cast to an object dtype 
   (:issue:`2658` and :issue:`4543`) by `Mathias Hauser <https://github.com/mathause>`_. 
 - Limit number of data rows when printing large datasets. (:issue:`4736`, :pull:`4750`). By `Jimmy Westling <https://github.com/illviljan>`_.
 

From 30984d36695096ee3c4b724db4500cd5afa56275 Mon Sep 17 00:00:00 2001
From: Mathias Hauser <mathias.hauser@env.ethz.ch>
Date: Mon, 4 Jan 2021 14:19:29 +0100
Subject: [PATCH 4/7] fix multiindex repr

---
 xarray/tests/test_dataarray.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index f1cd47f3617..96abedb9a1a 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -89,7 +89,7 @@ def test_repr_multiindex(self):
             array([0, 1, 2, 3])
             Coordinates:
               * x        (x) MultiIndex
-              - level_1  (x) <U1 'a' 'a' 'b' 'b'
+              - level_1  (x) object 'a' 'a' 'b' 'b'
               - level_2  (x) int64 1 2 1 2"""
         )
         assert expected == repr(self.mda)

From fcd4eca78550b707dec53c792e4c9da557c969b6 Mon Sep 17 00:00:00 2001
From: Mathias Hauser <mathias.hauser@env.ethz.ch>
Date: Mon, 4 Jan 2021 18:16:48 +0100
Subject: [PATCH 5/7] rename function

---
 xarray/core/alignment.py   | 4 ++--
 xarray/core/utils.py       | 2 +-
 xarray/core/variable.py    | 4 ++--
 xarray/tests/test_utils.py | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py
index ef309d5b1b4..debf3aad96a 100644
--- a/xarray/core/alignment.py
+++ b/xarray/core/alignment.py
@@ -19,7 +19,7 @@
 
 from . import dtypes, utils
 from .indexing import get_indexer_nd
-from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str_type
+from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str
 from .variable import IndexVariable, Variable
 
 if TYPE_CHECKING:
@@ -321,7 +321,7 @@ def align(
                     raise ValueError(f"indexes along dimension {dim!r} are not equal")
                 index = joiner(matching_indexes)
                 # make sure str coords are not cast to object
-                index = maybe_coerce_to_str_type(index, all_coords[dim])
+                index = maybe_coerce_to_str(index, all_coords[dim])
                 joined_indexes[dim] = index
             else:
                 index = all_coords[dim][0]
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index acb7677301c..06c7c49e9fa 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -79,7 +79,7 @@ def maybe_cast_to_coords_dtype(label, coords_dtype):
     return label
 
 
-def maybe_coerce_to_str_type(index, original_coords):
+def maybe_coerce_to_str(index, original_coords):
     """maybe coerce a pandas Index back to a nunpy array of type str
 
     pd.Index uses object-dtype to store str - try to avoid this for coords
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index bd503f17e2e..797de65bbcf 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -48,7 +48,7 @@
     ensure_us_time_resolution,
     infix_dims,
     is_duck_array,
-    maybe_coerce_to_str_type,
+    maybe_coerce_to_str,
 )
 
 NON_NUMPY_SUPPORTED_ARRAY_TYPES = (
@@ -2525,7 +2525,7 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False):
                 data = data.take(indices)
 
         # keep as str if possible as pandas.Index uses object (converts to numpy array)
-        data = maybe_coerce_to_str_type(data, variables)
+        data = maybe_coerce_to_str(data, variables)
 
         attrs = dict(first_var.attrs)
         if not shortcut:
diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py
index bfe7a45ab2c..eecab755fa4 100644
--- a/xarray/tests/test_utils.py
+++ b/xarray/tests/test_utils.py
@@ -42,13 +42,13 @@ def test_safe_cast_to_index():
 @pytest.mark.parametrize(
     "a, b, expected", [["a", "b", np.array(["a", "b"])], [1, 2, pd.Index([1, 2])]]
 )
-def test_maybe_coerce_to_str_type(a, b, expected):
+def test_maybe_coerce_to_str(a, b, expected):
 
     a = np.array([a])
     b = np.array([b])
     index = pd.Index(a).append(pd.Index(b))
 
-    actual = utils.maybe_coerce_to_str_type(index, [a, b])
+    actual = utils.maybe_coerce_to_str(index, [a, b])
 
     assert_array_equal(expected, actual)
     assert expected.dtype == actual.dtype

From 96d4e7d9077bc027ea8d77e0edb9a14cbc311313 Mon Sep 17 00:00:00 2001
From: Mathias Hauser <mathias.hauser@env.ethz.ch>
Date: Tue, 5 Jan 2021 19:28:29 +0100
Subject: [PATCH 6/7] ensure minimum str dtype

---
 xarray/core/utils.py       |  2 +-
 xarray/tests/test_utils.py | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 06c7c49e9fa..02db8955285 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -91,7 +91,7 @@ def maybe_coerce_to_str(index, original_coords):
         pass
     else:
         if result_type.kind in "SU":
-            index = np.asarray(index, dtype=result_type)
+            index = np.asarray(index, dtype=result_type.type)
 
     return index
 
diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py
index eecab755fa4..193c45f01cd 100644
--- a/xarray/tests/test_utils.py
+++ b/xarray/tests/test_utils.py
@@ -54,6 +54,18 @@ def test_maybe_coerce_to_str(a, b, expected):
     assert expected.dtype == actual.dtype
 
 
+def test_maybe_coerce_to_str_minimal_str_dtype():
+    """The coerced dtype is sized to the index values, not the original coords."""
+    a = np.array(["a", "a_long_string"])
+    index = pd.Index(["a"])
+
+    actual = utils.maybe_coerce_to_str(index, [a])
+    expected = np.array(["a"])
+
+    assert_array_equal(expected, actual)
+    assert expected.dtype == actual.dtype
+
+
 @requires_cftime
 def test_safe_cast_to_index_cftimeindex():
     date_types = _all_cftime_date_types()

From 4f23a3c1d18286adcd6677e44a030b1547cd9435 Mon Sep 17 00:00:00 2001
From: Mathias Hauser <mathias.hauser@env.ethz.ch>
Date: Tue, 12 Jan 2021 23:53:09 +0100
Subject: [PATCH 7/7] fix EOL spaces

---
 doc/whats-new.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 54eaa9b1e10..398c332433f 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -66,9 +66,9 @@ Bug fixes
   By `Anderson Banihirwe <https://github.com/andersy005>`_
 - Fix a crash in orthogonal indexing on geographic coordinates with ``engine='cfgrib'`` (:issue:`4733` :pull:`4737`).
   By `Alessandro Amici <https://github.com/alexamici>`_
-- Coordinates with dtype ``str`` or ``bytes`` now retain their dtype on many operations, 
-  e.g. ``reindex``, ``align``, ``concat``, ``assign``, previously they were cast to an object dtype 
-  (:issue:`2658` and :issue:`4543`) by `Mathias Hauser <https://github.com/mathause>`_. 
+- Coordinates with dtype ``str`` or ``bytes`` now retain their dtype on many operations,
+  e.g. ``reindex``, ``align``, ``concat``, ``assign``; previously they were cast to an object dtype
+  (:issue:`2658` and :issue:`4543`) by `Mathias Hauser <https://github.com/mathause>`_.
 - Limit number of data rows when printing large datasets. (:issue:`4736`, :pull:`4750`). By `Jimmy Westling <https://github.com/illviljan>`_.
 - Add ``missing_dims`` parameter to transpose (:issue:`4647`, :pull:`4767`). By `Daniel Mesejo <https://github.com/mesejo>`_.
 - Resolve intervals before appending other metadata to labels when plotting (:issue:`4322`, :pull:`4794`).