pydata · max-sixty · Mar 28, 2020 · Mar 27, 2020 · Mar 28, 2020 · keewis
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -35,6 +35,9 @@ New Features
   :py:func:`combine_by_coords` and :py:func:`combine_nested` using
   combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`)
   By `John Omotani <https://github.com/johnomotani>`_
+- Limited the length of array items with long string reprs to a
+  reasonable width (:pull:`3900`)
+  By `Maximilian Roos <https://github.com/max-sixty>`_
 
 
 Bug fixes

diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
@@ -4,6 +4,7 @@
 import functools
 from datetime import datetime, timedelta
 from itertools import zip_longest
+from typing import Hashable
 
 import numpy as np
 import pandas as pd
@@ -14,7 +15,7 @@
 from .pycompat import dask_array_type, sparse_array_type
 
 
-def pretty_print(x, numchars):
+def pretty_print(x, numchars: int):
     """Given an object `x`, call `str(x)` and format the returned string so
     that it is numchars long, padding with trailing spaces or truncating with
     ellipses as necessary
@@ -163,7 +164,7 @@ def format_items(x):
     return formatted
 
 
-def format_array_flat(array, max_width):
+def format_array_flat(array, max_width: int):
     """Return a formatted string for as many items in the flattened version of
     array that will fit within max_width characters.
     """
@@ -198,11 +199,20 @@ def format_array_flat(array, max_width):
     num_back = count - num_front
     # note that num_back is 0 <--> array.size is 0 or 1
     #                         <--> relevant_back_items is []
-    pprint_str = (
-        " ".join(relevant_front_items[:num_front])
-        + padding
-        + " ".join(relevant_back_items[-num_back:])
+    pprint_str = "".join(
+        [
+            " ".join(relevant_front_items[:num_front]),
+            padding,
+            " ".join(relevant_back_items[-num_back:]),
+        ]
     )
+
+    # As a final check, if it's still too long even with the limit in values,
+    # replace the end with an ellipsis
+    # NB: this will still return a full 3-character ellipsis when max_width < 3
+    if len(pprint_str) > max_width:
+        pprint_str = pprint_str[: max(max_width - 3, 0)] + "..."
+
     return pprint_str
 
 
@@ -258,10 +268,16 @@ def inline_variable_array_repr(var, max_width):
         return "..."
 
 
-def summarize_variable(name, var, col_width, marker=" ", max_width=None):
+def summarize_variable(
+    name: Hashable, var, col_width: int, marker: str = " ", max_width: int = None
+):
     """Summarize a variable in one line, e.g., for the Dataset.__repr__."""
     if max_width is None:
-        max_width = OPTIONS["display_width"]
+        max_width_options = OPTIONS["display_width"]
+        if not isinstance(max_width_options, int):
+            raise TypeError(f"`max_width` value of `{max_width}` is not a valid int")
+        else:
+            max_width = max_width_options
     first_col = pretty_print(f"  {marker} {name} ", col_width)
     if var.dims:
         dims_str = "({}) ".format(", ".join(map(str, var.dims)))
@@ -295,7 +311,7 @@ def summarize_datavar(name, var, col_width):
     return summarize_variable(name, var.variable, col_width)
 
 
-def summarize_coord(name, var, col_width):
+def summarize_coord(name: Hashable, var, col_width: int):
     is_index = name in var.dims
     marker = "*" if is_index else " "
     if is_index:

diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py
@@ -115,7 +115,7 @@ def test_format_items(self):
 
     def test_format_array_flat(self):
         actual = formatting.format_array_flat(np.arange(100), 2)
-        expected = "0 ... 99"
+        expected = "..."
         assert expected == actual
 
         actual = formatting.format_array_flat(np.arange(100), 9)
@@ -134,11 +134,13 @@ def test_format_array_flat(self):
         expected = "0 1 2 ... 98 99"
         assert expected == actual
 
+        # NB: Probably not ideal; an alternative would be cutting after the
+        # first ellipsis
         actual = formatting.format_array_flat(np.arange(100.0), 11)
-        expected = "0.0 ... 99.0"
+        expected = "0.0 ... ..."
         assert expected == actual
 
-        actual = formatting.format_array_flat(np.arange(100.0), 1)
+        actual = formatting.format_array_flat(np.arange(100.0), 12)
         expected = "0.0 ... 99.0"
         assert expected == actual
 
@@ -154,16 +156,25 @@ def test_format_array_flat(self):
         expected = ""
         assert expected == actual
 
-        actual = formatting.format_array_flat(np.arange(1), 0)
+        actual = formatting.format_array_flat(np.arange(1), 1)
         expected = "0"
         assert expected == actual
 
-        actual = formatting.format_array_flat(np.arange(2), 0)
+        actual = formatting.format_array_flat(np.arange(2), 3)
         expected = "0 1"
         assert expected == actual
 
-        actual = formatting.format_array_flat(np.arange(4), 0)
-        expected = "0 ... 3"
+        actual = formatting.format_array_flat(np.arange(4), 7)
+        expected = "0 1 2 3"
+        assert expected == actual
+
+        actual = formatting.format_array_flat(np.arange(5), 7)
+        expected = "0 ... 4"
+        assert expected == actual
+
+        long_str = [" ".join(["hello world" for _ in range(100)])]
+        actual = formatting.format_array_flat(np.asarray([long_str]), 21)
+        expected = "'hello world hello..."
         assert expected == actual
 
     def test_pretty_print(self):