diff --git a/design_notes/flexible_indexes_notes.md b/design_notes/flexible_indexes_notes.md
index b36ce3e46ed..f4a2c1c2125 100644
--- a/design_notes/flexible_indexes_notes.md
+++ b/design_notes/flexible_indexes_notes.md
@@ -71,7 +71,7 @@ An `XarrayIndex` subclass must/should/may implement the following properties/met
 - a `data` property to access index's data and map it to coordinate data (see [Section 4](#4-indexvariable))
 - a `__getitem__()` implementation to propagate the index through DataArray/Dataset indexing operations
 - `equals()`, `union()` and `intersection()` methods for data alignment (see [Section 2.6](#26-using-indexes-for-data-alignment))
-- Xarray coordinate getters (see [Section 2.2.4](#224-implicit-coodinates))
+- Xarray coordinate getters (see [Section 2.2.4](#224-implicit-coordinates))
 - a method that may return a new index and that will be called when one of the corresponding coordinates is dropped from the Dataset/DataArray (multi-coordinate indexes)
 - `encode()`/`decode()` methods that would allow storage-agnostic serialization and fast-path reconstruction of the underlying index object(s) (see [Section 2.8](#28-index-encoding))
 - one or more "non-standard" methods or properties that could be leveraged in Xarray 3rd-party extensions like Dataset/DataArray accessors (see [Section 2.7](#27-using-indexes-for-other-purposes))
diff --git a/design_notes/grouper_objects.md b/design_notes/grouper_objects.md
index af42ef2f493..508ed5e9716 100644
--- a/design_notes/grouper_objects.md
+++ b/design_notes/grouper_objects.md
@@ -166,7 +166,7 @@ where `|` represents chunk boundaries. A simple rechunking to
 ```
 000|111122|3333
 ```
-would make this resampling reduction an embarassingly parallel blockwise problem.
+would make this resampling reduction an embarrassingly parallel blockwise problem.
 
 Similarly consider monthly-mean climatologies for which the month numbers might be
 ```
diff --git a/design_notes/named_array_design_doc.md b/design_notes/named_array_design_doc.md
index 074f8cf17e7..0050471cd01 100644
--- a/design_notes/named_array_design_doc.md
+++ b/design_notes/named_array_design_doc.md
@@ -258,7 +258,7 @@ Questions:
 
     Variable.coarsen_reshape
     Variable.rolling_window
 
-    Variable.set_dims  # split this into broadcas_to and expand_dims
+    Variable.set_dims  # split this into broadcast_to and expand_dims
 
     # Reordering/Reshaping
diff --git a/doc/user-guide/dask.rst b/doc/user-guide/dask.rst
index f71969066f9..5c421aa51d8 100644
--- a/doc/user-guide/dask.rst
+++ b/doc/user-guide/dask.rst
@@ -298,7 +298,7 @@ Automatic parallelization with ``apply_ufunc`` and ``map_blocks``
 
 .. tip::
 
-   Some problems can become embarassingly parallel and thus easy to parallelize
+   Some problems can become embarrassingly parallel and thus easy to parallelize
    automatically by rechunking to a frequency, e.g. ``ds.chunk(time=TimeResampler("YE"))``.
    See :py:meth:`Dataset.chunk` for more.
 
@@ -559,7 +559,7 @@ larger chunksizes.
 
 .. tip::
 
-   Many time domain problems become amenable to an embarassingly parallel or blockwise solution
+   Many time domain problems become amenable to an embarrassingly parallel or blockwise solution
    (e.g. using :py:func:`xarray.map_blocks`, :py:func:`dask.array.map_blocks`, or
    :py:func:`dask.array.blockwise`) by rechunking to a frequency along the time dimension.
    Provide :py:class:`xarray.groupers.TimeResampler` objects to :py:meth:`Dataset.chunk` to do so.
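Both ``doc/user-guide/dask.rst`` tips above lean on the same rechunk-to-a-frequency idiom, so a minimal sketch may help. The dataset, variable name, and original chunk size below are invented for illustration; only ``Dataset.chunk`` and ``xarray.groupers.TimeResampler`` come from the patched docs themselves.

```python
import numpy as np
import pandas as pd
import xarray as xr
from xarray.groupers import TimeResampler

# An illustrative daily dataset whose chunks straddle year boundaries.
time = pd.date_range("2000-01-01", "2003-12-31", freq="D")
ds = xr.Dataset(
    {"temp": ("time", np.random.rand(time.size))},
    coords={"time": time},
).chunk(time=100)

# Rechunk so every chunk holds exactly one year of data, as the tips suggest.
rechunked = ds.chunk(time=TimeResampler("YE"))

# With year-aligned chunks, an annual reduction becomes blockwise
# ("embarrassingly parallel"): each output year reads exactly one input chunk.
annual = rechunked.resample(time="YE").mean()
```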
diff --git a/doc/user-guide/data-structures.rst b/doc/user-guide/data-structures.rst
index a1794f4123d..b963ccf0b00 100644
--- a/doc/user-guide/data-structures.rst
+++ b/doc/user-guide/data-structures.rst
@@ -289,7 +289,7 @@ pressure that were made under various conditions:
 * the measurements were made on four different days;
 * they were made at two separate locations, which we will represent using
   their latitude and longitude; and
-* they were made using instruments by three different manufacutrers, which we
+* they were made using instruments by three different manufacturers, which we
   will refer to as `'manufac1'`, `'manufac2'`, and `'manufac3'`.
 
 .. ipython:: python
diff --git a/doc/user-guide/pandas.rst b/doc/user-guide/pandas.rst
index 26fa7ea5c0c..30939cbbd17 100644
--- a/doc/user-guide/pandas.rst
+++ b/doc/user-guide/pandas.rst
@@ -120,7 +120,7 @@ Particularly after a roundtrip, the following deviations are noted:
 
 - a non-dimension Dataset ``coordinate`` is converted into ``variable``
 - a non-dimension DataArray ``coordinate`` is not converted
-- ``dtype`` is not allways the same (e.g. "str" is converted to "object")
+- ``dtype`` is not always the same (e.g. "str" is converted to "object")
 - ``attrs`` metadata is not conserved
 
 To avoid these problems, the third-party `ntv-pandas `__ library offers lossless and reversible conversions between
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 54fea2b73ea..99099a135a1 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -118,7 +118,7 @@ New Features
   (:issue:`6610`, :pull:`8840`).
   By `Deepak Cherian `_.
 - Allow rechunking to a frequency using ``Dataset.chunk(time=TimeResampler("YE"))`` syntax. (:issue:`7559`, :pull:`9109`)
-  Such rechunking allows many time domain analyses to be executed in an embarassingly parallel fashion.
+  Such rechunking allows many time domain analyses to be executed in an embarrassingly parallel fashion.
   By `Deepak Cherian `_.
 - Allow per-variable specification of ```mask_and_scale``, ``decode_times``, ``decode_timedelta``
   ``use_cftime`` and ``concat_characters`` params in :py:func:`~xarray.open_dataset` (:pull:`9218`).
@@ -151,7 +151,7 @@ Breaking changes
 Bug fixes
 ~~~~~~~~~
 
-- Fix scatter plot broadcasting unneccesarily. (:issue:`9129`, :pull:`9206`)
+- Fix scatter plot broadcasting unnecessarily. (:issue:`9129`, :pull:`9206`)
   By `Jimmy Westling `_.
 - Don't convert custom indexes to ``pandas`` indexes when computing a diff (:pull:`9157`)
   By `Justus Magin `_.
@@ -614,7 +614,7 @@ Internal Changes
 ~~~~~~~~~~~~~~~~
 
 - The implementation of :py:func:`map_blocks` has changed to minimize graph size and duplication of data.
-  This should be a strict improvement even though the graphs are not always embarassingly parallel any more.
+  This should be a strict improvement even though the graphs are not always embarrassingly parallel any more.
   Please open an issue if you spot a regression. (:pull:`8412`, :issue:`8409`).
   By `Deepak Cherian `_.
 - Remove null values before plotting. (:pull:`8535`).
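The ``doc/user-guide/pandas.rst`` hunk above lists roundtrip deviations; the ``dtype`` one is easy to see in a sketch. The array contents and name here are invented for demonstration.

```python
import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(np.array(["a", "b", "c"]), dims="x", name="letters")

# Roundtrip through pandas and back.
roundtripped = xr.DataArray.from_series(da.to_series())

print(da.dtype)            # <U1 -- a fixed-width "str" dtype
print(roundtripped.dtype)  # object -- "str" is converted to "object"
```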
diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
index f7bed2c13ef..fc3fe762cf7 100644
--- a/xarray/coding/cftime_offsets.py
+++ b/xarray/coding/cftime_offsets.py
@@ -739,7 +739,7 @@ def _generate_anchored_deprecated_frequencies(
     return pairs
 
 
-_DEPRECATED_FREQUENICES: dict[str, str] = {
+_DEPRECATED_FREQUENCIES: dict[str, str] = {
     "A": "YE",
     "Y": "YE",
     "AS": "YS",
@@ -765,7 +765,7 @@ def _generate_anchored_deprecated_frequencies(
 
 
 def _emit_freq_deprecation_warning(deprecated_freq):
-    recommended_freq = _DEPRECATED_FREQUENICES[deprecated_freq]
+    recommended_freq = _DEPRECATED_FREQUENCIES[deprecated_freq]
     message = _DEPRECATION_MESSAGE.format(
         deprecated_freq=deprecated_freq, recommended_freq=recommended_freq
     )
@@ -784,7 +784,7 @@ def to_offset(freq: BaseCFTimeOffset | str, warn: bool = True) -> BaseCFTimeOffs
     freq_data = match.groupdict()
 
     freq = freq_data["freq"]
-    if warn and freq in _DEPRECATED_FREQUENICES:
+    if warn and freq in _DEPRECATED_FREQUENCIES:
         _emit_freq_deprecation_warning(freq)
     multiples = freq_data["multiple"]
     multiples = 1 if multiples is None else int(multiples)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index a7b52dc0185..d6381e3fe01 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -9749,7 +9749,7 @@ def eval(
         Calculate an expression supplied as a string in the context of the dataset.
 
         This is currently experimental; the API may change particularly around
-        assignments, which currently returnn a ``Dataset`` with the additional variable.
+        assignments, which currently return a ``Dataset`` with the additional variable.
         Currently only the ``python`` engine is supported, which has the same
         performance as executing in python.
 
diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py
index 59984c5afa3..9df66031440 100644
--- a/xarray/core/datatree.py
+++ b/xarray/core/datatree.py
@@ -1520,7 +1520,7 @@ def to_netcdf(
         mode : {"w", "a"}, default: "w"
             Write ('w') or append ('a') mode. If mode='w', any existing file at
             this location will be overwritten. If mode='a', existing variables
-            will be overwritten. Only appies to the root group.
+            will be overwritten. Only applies to the root group.
         encoding : dict, optional
             Nested dictionary with variable names as keys and dictionaries of
             variable specific encodings as values, e.g.,
diff --git a/xarray/core/datatree_ops.py b/xarray/core/datatree_ops.py
index bc64b44ae1e..a44700e2bf8 100644
--- a/xarray/core/datatree_ops.py
+++ b/xarray/core/datatree_ops.py
@@ -224,7 +224,7 @@ def insert_doc_addendum(docstring: str | None, addendum: str) -> str | None:
     Dataset directly as well as the mixins: DataWithCoords, DatasetAggregations, and DatasetOpsMixin.
 
     The majority of the docstrings fall into a parseable pattern. Those that
-    don't, just have the addendum appeneded after. None values are returned.
+    don't, just have the addendum appended after. None values are returned.
 
     """
     if docstring is None:
diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py
index 80d15f8cde9..d92ef72246e 100644
--- a/xarray/core/indexes.py
+++ b/xarray/core/indexes.py
@@ -1802,7 +1802,7 @@ def check_variables():
 
 def _apply_indexes_fast(indexes: Indexes[Index], args: Mapping[Any, Any], func: str):
     # This function avoids the call to indexes.group_by_index
-    # which is really slow when repeatidly iterating through
+    # which is really slow when repeatedly iterating through
     # an array. However, it fails to return the correct ID for
     # multi-index arrays
     indexes_fast, coords = indexes._indexes, indexes._variables
diff --git a/xarray/core/merge.py b/xarray/core/merge.py
index d1eaa43ad89..b3b50ec5ef7 100644
--- a/xarray/core/merge.py
+++ b/xarray/core/merge.py
@@ -267,7 +267,7 @@ def merge_collected(
                         index, other_index, variable, other_var, index_cmp_cache
                     ):
                         raise MergeError(
-                            f"conflicting values/indexes on objects to be combined fo coordinate {name!r}\n"
+                            f"conflicting values/indexes on objects to be combined for coordinate {name!r}\n"
                             f"first index: {index!r}\nsecond index: {other_index!r}\n"
                             f"first variable: {variable!r}\nsecond variable: {other_var!r}\n"
                         )
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index f63692c2222..1f2911a9930 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1658,7 +1658,7 @@ def reduce(  # type: ignore[override]
             _get_keep_attrs(default=False) if keep_attrs is None else keep_attrs
         )
 
-        # Noe that the call order for Variable.mean is
+        # Note that the call order for Variable.mean is
         #    Variable.mean -> NamedArray.mean -> Variable.reduce
         #    -> NamedArray.reduce
         result = super().reduce(
diff --git a/xarray/datatree_/docs/source/data-structures.rst b/xarray/datatree_/docs/source/data-structures.rst
index 02e4a31f688..90b786701cc 100644
--- a/xarray/datatree_/docs/source/data-structures.rst
+++ b/xarray/datatree_/docs/source/data-structures.rst
@@ -40,7 +40,7 @@ stored under hashable keys), and so has the same key properties:
 - ``dims``: a dictionary mapping of dimension names to lengths, for the variables in this node,
 - ``data_vars``: a dict-like container of DataArrays corresponding to variables in this node,
 - ``coords``: another dict-like container of DataArrays, corresponding to coordinate variables in this node,
-- ``attrs``: dict to hold arbitary metadata relevant to data in this node.
+- ``attrs``: dict to hold arbitrary metadata relevant to data in this node.
 
 A single ``DataTree`` object acts much like a single ``Dataset`` object, and has a similar set of dict-like methods
 defined upon it. However, ``DataTree``'s can also contain other ``DataTree`` objects, so they can be thought of as nested dict-like
diff --git a/xarray/datatree_/docs/source/hierarchical-data.rst b/xarray/datatree_/docs/source/hierarchical-data.rst
index d4f58847718..ceb3fc46b44 100644
--- a/xarray/datatree_/docs/source/hierarchical-data.rst
+++ b/xarray/datatree_/docs/source/hierarchical-data.rst
@@ -133,7 +133,7 @@ We can add Herbert to the family tree without displacing Homer by :py:meth:`~Dat
 
 .. note::
    This example shows a minor subtlety - the returned tree has Homer's brother listed as ``"Herbert"``,
-   but the original node was named "Herbert". Not only are names overriden when stored as keys like this,
+   but the original node was named "Herbert". Not only are names overridden when stored as keys like this,
    but the new node is a copy, so that the original node that was reference is unchanged (i.e. ``herbert.name == "Herb"`` still).
    In other words, nodes are copied into trees, not inserted into them.
    This is intentional, and mirrors the behaviour when storing named ``xarray.DataArray`` objects inside datasets.
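The ``hierarchical-data.rst`` note above describes copy-on-insert semantics; a minimal sketch, assuming a recent xarray in which ``DataTree`` is exposed as ``xr.DataTree``:

```python
import xarray as xr

homer = xr.DataTree(name="Homer")
herbert = xr.DataTree(name="Herb")

# Storing a node under a key copies it and overrides its name with the key.
homer["Herbert"] = herbert

print(homer["Herbert"].name)  # "Herbert" -- the key wins
print(herbert.name)           # "Herb" -- the original node is unchanged
```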
diff --git a/xarray/plot/dataset_plot.py b/xarray/plot/dataset_plot.py
index e3453b82fdc..04d298efcce 100644
--- a/xarray/plot/dataset_plot.py
+++ b/xarray/plot/dataset_plot.py
@@ -737,7 +737,7 @@ def _temp_dataarray(ds: Dataset, y: Hashable, locals_: dict[str, Any]) -> DataAr
             coords[key] = darray
             dims.update(darray.dims)
 
-    # Trim dataset from unneccessary dims:
+    # Trim dataset from unnecessary dims:
     ds_trimmed = ds.drop_dims(ds.sizes.keys() - dims)  # TODO: Use ds.dims in the future
 
     # The dataarray has to include all the dims. Broadcast to that shape
diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py
index a037123a46f..306ad1327c4 100644
--- a/xarray/plot/utils.py
+++ b/xarray/plot/utils.py
@@ -1170,7 +1170,7 @@ def _legend_add_subtitle(handles, labels, text):
 
     if text and len(handles) > 1:
         # Create a blank handle that's not visible, the
-        # invisibillity will be used to discern which are subtitles
+        # invisibility will be used to discern which are subtitles
         # or not:
         blank_handle = plt.Line2D([], [], label=text)
         blank_handle.set_visible(False)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 2e906b2286d..17b4d2c5ba7 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -5041,9 +5041,10 @@ def test_extract_nc4_variable_encoding_netcdf4(self):
             var = xr.Variable(("x",), [1, 2, 3], {}, {"compression": "szlib"})
             _extract_nc4_variable_encoding(var, backend="netCDF4", raise_on_invalid=True)
 
+    @pytest.mark.xfail
     def test_extract_h5nc_encoding(self) -> None:
         # not supported with h5netcdf (yet)
-        var = xr.Variable(("x",), [1, 2, 3], {}, {"least_sigificant_digit": 2})
+        var = xr.Variable(("x",), [1, 2, 3], {}, {"least_significant_digit": 2})
         with pytest.raises(ValueError, match=r"unexpected encoding"):
             _extract_nc4_variable_encoding(var, raise_on_invalid=True)
 
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index 653e6dec43b..4cb0e3fc593 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -1797,6 +1797,6 @@ def test_minimize_graph_size():
         actual = len([key for key in graph if var in key[0]])
         # assert that we only include each chunk of an index variable
         # is only included once, not the product of number of chunks of
-        # all the other dimenions.
+        # all the other dimensions.
         # e.g. previously for 'x', actual == numchunks['y'] * numchunks['z']
         assert actual == numchunks[var], (actual, numchunks[var])
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 22b43f0aa04..3d1bb065193 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -6650,8 +6650,8 @@ def test_to_and_from_iris(self) -> None:
             ),
         )
 
-        for coord, orginal_key in zip((actual.coords()), original.coords):
-            original_coord = original.coords[orginal_key]
+        for coord, original_key in zip((actual.coords()), original.coords):
+            original_coord = original.coords[original_key]
             assert coord.var_name == original_coord.name
             assert_array_equal(
                 coord.points, CFDatetimeCoder().encode(original_coord.variable).values
@@ -6726,8 +6726,8 @@ def test_to_and_from_iris_dask(self) -> None:
             ),
         )
 
-        for coord, orginal_key in zip((actual.coords()), original.coords):
-            original_coord = original.coords[orginal_key]
+        for coord, original_key in zip((actual.coords()), original.coords):
+            original_coord = original.coords[original_key]
             assert coord.var_name == original_coord.name
             assert_array_equal(
                 coord.points, CFDatetimeCoder().encode(original_coord.variable).values
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 838185bc6b3..960de3ec29e 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -6742,7 +6742,7 @@ def test_pad(self, padded_dim_name, constant_values) -> None:
         else:
             np.testing.assert_equal(padded.sizes[ds_dim_name], ds_dim)
 
-        # check if coord "numbers" with dimention dim3 is paded correctly
+        # check if coord "numbers" with dimension dim3 is padded correctly
         if padded_dim_name == "dim3":
             assert padded["numbers"][[0, -1]].isnull().all()
             # twarning: passes but dtype changes from int to float
diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
index ef8d6e9472f..c410f3a2fd5 100644
--- a/xarray/tests/test_plot.py
+++ b/xarray/tests/test_plot.py
@@ -2004,7 +2004,7 @@ def test_plot_rgba_image_transposed(self) -> None:
             easy_array((4, 10, 15), start=0), dims=["band", "y", "x"]
         ).plot.imshow()
 
-    def test_warns_ambigious_dim(self) -> None:
+    def test_warns_ambiguous_dim(self) -> None:
         arr = DataArray(easy_array((3, 3, 3)), dims=["y", "x", "band"])
         with pytest.warns(UserWarning):
             arr.plot.imshow()
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index a1d8994a736..5f7ee266774 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -576,7 +576,7 @@ def test_copy_deep_recursive(self) -> None:
         # lets just ensure that deep copy works without RecursionError
         v.copy(deep=True)
 
-        # indirect recusrion
+        # indirect recursion
         v2 = self.cls("y", [2, 3])
         v.attrs["other"] = v2
         v2.attrs["other"] = v
@@ -654,7 +654,7 @@ def test_aggregate_complex(self):
         expected = Variable((), 0.5 + 1j)
         assert_allclose(v.mean(), expected)
 
-    def test_pandas_cateogrical_dtype(self):
+    def test_pandas_categorical_dtype(self):
         data = pd.Categorical(np.arange(10, dtype="int64"))
         v = self.cls("x", data)
         print(v)  # should not error
@@ -1575,13 +1575,13 @@ def test_transpose_0d(self):
         actual = variable.transpose()
         assert_identical(actual, variable)
 
-    def test_pandas_cateogrical_dtype(self):
+    def test_pandas_categorical_dtype(self):
         data = pd.Categorical(np.arange(10, dtype="int64"))
         v = self.cls("x", data)
         print(v)  # should not error
         assert pd.api.types.is_extension_array_dtype(v.dtype)
 
-    def test_pandas_cateogrical_no_chunk(self):
+    def test_pandas_categorical_no_chunk(self):
         data = pd.Categorical(np.arange(10, dtype="int64"))
         v = self.cls("x", data)
         with pytest.raises(
@@ -2386,7 +2386,7 @@ def test_multiindex(self):
     def test_pad(self, mode, xr_arg, np_arg):
         super().test_pad(mode, xr_arg, np_arg)
 
-    def test_pandas_cateogrical_dtype(self):
+    def test_pandas_categorical_dtype(self):
         data = pd.Categorical(np.arange(10, dtype="int64"))
         with pytest.raises(ValueError, match="was found to be a Pandas ExtensionArray"):
             self.cls("x", data)
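For context on the renamed ``test_pandas_categorical_*`` tests above, this sketch shows the behaviour they verify, using the public ``xr.Variable`` constructor in place of the test suite's ``self.cls``:

```python
import numpy as np
import pandas as pd
import xarray as xr

data = pd.Categorical(np.arange(10, dtype="int64"))

# A Variable built from a pandas Categorical keeps the extension dtype.
v = xr.Variable("x", data)
print(v)  # should not error
print(pd.api.types.is_extension_array_dtype(v.dtype))  # True
```

The final hunk shows the flip side: the chunked variant of the test expects construction from an extension array to raise a ``ValueError`` instead.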