Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move absolute path finder from open_mfdataset to own function #7968

Merged
merged 9 commits into from
Jul 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 7 additions & 33 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence
from functools import partial
from glob import glob
from io import BytesIO
from numbers import Number
from typing import (
Expand All @@ -21,7 +20,12 @@

from xarray import backends, conventions
from xarray.backends import plugins
from xarray.backends.common import AbstractDataStore, ArrayWriter, _normalize_path
from xarray.backends.common import (
AbstractDataStore,
ArrayWriter,
_find_absolute_paths,
_normalize_path,
)
from xarray.backends.locks import _get_scheduler
from xarray.core import indexing
from xarray.core.combine import (
Expand Down Expand Up @@ -967,37 +971,7 @@ def open_mfdataset(
.. [1] https://docs.xarray.dev/en/stable/dask.html
.. [2] https://docs.xarray.dev/en/stable/dask.html#chunking-and-performance
"""
if isinstance(paths, str):
if is_remote_uri(paths) and engine == "zarr":
try:
from fsspec.core import get_fs_token_paths
except ImportError as e:
raise ImportError(
"The use of remote URLs for opening zarr requires the package fsspec"
) from e

fs, _, _ = get_fs_token_paths(
paths,
mode="rb",
storage_options=kwargs.get("backend_kwargs", {}).get(
"storage_options", {}
),
expand=False,
)
tmp_paths = fs.glob(fs._strip_protocol(paths)) # finds directories
paths = [fs.get_mapper(path) for path in tmp_paths]
elif is_remote_uri(paths):
raise ValueError(
"cannot do wild-card matching for paths that are remote URLs "
f"unless engine='zarr' is specified. Got paths: {paths}. "
"Instead, supply paths as an explicit list of strings."
)
else:
paths = sorted(glob(_normalize_path(paths)))
elif isinstance(paths, os.PathLike):
paths = [os.fspath(paths)]
else:
paths = [os.fspath(p) if isinstance(p, os.PathLike) else p for p in paths]
paths = _find_absolute_paths(paths, engine=engine, **kwargs)

if not paths:
raise OSError("no files to open")
Expand Down
78 changes: 78 additions & 0 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
import traceback
from collections.abc import Iterable
from glob import glob
from typing import TYPE_CHECKING, Any, ClassVar

import numpy as np
Expand All @@ -19,6 +20,7 @@
from io import BufferedIOBase

from xarray.core.dataset import Dataset
from xarray.core.types import NestedSequence

# Create a logger object, but don't add any handlers. Leave that to user code.
logger = logging.getLogger(__name__)
Expand All @@ -28,6 +30,24 @@


def _normalize_path(path):
"""
Normalize pathlikes to string.

Parameters
----------
path :
Path to file.

Examples
--------
>>> from pathlib import Path

>>> directory = Path(xr.backends.common.__file__).parent
>>> paths_path = Path(directory).joinpath("comm*n.py")
>>> paths_str = xr.backends.common._normalize_path(paths_path)
>>> print([type(p) for p in (paths_str,)])
[<class 'str'>]
"""
if isinstance(path, os.PathLike):
path = os.fspath(path)

Expand All @@ -37,6 +57,64 @@ def _normalize_path(path):
return path


def _find_absolute_paths(
    paths: str | os.PathLike | NestedSequence[str | os.PathLike], **kwargs
) -> list[str]:
    """
    Find absolute paths from the pattern.

    A single string is treated as a glob pattern; a single path-like or a
    (possibly nested) sequence of paths is taken literally. Every local path
    is passed through ``_normalize_path`` so that all branches are handled
    the same way.

    Parameters
    ----------
    paths :
        Path(s) to file(s). Can include wildcards like ``*`` when given as a
        single string.
    **kwargs :
        Extra kwargs. Mainly for fsspec. Only ``engine`` and
        ``backend_kwargs["storage_options"]`` are read here.

    Returns
    -------
    list
        * single local string: the sorted ``glob`` matches of the normalized
          pattern (empty if nothing matches);
        * single remote string with ``engine="zarr"``: one
          ``fs.get_mapper(...)`` object per match of ``fs.glob``;
        * single path-like: a one-element list;
        * sequence: a list of the same nesting in which every ``str`` /
          ``os.PathLike`` entry has been normalized and any other entry is
          passed through unchanged.

    Raises
    ------
    ImportError
        If a remote zarr pattern is given but fsspec cannot be imported.
    ValueError
        If a remote pattern is given and ``engine`` is not ``"zarr"``.

    Examples
    --------
    >>> from pathlib import Path

    >>> directory = Path(xr.backends.common.__file__).parent
    >>> paths = str(Path(directory).joinpath("comm*n.py"))  # Find common with wildcard
    >>> paths = xr.backends.common._find_absolute_paths(paths)
    >>> [Path(p).name for p in paths]
    ['common.py']
    """
    if isinstance(paths, str):
        if not is_remote_uri(paths):
            # Local pattern: expand wildcards; sorting keeps the order stable.
            return sorted(glob(_normalize_path(paths)))

        if kwargs.get("engine") != "zarr":
            raise ValueError(
                "cannot do wild-card matching for paths that are remote URLs "
                f"unless engine='zarr' is specified. Got paths: {paths}. "
                "Instead, supply paths as an explicit list of strings."
            )

        try:
            from fsspec.core import get_fs_token_paths
        except ImportError as e:
            raise ImportError(
                "The use of remote URLs for opening zarr requires the package fsspec"
            ) from e

        # ``backend_kwargs=None`` may be passed explicitly; treat it as empty
        # instead of failing on ``None.get``.
        backend_kwargs = kwargs.get("backend_kwargs") or {}
        fs, _, _ = get_fs_token_paths(
            paths,
            mode="rb",
            storage_options=backend_kwargs.get("storage_options", {}),
            expand=False,
        )
        tmp_paths = fs.glob(fs._strip_protocol(paths))  # finds directories
        return [fs.get_mapper(path) for path in tmp_paths]

    # Review discussion attached to the non-string branches in the pull
    # request:
    #   Collaborator: "I know that this is just moved from the previous
    #   location, but is there a reason why we don't convert to absolute path
    #   here and below?"
    #   Contributor Author: "I'm not sure, it's a little odd to me as well.
    #   I haven't really felt the need for os.PathLike in my normal usage so I
    #   don't have much experience with that part though."
    # Both branches now go through _normalize_path, like the string branch.
    # NOTE(review): this relies on _normalize_path returning an absolute local
    # path and leaving remote URLs untouched -- only part of that helper is
    # visible here; confirm against its full definition.
    if isinstance(paths, os.PathLike):
        return [_normalize_path(paths)]

    return _normalize_path_list(paths)


def _normalize_path_list(entries):
    """Normalize every str/PathLike in a (possibly nested) list of paths."""
    normalized = []
    for entry in entries:
        if isinstance(entry, (str, os.PathLike)):
            normalized.append(_normalize_path(entry))
        elif isinstance(entry, list):
            # Keep the nesting so callers that combine along several
            # dimensions still see the same structure.
            normalized.append(_normalize_path_list(entry))
        else:
            # Anything that is not a path is handed back untouched, as before.
            normalized.append(entry)
    return normalized


def _encode_variable_name(name):
if name is None:
name = NONE_VAR_NAME
Expand Down