Skip to content

Commit

Permalink
io: Add more tests for to_io utility function
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Dec 4, 2023
1 parent dc53919 commit 692a135
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 4 deletions.
16 changes: 13 additions & 3 deletions pueblo/io/universal.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,16 @@ def open_url(url: str) -> PathPlus:
-----------
fs = Path("github://path/to/document.md", username="foobar", token="ghp_lalala", org="acme", repo="sweet-camino")
"""
uri = URL(url)
uri = None
try:
uri = URL(url)
except ValueError as ex:
if "host is required for absolute urls" in str(ex):
pass
else:
raise

if uri.scheme.startswith("github+https"):
if uri and uri.scheme.startswith("github+https"):
path_fragments = uri.path.split("/")[1:]
path_kwargs = {
"username": uri.user,
Expand All @@ -60,7 +67,10 @@ def open_url(url: str) -> PathPlus:
path = PathPlus(downstream_url, **path_kwargs)

else:
path = PathPlus(url)
kwargs = {}
if url.startswith("s3://"):
kwargs["anon"] = True
path = PathPlus(url, **kwargs)
return path


Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,9 @@ develop = [
"validate-pyproject<0.16",
]
fileio = [
"fsspec[adlfs,dask,gcs,git,github,http,s3,smb]<2023.11",
"fsspec[abfs,dask,gcs,git,github,http,s3,smb]<2023.11",
"pathlibfs<0.6",
"python-magic<0.5",
"yarl<1.10",
]
nlp = [
Expand Down
43 changes: 43 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# ruff: noqa: E402
import dataclasses
import typing as t

import magic
import pytest

pytest.importorskip("pathlibfs")
Expand Down Expand Up @@ -60,3 +64,42 @@ def test_path_without_scheme_absolute():

def test_path_without_scheme_relative():
assert path_without_scheme("/bar/baz") == PathPlus("file:///bar/baz")


@dataclasses.dataclass
class RemoteFile:
    """Pair a remote resource URL with the MIME types accepted for it."""

    # Address of the resource, including its scheme (https://, s3://, ...).
    url: str
    # MIME types that count as a valid detection result for the resource.
    mimetypes: t.List[str]


def remote_files() -> t.List[RemoteFile]:
    """Return the remote resources used to parametrize tests.

    Every entry couples a URL with the MIME types accepted for its content.
    A fresh list, holding fresh ``mimetypes`` lists, is built on each call.
    """
    ods = "application/vnd.oasis.opendocument.spreadsheet"
    catalog = (
        (
            "https://github.com/daq-tools/skeem/raw/main/tests/testdata/basic.ods",
            (ods,),
        ),
        (
            "github://daq-tools:skeem@/tests/testdata/basic.ods",
            (ods,),
        ),
        (
            "github+https://github.com/daq-tools/skeem/raw/main/tests/testdata/basic.ods",
            (ods,),
        ),
        (
            "gs://gcp-public-data-landsat/LC08/01/001/003/LC08_L1GT_001003_20140812_20170420_01_T2/LC08_L1GT_001003_20140812_20170420_01_T2_B3.TIF",
            ("image/tiff",),
        ),
        (
            "s3://fmi-gridded-obs-daily-1km/Netcdf/Tday/tday_2023.nc",
            ("application/x-netcdf", "application/octet-stream"),
        ),
    )
    return [
        RemoteFile(url=location, mimetypes=list(accepted))
        for location, accepted in catalog
    ]


# An `ids` callable derives each test id from the parametrized value itself,
# so the list of remote files is built once instead of twice.
@pytest.mark.parametrize("remote_file", remote_files(), ids=lambda rf: rf.url)
def test_to_io_remote_files(remote_file):
    """Check that the first bytes read through `to_io` match an accepted MIME type.

    Opens ``remote_file.url`` in binary mode, reads the leading 100 bytes, and
    asserts that the MIME type libmagic detects for them is one of
    ``remote_file.mimetypes``.
    """
    with to_io(remote_file.url, mode="rb") as fp:
        # Only a short prefix is read; detection works on this buffer alone.
        content = fp.read(100)
        mimetype = magic.from_buffer(content, mime=True)
        assert mimetype in remote_file.mimetypes

0 comments on commit 692a135

Please sign in to comment.