-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add sunpy style sample data downloading (#421)
* Add sunpy style sample data downloading * Random cleanup: don't write files to working directory * Attempt to override data download on RTD * Fix a whoopsie * Fix warning and error * Add some very basic tests * Change doc paths to match sample data * rtd debug * Feck you RTD * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add changelog --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
- Loading branch information
1 parent
e5fd787
commit 0f19a59
Showing
13 changed files
with
202 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Added two partial datasets to `dkist.data.sample` for documentation and testing. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
import os | ||
import tarfile | ||
from pathlib import Path | ||
from urllib.parse import urljoin | ||
|
||
from parfive import Downloader, Results | ||
|
||
from astropy.io import fits | ||
|
||
from dkist import conf | ||
|
||
# FITS header parsed from the text file that sits next to this module.
VISP_HEADER = fits.Header.fromtextfile(Path(__file__).parent / "VISP_HEADER.hdr")

# Registry of downloadable sample datasets:
#   dataset name -> (base URL, name of the tar file below that URL)
_SAMPLE_DATASETS = {
    "VISP_BKPLX": (
        "https://g-a36282.cd214.a567.data.globus.org/user_tools_tutorial_data/",
        "BKPLX_stokesI.tar",
    ),
    "VBI_AJQWW": (
        "https://g-a36282.cd214.a567.data.globus.org/user_tools_tutorial_data/",
        "AJQWW_single_mosaic.tar",
    ),
}
|
||
|
||
def _download_and_extract_sample_data(names, overwrite, path):
    """
    Download the tar files of the named sample datasets and extract them.

    Parameters
    ----------
    names : list[str]
        The names of the datasets to download and extract. Every name must
        be a key of ``_SAMPLE_DATASETS``.
    overwrite : bool
        If True, tar files already on disk are downloaded again and
        overwritten. If False, tar files already on disk are reused.
    path : `pathlib.Path`
        The sample data path to save the tar files. Each dataset is extracted
        into ``path / <dataset name>``.

    Returns
    -------
    `parfive.Results`
        Download results. Will behave like a list in which each tar file
        entry has been replaced by the directory it was extracted into.
    """
    # ``names`` is walked more than once below, so materialise it first
    # (callers may hand in a dict view or another one-shot iterable).
    names = list(names)

    dl = Downloader(overwrite=overwrite, progress=True)

    existing_files = []

    for name in names:
        base_url, filename = _SAMPLE_DATASETS[name]
        filepath = path / filename
        # Only reuse the file on disk when the caller did not ask for a
        # forced re-download; otherwise ``overwrite=True`` would be a no-op.
        if not overwrite and filepath.exists():
            existing_files.append(filepath)
            continue

        url = urljoin(base_url, filename)
        dl.enqueue_file(url, path=path)

    results = Results()
    if dl.queued_downloads:
        results = dl.download()
    results += existing_files

    # Map each tar file name back to the dataset name, which is also the name
    # of the directory the archive is extracted into.
    file_folder = {_SAMPLE_DATASETS[name][1]: name for name in names}

    for i, tarpath in enumerate(results):
        output_path = path / file_folder[Path(tarpath).name]
        with tarfile.open(tarpath, "r:*") as tar:
            # The "data" filter rejects unsafe archive members.
            tar.extractall(path=output_path, filter="data")
        results[i] = output_path

    return results
|
||
|
||
def _get_sample_datasets(dataset_names, no_download=False, force_download=False):
    """
    Returns a list of disk locations corresponding to a list of dataset names
    for sample data, downloading the sample data files as necessary.

    Parameters
    ----------
    dataset_names : `str` or iterable of `str`
        Name(s) of the sample datasets to fetch. Names which are not keys of
        ``_SAMPLE_DATASETS`` are ignored.
    no_download : `bool`
        If ``True``, do not download any files, even if they are not present.
        Default is ``False``.
    force_download : `bool`
        If ``True``, download all files, and overwrite any existing ones.
        Default is ``False``.

    Returns
    -------
    `list` of `pathlib.Path`
        List of directories holding the extracted datasets, in the order the
        datasets are declared in ``_SAMPLE_DATASETS``. If
        ``no_download == True`` datasets that are not present on disk are
        left out of the list.

    Raises
    ------
    RuntimeError
        Raised if any of the files cannot be downloaded.
    """
    # A bare string would turn the membership test below into a substring
    # match, so treat it as a single dataset name.
    if isinstance(dataset_names, str):
        dataset_names = [dataset_names]
    requested = set(dataset_names)

    sampledata_dir = Path(conf.sample_data_directory)
    if env_override := os.environ.get("DKIST_SAMPLE_DIR"):
        # For some reason, RTD adds ' around the path in the env var.
        sampledata_dir = Path(env_override.strip("'"))
    sampledata_dir = sampledata_dir.expanduser()

    datasets = {k: v for k, v in _SAMPLE_DATASETS.items() if k in requested}

    if no_download:
        return [sampledata_dir / name for name in datasets if (sampledata_dir / name).exists()]

    results = _download_and_extract_sample_data(datasets.keys(), overwrite=force_download, path=sampledata_dir)

    if results.errors:
        raise RuntimeError(
            f"{len(results.errors)} sample data files failed "
            "to download, the first error is above.") from results.errors[0].exception

    return list(results)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,27 @@ | ||
import pathlib | ||
""" | ||
This module provides some (partial) sample datasets. | ||
""" | ||
|
||
from astropy.io import fits | ||
from ._sample import _SAMPLE_DATASETS, VISP_HEADER, _get_sample_datasets | ||
|
||
__all__ = ["VISP_HEADER"] | ||
__all__ = ["download_all_sample_data", *sorted(_SAMPLE_DATASETS.keys()), "VISP_HEADER"] | ||
|
||
_data_dir = pathlib.Path(__file__).parent | ||
|
||
VISP_HEADER = fits.Header.fromtextfile(_data_dir / "VISP_HEADER.hdr") | ||
# Module-level __dir__() hook, see PEP 562 (https://peps.python.org/pep-0562/).
def __dir__():
    """
    Report the public names in ``__all__`` as the contents of this module.
    """
    return __all__
|
||
|
||
# Module-level __getattr__() hook, see PEP 562 (https://peps.python.org/pep-0562/).
def __getattr__(name):
    """
    Resolve a sample dataset name to its location on first attribute access.

    Names that are not keys of ``_SAMPLE_DATASETS`` raise `AttributeError`.
    """
    if name not in _SAMPLE_DATASETS:
        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

    locations = _get_sample_datasets(name)
    return locations[0]
|
||
|
||
def download_all_sample_data():
    """
    Fetch every registered sample dataset which is not yet on disk.

    Returns whatever ``_get_sample_datasets`` returns for the full set of
    registered dataset names.
    """
    every_name = _SAMPLE_DATASETS.keys()
    return _get_sample_datasets(every_name)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import os | ||
from unittest.mock import call | ||
|
||
import pytest | ||
|
||
|
||
@pytest.fixture
def tmp_sample_dir(tmp_path):
    """
    Point ``DKIST_SAMPLE_DIR`` at a temporary directory for one test.

    Yields the temporary path. On teardown the environment variable is put
    back exactly as it was: restored to its previous value, or removed if it
    was not set before the test.
    """
    old_path = os.environ.get("DKIST_SAMPLE_DIR")
    os.environ["DKIST_SAMPLE_DIR"] = str(tmp_path)
    try:
        yield tmp_path
    finally:
        if old_path is None:
            # The variable did not exist before; do not leave an empty one behind.
            os.environ.pop("DKIST_SAMPLE_DIR", None)
        else:
            os.environ["DKIST_SAMPLE_DIR"] = old_path
|
||
|
||
def test_module_dir():
    """
    Both sample dataset names are listed by ``dir()`` on the sample module.
    """
    import dkist.data.sample

    listed = dir(dkist.data.sample)

    assert "VBI_AJQWW" in listed
    assert "VISP_BKPLX" in listed
|
||
|
||
@pytest.mark.parametrize("attrname", ["VBI_AJQWW", "VISP_BKPLX"])
def test_module_getattr(mocker, attrname):
    """
    Accessing a dataset attribute calls ``_get_sample_datasets`` with the
    attribute name and takes the first element of what it returns.
    """
    patched = mocker.patch("dkist.data.sample._get_sample_datasets")
    import dkist.data.sample

    getattr(dkist.data.sample, attrname)

    expected_calls = [call(attrname), call().__getitem__(0)]
    patched.assert_has_calls(expected_calls)
|
||
|
||
@pytest.mark.internet_off
def test_fail(tmp_sample_dir):
    """
    Importing a dataset into a fresh sample directory must raise a
    RuntimeError reporting the failed download.
    """
    expected_message = "1 sample data files failed"
    with pytest.raises(RuntimeError, match=expected_message):
        from dkist.data.sample import VISP_BKPLX  # noqa: F401
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters