Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch NWM Crosswalk Source from CSV to URL #156

Merged
merged 7 commits into from
Nov 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion python/nwm_client_new/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
include LICENSE
include src/hydrotools/nwm_client_new/data/routelink_files/*
1 change: 1 addition & 0 deletions python/nwm_client_new/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ install_requires =
aiohttp
aiofiles
netcdf4
tables
python_requires = >=3.7
include_package_data = True

Expand Down
58 changes: 41 additions & 17 deletions python/nwm_client_new/src/hydrotools/nwm_client_new/NWMClient.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from typing import List, Union
from pathlib import Path
from dataclasses import dataclass
from tempfile import TemporaryDirectory
import ssl
import shutil
from urllib.parse import unquote
Expand All @@ -39,15 +40,18 @@ class QueryError(Exception):
class NWMClientDefaults:
"""Stores application default options.

CROSSWALK: DataFrame that maps between point feature data source identifiers
(i.e. USGS gage id -> NHDPlus COMID).
CACHE: Configured ParquetCache instance.
CATALOG: Concrete NWM data source instance.
CANONICAL_COLUMN_MAPPING: Mapping from NWM output variable names to
hydrotools canonical names.
SSL_CONTEXT: ssl context instance.
ROUTELINK_URL: URL string path that points at an HDF5 file containing a
pandas.DataFrame with NWM crosswalk data.
CROSSWALK: A property that generates a pandas.DataFrame that maps between
point feature data source identifiers (i.e. USGS gage id -> NWM feature
ID).
DOWNLOAD_DIRECTORY: Local path to save downloaded NWM files.
"""
CROSSWALK: pd.DataFrame = None
CACHE: ParquetCache = ParquetCache(
"nwm_cache.parquet",
write_index=False,
Expand All @@ -60,20 +64,40 @@ class NWMClientDefaults:
"streamflow": "value"
})
SSL_CONTEXT: ssl.SSLContext = ssl.create_default_context()
def __post_init__(self):
    """Build the default crosswalk from routelink CSV files.

    Reads every ``*.csv`` file found in the ``data/routelink_files``
    directory next to this module, keeps the ``usgs_site_code`` column
    indexed by ``nwm_feature_id``, and stores the concatenated result
    on ``self.CROSSWALK``.
    """
    # Directory holding the routelink CSV files
    routelink_dir = Path(__file__).parent / "data/routelink_files"

    # One single-column frame per file; lines starting with '#' are skipped
    frames = [
        pd.read_csv(
            csv_path,
            dtype={"nwm_feature_id": int, "usgs_site_code": str},
            comment='#'
        ).set_index('nwm_feature_id')[['usgs_site_code']]
        for csv_path in routelink_dir.glob("*.csv")
    ]

    # Combine into a single crosswalk
    self.CROSSWALK = pd.concat(frames)
ROUTELINK_URL: str = "https://www.hydroshare.org/resource/d154f19f762c4ee9b74be55f504325d3/data/contents/RouteLink.h5"

def _download_and_read_routelink_file(self) -> dd.DataFrame:
    """Download the file at ROUTELINK_URL and load it as a
    dask.dataframe.DataFrame.

    The file is saved as "RouteLink.h5" inside a temporary directory and
    read with pandas before that directory is cleaned up.

    Returns
    -------
    df: dask.dataframe.DataFrame
        Single-partition DataFrame built from the downloaded HDF5 file.
    """
    filename = "RouteLink.h5"

    with TemporaryDirectory() as scratch:
        # Download the file into the scratch directory
        FileDownloader(
            output_directory=scratch,
            create_directory=False,
            ssl_context=self.SSL_CONTEXT
        ).get([(self.ROUTELINK_URL, filename)])

        # Read while the scratch directory still exists
        routelink = pd.read_hdf(Path(scratch) / filename)
        return dd.from_pandas(routelink, npartitions=1)

@property
def CROSSWALK(self) -> pd.DataFrame:
    """Default crosswalk for use by a NWM client, obtained through CACHE.

    Returns
    -------
    crosswalk: pandas.DataFrame
        The "usgs_site_code" column indexed by "nwm_feature_id".
    """
    # Ask the cache for the RouteLink data, passing the downloader
    # as the function that produces it
    routelink = self.CACHE.get(
        function=self._download_and_read_routelink_file,
        subdirectory="CROSSWALK"
    )

    # Materialize, then keep only the crosswalk columns
    columns = ["nwm_feature_id", "usgs_site_code"]
    return routelink.compute()[columns].set_index("nwm_feature_id")

# Initialize defaults
_NWMClientDefault = NWMClientDefaults()

class NWMClient(ABC):
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "6.0.0b0"
__version__ = "6.1.0b0"
Loading