Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NWM Client New: Separate Large Modules #159

Merged
merged 10 commits into from
Nov 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/nwm_client_new/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ The following example demonstrates how one might use `hydrotools.nwm_client_new`
### Code
```python
# Import the nwm Client
-from hydrotools.nwm_client_new.NWMClient import NWMFileClient
+from hydrotools.nwm_client_new.NWMFileClient import NWMFileClient
import pandas as pd

# Instantiate model data client
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
======================================
NWM Google Cloud Platform File Catalog
======================================
Concrete implementation of a National Water Model file client for discovering
files on Google Cloud Platform (GCP).

GCP -- https://console.cloud.google.com/marketplace/details/noaa-public/national-water-model

Classes
-------
GCPFileCatalog
"""
from .NWMFileCatalog import NWMFileCatalog
from google.cloud import storage
from typing import List

class GCPFileCatalog(NWMFileCatalog):
    """A Google Cloud client class for NWM data.

    This GCPFileCatalog class provides various methods for discovering NWM
    files on Google Cloud Platform.
    """

    def __init__(
        self,
        bucket_name: str = 'national-water-model'
    ) -> None:
        """Initialize catalog of NWM data source on Google Cloud Platform.

        Parameters
        ----------
        bucket_name : str, optional, default 'national-water-model'
            Name of Google Cloud Bucket

        Returns
        -------
        None
        """
        super().__init__()
        self.bucket_name = bucket_name

    def list_blobs(
        self,
        configuration: str,
        reference_time: str,
        must_contain: str = 'channel_rt'
    ) -> List[str]:
        """List available blobs with provided parameters.

        Parameters
        ----------
        configuration : str, required
            Particular model simulation or forecast configuration. For a list
            of available configurations see NWMDataService.configurations
        reference_time : str, required
            Model simulation or forecast issuance/reference time in
            YYYYmmddTHHZ format.
        must_contain : str, optional, default 'channel_rt'
            Optional substring found in each blob name.

        Returns
        -------
        A list of public URLs, one for each blob whose name satisfies the
        criteria set by the parameters.
        """
        # Validate configuration (delegated to the inherited helper)
        self.raise_invalid_configuration(configuration)

        # Break-up reference time into its date and time components
        issue_date, issue_time = self.separate_datetime(reference_time)

        # Connect to bucket with anonymous client
        client = storage.Client.create_anonymous_client()
        bucket = client.bucket(self.bucket_name)

        # Get blobs that share the date/configuration/issue-time prefix
        blobs = client.list_blobs(
            bucket,
            prefix=f'nwm.{issue_date}/{configuration}/nwm.t{issue_time}'
        )

        # Filter on the blob name, but return the public URL. Iterate the
        # listing directly rather than copying it into a temporary list.
        return [b.public_url for b in blobs if must_contain in b.name]

    @property
    def bucket_name(self) -> str:
        """Name of the Google Cloud bucket that is searched for blobs."""
        return self._bucket_name

    @bucket_name.setter
    def bucket_name(self, bucket_name: str) -> None:
        self._bucket_name = bucket_name

151 changes: 151 additions & 0 deletions python/nwm_client_new/src/hydrotools/nwm_client_new/HTTPFileCatalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""
=====================
NWM HTTP File Catalog
=====================
Concrete implementation of a National Water Model file client for discovering
files on generic HTTP servers, for example:

NOMADS -- https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/

Classes
-------
HTTPFileCatalog
"""
import asyncio
import ssl
from typing import List, Optional

import aiohttp
from bs4 import BeautifulSoup

from .NWMFileCatalog import NWMFileCatalog

class HTTPFileCatalog(NWMFileCatalog):
    """An HTTP client class for NWM data.

    This HTTPFileCatalog class provides various methods for discovering NWM
    files on generic web servers.
    """

    def __init__(
        self,
        server: str,
        ssl_context: Optional[ssl.SSLContext] = None
    ) -> None:
        """Initialize HTTP File Catalog of NWM data source.

        Parameters
        ----------
        server : str, required
            Fully qualified path to web server endpoint. Example:
            "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/"
        ssl_context : ssl.SSLContext, optional, default context
            SSL configuration context. When omitted (None), a new context
            from ssl.create_default_context() is created for this instance.

        Returns
        -------
        None
        """
        super().__init__()
        # Server path
        self.server = server

        # Setup SSL context. The default is built here, not in the signature,
        # so that a single context created at import time is not shared (and
        # possibly mutated) across every instance.
        if ssl_context is None:
            ssl_context = ssl.create_default_context()
        self.ssl_context = ssl_context

    @staticmethod
    async def get_html(
        url: str,
        ssl_context: Optional[ssl.SSLContext] = None
    ) -> str:
        """Retrieve an HTML document.

        Parameters
        ----------
        url : str, required
            Path to HTML document
        ssl_context : ssl.SSLContext, optional, default context
            SSL configuration context. When omitted (None), a new context
            from ssl.create_default_context() is created for this call.

        Returns
        -------
        HTML document retrieved from url.

        Raises
        ------
        FileNotFoundError
            If the response status code is 400 or greater. The response
            body is used as the exception message.
        """
        # Build the default per call instead of sharing one module-level
        # context through the function signature.
        if ssl_context is None:
            ssl_context = ssl.create_default_context()

        async with aiohttp.ClientSession() as session:
            async with session.get(url, ssl=ssl_context) as response:
                # Get html content
                html_doc = await response.text()

                # Raise for no results found
                if response.status >= 400:
                    raise FileNotFoundError(html_doc)

                # Otherwise return response content
                return html_doc

    def list_blobs(
        self,
        configuration: str,
        reference_time: str,
        must_contain: str = 'channel_rt'
    ) -> List[str]:
        """List available blobs with provided parameters.

        Parameters
        ----------
        configuration : str, required
            Particular model simulation or forecast configuration. For a list
            of available configurations see NWMDataService.configurations
        reference_time : str, required
            Model simulation or forecast issuance/reference time in
            %Y%m%dT%HZ format.
        must_contain : str, optional, default 'channel_rt'
            Optional substring that must be found in each blob's full path.

        Returns
        -------
        A list of full paths (server + directory + file name) that satisfy
        the criteria set by the parameters.
        """
        # Validate configuration (delegated to the inherited helper)
        self.raise_invalid_configuration(configuration)

        # Break-up reference time
        issue_date, issue_time = NWMFileCatalog.separate_datetime(reference_time)

        # Generate url of the directory listing; the server path is joined
        # by plain concatenation.
        directory = self.server + f"nwm.{issue_date}/{configuration}/"

        # Get directory listing
        html_doc = asyncio.run(self.get_html(directory, self.ssl_context))

        # Parse content and collect the target of every link
        soup = BeautifulSoup(html_doc, 'html.parser')
        filenames = (e.get("href") for e in soup.select("a[href]"))

        # Keep files issued at the requested time whose full path contains
        # the required substring. The prefix is computed once, outside the
        # loop.
        file_prefix = f"nwm.t{issue_time}"
        return [
            directory + filename
            for filename in filenames
            if filename.startswith(file_prefix)
            and must_contain in directory + filename
        ]

    @property
    def server(self) -> str:
        """Fully qualified path to the web server endpoint."""
        return self._server

    @server.setter
    def server(self, server: str) -> None:
        self._server = server

    @property
    def ssl_context(self) -> ssl.SSLContext:
        """SSL configuration context used when requesting listings."""
        return self._ssl_context

    @ssl_context.setter
    def ssl_context(self, ssl_context: ssl.SSLContext) -> None:
        self._ssl_context = ssl_context
Loading