Skip to content

Commit

Permalink
Merge pull request #159 from jarq6c/nwm-units
Browse files Browse the repository at this point in the history
NWM Client New: Separate Large Modules
  • Loading branch information
jarq6c authored Nov 15, 2021
2 parents 55b19b5 + f688122 commit 4930ced
Show file tree
Hide file tree
Showing 10 changed files with 626 additions and 570 deletions.
2 changes: 1 addition & 1 deletion python/nwm_client_new/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ The following example demonstrates how one might use `hydrotools.nwm_client_new`
### Code
```python
# Import the nwm Client
from hydrotools.nwm_client_new.NWMClient import NWMFileClient
from hydrotools.nwm_client_new.NWMFileClient import NWMFileClient
import pandas as pd

# Instantiate model data client
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
======================================
NWM Google Cloud Platform File Catalog
======================================
Concrete implementation of a National Water Model file client for discovering
files on Google Cloud Platform (GCP).
GCP -- https://console.cloud.google.com/marketplace/details/noaa-public/national-water-model
Classes
-------
GCPFileCatalog
"""
from .NWMFileCatalog import NWMFileCatalog
from google.cloud import storage
from typing import List

class GCPFileCatalog(NWMFileCatalog):
    """Catalog of NWM files stored in a Google Cloud Storage bucket.

    Provides discovery of National Water Model output files on Google Cloud
    Platform. The bucket is accessed through an anonymous storage client.
    """

    def __init__(
        self,
        bucket_name: str = 'national-water-model'
    ) -> None:
        """Set up the catalog for a Google Cloud bucket.

        Parameters
        ----------
        bucket_name : str, optional, default 'national-water-model'
            Name of the Google Cloud bucket that is searched for NWM files.

        Returns
        -------
        None
        """
        super().__init__()
        self.bucket_name = bucket_name

    def list_blobs(
        self,
        configuration: str,
        reference_time: str,
        must_contain: str = 'channel_rt'
    ) -> List[str]:
        """Find the blobs that belong to one model cycle.

        Parameters
        ----------
        configuration : str, required
            Particular model simulation or forecast configuration. For a list
            of available configurations see NWMDataService.configurations
        reference_time : str, required
            Model simulation or forecast issuance/reference time in
            YYYYmmddTHHZ format.
        must_contain : str, optional, default 'channel_rt'
            Substring that has to occur in a blob's name for the blob to be
            included in the result.

        Returns
        -------
        List of public URLs, one for each blob whose name starts with
        ``nwm.<date>/<configuration>/nwm.t<time>`` and contains
        ``must_contain``.
        """
        # The configuration is checked by the base class before any request
        # is made
        self.raise_invalid_configuration(configuration)

        # Split the reference time into the date and time parts that appear
        # in the blob names
        issue_date, issue_time = self.separate_datetime(reference_time)
        blob_prefix = f'nwm.{issue_date}/{configuration}/nwm.t{issue_time}'

        # No credentials are supplied; the bucket is read anonymously
        anonymous = storage.Client.create_anonymous_client()
        candidates = anonymous.list_blobs(
            anonymous.bucket(self.bucket_name),
            prefix=blob_prefix
        )

        # The filter is applied to the blob name, but the public URL is what
        # gets returned
        urls = []
        for blob in candidates:
            if must_contain in blob.name:
                urls.append(blob.public_url)
        return urls

    @property
    def bucket_name(self) -> str:
        """Name of the Google Cloud bucket searched by this catalog."""
        return self._bucket_name

    @bucket_name.setter
    def bucket_name(self, bucket_name: str) -> None:
        self._bucket_name = bucket_name

151 changes: 151 additions & 0 deletions python/nwm_client_new/src/hydrotools/nwm_client_new/HTTPFileCatalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""
=====================
NWM HTTP File Catalog
=====================
Concrete implementation of a National Water Model file client for discovering
files on generic HTTP servers, for example:
NOMADS -- https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/
Classes
-------
HTTPFileCatalog
"""
import asyncio
import ssl
from typing import List, Optional

import aiohttp
from bs4 import BeautifulSoup

from .NWMFileCatalog import NWMFileCatalog

class HTTPFileCatalog(NWMFileCatalog):
    """An HTTP client class for NWM data.

    This HTTPFileCatalog class provides various methods for discovering NWM
    files on generic web servers that expose an HTML directory listing.
    """

    def __init__(
        self,
        server: str,
        ssl_context: Optional[ssl.SSLContext] = None
    ) -> None:
        """Initialize HTTP File Catalog of NWM data source.

        Parameters
        ----------
        server : str, required
            Fully qualified path to web server endpoint. Example:
            "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/"
        ssl_context : ssl.SSLContext, optional
            SSL configuration context. When omitted, a new context is created
            with ssl.create_default_context().

        Returns
        -------
        None
        """
        super().__init__()

        # Server path
        self.server = server

        # Setup SSL context. The default is built here rather than in the
        # signature so that it is not created at import time and is not
        # shared (and mutated) across instances.
        if ssl_context is None:
            ssl_context = ssl.create_default_context()
        self.ssl_context = ssl_context

    @staticmethod
    async def get_html(
        url: str,
        ssl_context: Optional[ssl.SSLContext] = None
    ) -> str:
        """Retrieve an HTML document.

        Parameters
        ----------
        url : str, required
            Path to HTML document
        ssl_context : ssl.SSLContext, optional
            SSL configuration context. When omitted, a new context is created
            with ssl.create_default_context().

        Returns
        -------
        HTML document retrieved from url.

        Raises
        ------
        FileNotFoundError
            If the server answers with a status code of 400 or higher. The
            response body is used as the exception message.
        """
        # Build the default per call instead of once in the signature
        if ssl_context is None:
            ssl_context = ssl.create_default_context()

        async with aiohttp.ClientSession() as session:
            async with session.get(url, ssl=ssl_context) as response:
                # Get html content
                html_doc = await response.text()

                # Raise for no results found
                if response.status >= 400:
                    raise FileNotFoundError(html_doc)

                # Otherwise return response content
                return html_doc

    def list_blobs(
        self,
        configuration: str,
        reference_time: str,
        must_contain: str = 'channel_rt'
    ) -> List[str]:
        """List available blobs with provided parameters.

        Parameters
        ----------
        configuration : str, required
            Particular model simulation or forecast configuration. For a list
            of available configurations see NWMDataService.configurations
        reference_time : str, required
            Model simulation or forecast issuance/reference time in
            %Y%m%dT%HZ format.
        must_contain : str, optional, default 'channel_rt'
            Optional substring that must be found in each blob path.

        Returns
        -------
        A list of full URLs that satisfy the criteria set by the parameters.

        Raises
        ------
        FileNotFoundError
            If the directory listing cannot be retrieved (HTTP status of 400
            or higher).
        """
        # Validate configuration
        self.raise_invalid_configuration(configuration)

        # Break-up reference time
        issue_date, issue_time = NWMFileCatalog.separate_datetime(reference_time)

        # Generate url. Add the path separator if the configured server does
        # not end with one, otherwise the last path segment of the server and
        # the "nwm.<date>" directory would be fused together.
        server = self.server if self.server.endswith("/") else self.server + "/"
        directory = f"{server}nwm.{issue_date}/{configuration}/"

        # Get directory listing
        # NOTE: asyncio.run starts its own event loop, so this method cannot
        # be called from code that is already running inside an event loop.
        html_doc = asyncio.run(self.get_html(directory, self.ssl_context))

        # Parse content and collect the targets of all links
        soup = BeautifulSoup(html_doc, 'html.parser')
        hrefs = [element.get("href") for element in soup.select("a[href]")]

        # Keep links that belong to the requested cycle and whose full path
        # contains the required substring
        file_prefix = f"nwm.t{issue_time}"
        return [
            directory + filename
            for filename in hrefs
            if filename.startswith(file_prefix)
            and must_contain in directory + filename
        ]

    @property
    def server(self) -> str:
        """Web server endpoint searched by this catalog."""
        return self._server

    @server.setter
    def server(self, server: str) -> None:
        self._server = server

    @property
    def ssl_context(self) -> ssl.SSLContext:
        """SSL configuration context used for requests to the server."""
        return self._ssl_context

    @ssl_context.setter
    def ssl_context(self, ssl_context: ssl.SSLContext) -> None:
        self._ssl_context = ssl_context
Loading

0 comments on commit 4930ced

Please sign in to comment.