Merge pull request #159 from jarq6c/nwm-units

NWM Client New: Separate Large Modules
NOAA-OWP · Nov 15, 2021 · 4930ced · 4930ced
2 parents 55b19b5 + f688122
commit 4930ced
Show file tree

Hide file tree

Showing 10 changed files with 626 additions and 570 deletions.
diff --git a/python/nwm_client_new/README.md b/python/nwm_client_new/README.md
@@ -27,7 +27,7 @@ The following example demonstrates how one might use `hydrotools.nwm_client_new`
 ### Code
 ```python
 # Import the nwm Client
-from hydrotools.nwm_client_new.NWMClient import NWMFileClient
+from hydrotools.nwm_client_new.NWMFileClient import NWMFileClient
 import pandas as pd
 
 # Instantiate model data client

diff --git a/python/nwm_client_new/src/hydrotools/nwm_client_new/GCPFileCatalog.py b/python/nwm_client_new/src/hydrotools/nwm_client_new/GCPFileCatalog.py
@@ -0,0 +1,91 @@
+"""
+======================================
+NWM Google Cloud Platform File Catalog
+======================================
+Concrete implementation of a National Water Model file client for discovering 
+files on Google Cloud Platform (GCP).
+
+GCP -- https://console.cloud.google.com/marketplace/details/noaa-public/national-water-model
+
+Classes
+-------
+GCPFileCatalog
+"""
+from .NWMFileCatalog import NWMFileCatalog
+from google.cloud import storage
+from typing import List
+
+class GCPFileCatalog(NWMFileCatalog):
+    """A Google Cloud client class for NWM data.
+    This GCPFileCatalog class provides various methods for discovering NWM 
+    files on Google Cloud Platform.
+    """
+
+    def __init__(
+        self,
+        bucket_name: str = 'national-water-model'
+        ) -> None:
+        """Initialize catalog of NWM data source on Google Cloud Platform.
+
+        Parameters
+        ----------
+        bucket_name : str, required, default 'national-water-model'
+            Name of Google Cloud Bucket
+            
+        Returns
+        -------
+        None
+        """
+        super().__init__()
+        self.bucket_name = bucket_name
+
+    def list_blobs(
+        self,
+        configuration: str,
+        reference_time: str,
+        must_contain: str = 'channel_rt'
+        ) -> List[str]:
+        """List available blobs with provided parameters.
+
+        Parameters
+        ----------
+        configuration : str, required
+            Particular model simulation or forecast configuration. For a list 
+            of available configurations see NWMDataService.configurations
+        reference_time : str, required
+            Model simulation or forecast issuance/reference time in 
+            YYYYmmddTHHZ format.
+        must_contain : str, optional, default 'channel_rt'
+            Optional substring found in each blob name.
+
+        Returns
+        -------
+        A list of blob names that satisfy the criteria set by the parameters.
+        """
+        # Validate configuration
+        self.raise_invalid_configuration(configuration)
+
+        # Break-up reference time
+        issue_date, issue_time = self.separate_datetime(reference_time)
+
+        # Connect to bucket with anonymous client
+        client = storage.Client.create_anonymous_client()
+        bucket = client.bucket(self.bucket_name)
+
+        # Get list of blobs
+        blobs = client.list_blobs(
+            bucket,
+            prefix=f'nwm.{issue_date}/{configuration}/nwm.t{issue_time}'
+            )
+
+        # Return blob names
+        return [b.public_url for b in list(blobs) if must_contain in b.name]
+
+    @property
+    def bucket_name(self) -> str:
+        return self._bucket_name
+
+    @bucket_name.setter
+    def bucket_name(self, bucket_name: str) -> None:
+        self._bucket_name = bucket_name
+
diff --git a/python/nwm_client_new/src/hydrotools/nwm_client_new/HTTPFileCatalog.py b/python/nwm_client_new/src/hydrotools/nwm_client_new/HTTPFileCatalog.py
@@ -0,0 +1,151 @@
+"""
+=====================
+NWM HTTP File Catalog
+=====================
+Concrete implementation of a National Water Model file client for discovering 
+files on generic HTTP servers, for example:
+
+NOMADS -- https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/
+
+Classes
+-------
+HTTPFileCatalog
+"""
+import asyncio
+import ssl
+from typing import List, Optional
+
+import aiohttp
+from bs4 import BeautifulSoup
+
+from .NWMFileCatalog import NWMFileCatalog
+
+class HTTPFileCatalog(NWMFileCatalog):
+    """An HTTP client class for NWM data.
+    This HTTPFileCatalog class provides various methods for discovering NWM 
+    files on generic web servers.
+    """
+
+    def __init__(
+        self,
+        server: str,
+        ssl_context: ssl.SSLContext = ssl.create_default_context()
+        ) -> None:
+        """Initialize HTTP File Catalog of NWM data source.
+
+        Parameters
+        ----------
+        server : str, required
+            Fully qualified path to web server endpoint. Example:
+            "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/"
+        ssl_context : ssl.SSLContext, optional, default context
+            SSL configuration context.
+            
+        Returns
+        -------
+        None
+        """
+        super().__init__()
+        # Server path
+        self.server = server
+
+        # Setup SSL context
+        self.ssl_context = ssl_context
+
+    @staticmethod
+    async def get_html(
+        url: str,
+        ssl_context: ssl.SSLContext = ssl.create_default_context()
+        ) -> str:
+        """Retrieve an HTML document.
+
+        Parameters
+        ----------
+        url : str, required
+            Path to HTML document
+        ssl_context : ssl.SSLContext, optional, default context
+            SSL configuration context.
+
+        Returns
+        -------
+        HTML document retrieved from url.
+        """
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, ssl=ssl_context) as response:
+                # Get html content
+                html_doc = await response.text()
+
+                # Raise for no results found
+                if response.status >= 400:
+                    raise FileNotFoundError(html_doc)
+
+                # Otherwise return response content
+                return html_doc
+
+    def list_blobs(
+        self,
+        configuration: str,
+        reference_time: str,
+        must_contain: str = 'channel_rt'
+        ) -> List[str]:
+        """List available blobs with provided parameters.
+
+        Parameters
+        ----------
+        configuration : str, required
+            Particular model simulation or forecast configuration. For a list 
+            of available configurations see NWMDataService.configurations
+        reference_time : str, required
+            Model simulation or forecast issuance/reference time in 
+            %Y%m%dT%HZ format.
+        must_contain : str, optional, default 'channel_rt'
+            Optional substring that must be found in each blob name.
+
+        Returns
+        -------
+        A list of blob names that satisfy the criteria set by the parameters.
+        """
+        # Validate configuration
+        self.raise_invalid_configuration(configuration)
+
+        # Break-up reference time
+        issue_date, issue_time = NWMFileCatalog.separate_datetime(reference_time)
+
+        # Set prefix
+        prefix = f"nwm.{issue_date}/{configuration}/"
+
+        # Generate url
+        directory = self.server + prefix
+
+        # Get directory listing
+        html_doc = asyncio.run(self.get_html(directory, self.ssl_context))
+
+        # Parse content
+        soup = BeautifulSoup(html_doc, 'html.parser')
+
+        # Get links
+        elements = soup.select("a[href]")
+
+        # Generate list
+        blob_list = []
+        for e in elements:
+            filename = e.get("href")
+            if filename.startswith(f"nwm.t{issue_time}"):
+                full_path = directory + filename
+                blob_list.append(full_path)
+
+        return [b for b in blob_list if must_contain in b]
+
+    @property
+    def server(self) -> str:
+        return self._server
+
+    @server.setter
+    def server(self, server: str) -> None:
+        self._server = server
+
+    @property
+    def ssl_context(self) -> ssl.SSLContext:
+        return self._ssl_context
+
+    @ssl_context.setter
+    def ssl_context(self, ssl_context: ssl.SSLContext) -> None:
+        self._ssl_context = ssl_context