NOAA-OWP · jarq6c · May 24, 2022 · Jan 27, 2022 · Jan 28, 2022 · Jan 28, 2022
diff --git a/.github/workflows/run_slow_unit_tests.yml b/.github/workflows/run_slow_unit_tests.yml
@@ -31,6 +31,7 @@ jobs:
         python3 -m pip install --use-feature=in-tree-build ./python/events[develop]
         python3 -m pip install --use-feature=in-tree-build ./python/metrics[develop]
         python3 -m pip install --use-feature=in-tree-build ./python/nwm_client_new[develop]
+        python3 -m pip install --use-feature=in-tree-build ./python/svi_client[develop]
     - name: Run all unittests 
       run: |
         python3 -m pytest -s
diff --git a/.github/workflows/run_unit_tests.yml b/.github/workflows/run_unit_tests.yml
@@ -28,6 +28,7 @@ jobs:
         python3 -m pip install --use-feature=in-tree-build ./python/nwm_client[develop,gcp]
         python3 -m pip install --use-feature=in-tree-build ./python/events[develop]
         python3 -m pip install --use-feature=in-tree-build ./python/metrics[develop]
+        python3 -m pip install --use-feature=in-tree-build ./python/svi_client[develop]
     - name: Run all unittests 
       run: |
         python3 -m pytest -s -m "not slow"
diff --git a/python/svi_client/CONTRIBUTING.md b/python/svi_client/CONTRIBUTING.md
@@ -0,0 +1 @@
+../../CONTRIBUTING.md
diff --git a/python/svi_client/LICENSE b/python/svi_client/LICENSE
@@ -0,0 +1 @@
+../../LICENSE
diff --git a/python/svi_client/MANIFEST.in b/python/svi_client/MANIFEST.in
@@ -0,0 +1,2 @@
+include LICENSE
+include src/hydrotools/svi_client/data/*
diff --git a/python/svi_client/README.md b/python/svi_client/README.md
@@ -0,0 +1,33 @@
+# OWPHydroTools :: SVI Client
+
+## Installation
+
+
+## Usage
+
+
+### Code
+```python
+```
+### Example output
+```console
+```
+### System Requirements
+
+## Development
+
+```bash
+$ python3 -m venv env
+$ source env/bin/activate
+$ python3 -m pip install -U pip
+$ python3 -m pip install -U setuptools
+$ python3 -m pip install -e ".[develop]"
+```
+
+To generate a source distribution:
+```bash
+$ python3 -m pip install -U wheel build
+$ python3 -m build
+```
+
+The packages generated in `dist/` can be installed directly with `pip` or uploaded to PyPI using `twine`.
diff --git a/python/svi_client/SECURITY.md b/python/svi_client/SECURITY.md
@@ -0,0 +1 @@
+../../SECURITY.md
diff --git a/python/svi_client/TERMS.md b/python/svi_client/TERMS.md
@@ -0,0 +1 @@
+../../TERMS.md
diff --git a/python/svi_client/pyproject.toml b/python/svi_client/pyproject.toml
@@ -0,0 +1,6 @@
+[build-system]
+build-backend = "setuptools.build_meta"
+requires = [
+  "setuptools>=42",
+  "wheel",
+]
diff --git a/python/svi_client/pytest.ini b/python/svi_client/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+markers =
+    slow: marks tests as slow (deselect with '-m "not slow"')
+
diff --git a/python/svi_client/setup.cfg b/python/svi_client/setup.cfg
@@ -0,0 +1,49 @@
+[metadata]
+name = hydrotools.svi_client
+version = attr: hydrotools.svi_client._version.__version__
+author = Austin Raney
+author_email = [email protected]
+description = Retrieve Social Vulnerability Index data from the Centers for Disease Control and Prevention / Agency for Toxic Substances and Disease Registry.
+long_description = file: README.md
+long_description_content_type = text/markdown
+charset = UTF-8
+license = USDOC
+license_files =
+    LICENSE
+url = https://github.com/NOAA-OWP/hydrotools
+project_urls =
+    Documentation = https://noaa-owp.github.io/hydrotools/hydrotools.svi_client.html
+    Source = https://github.com/NOAA-OWP/hydrotools/tree/main/python/svi_client
+    Tracker = https://github.com/NOAA-OWP/hydrotools/issues
+classifiers =
+    Development Status :: 3 - Alpha
+    Intended Audience :: Education
+    Intended Audience :: Science/Research
+    License :: Free To Use But Restricted
+    Programming Language :: Python :: 3.7
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Topic :: Scientific/Engineering
+    Topic :: Sociology
+    Intended Audience :: Science/Research
+    Operating System :: OS Independent
+
+[options]
+packages = find_namespace:
+package_dir =
+    =src
+install_requires =
+    hydrotools._restclient
+    numpy >=1.20.0
+    pandas
+    geopandas
+    pydantic
+    typing_extensions
+python_requires = >=3.7
+
+[options.packages.find]
+where = src
+
+[options.extras_require]
+develop =
+    pytest
diff --git a/python/svi_client/src/hydrotools/svi_client/__init__.py b/python/svi_client/src/hydrotools/svi_client/__init__.py
@@ -0,0 +1,4 @@
+# removing __version__ import will cause build to fail. see: https://github.com/pypa/setuptools/issues/1724#issuecomment-627241822
+from ._version import __version__
+
+from .clients import SVIClient
diff --git a/python/svi_client/src/hydrotools/svi_client/_version.py b/python/svi_client/src/hydrotools/svi_client/_version.py
@@ -0,0 +1 @@
+__version__ = "0.0.1"  # package version; read by setup.cfg (`version = attr: ...`) and re-exported from the package __init__
diff --git a/python/svi_client/src/hydrotools/svi_client/clients.py b/python/svi_client/src/hydrotools/svi_client/clients.py
@@ -0,0 +1,220 @@
+from hydrotools._restclient import RestClient
+import pandas as pd
+import geopandas as gpd
+
+# local imports
+from . import url_builders
+from .types import GeographicScale, GeographicContext, Year, utilities, field_name_map
+
+# typing imports
+from typing import Union
+from pathlib import Path
+
+
+class SVIClient:
+    """Client for retrieving CDC / ATSDR Social Vulnerability Index (SVI) data.
+
+    All HTTP requests are issued through a `hydrotools._restclient.RestClient` instance.
+
+    Parameters
+    ----------
+    enable_cache : bool, optional
+        forwarded to `RestClient` as `enable_cache`, by default True
+    cache_filename : Union[str, Path], optional
+        forwarded to `RestClient` as `cache_filename`, by default "svi_client_cache"
+    """
+
+    def __init__(
+        self,
+        enable_cache: bool = True,
+        cache_filename: Union[str, Path] = "svi_client_cache",
+    ) -> None:
+        self._rest_client = RestClient(
+            cache_filename=cache_filename,
+            enable_cache=enable_cache,
+        )
+
+    def get(
+        self,
+        location: str,
+        geographic_scale: GeographicScale,
+        year: Year,
+        geographic_context: GeographicContext = "national",
+    ) -> gpd.GeoDataFrame:
+        """Retrieve social vulnerability index thematic rankings and values for a given state or the
+        U.S..
+
+        SVI values are available for the following years: 2000, 2010, 2014, 2016, and 2018.  The CDC
+        calculates the SVI at the census tract or county geographic scale. Likewise, the CDC
+        calculates SVI rankings in two geographic contexts: (1) relative to a given state's SVI
+        values or (2) relative to the U.S.. (1) permits intrastate comparison and (2) permits
+        national comparison.
+
+        Note: `state` geographic_context is not supported at this time.
+
+        Parameters
+        ----------
+        location : str
+            state / national name or abbreviation (e.g. "AL", "US", "Wyoming", "new york")
+        geographic_scale : GeographicScale "census_tract" or "county"
+            geographic scale at which theme values were calculated
+        year : Year
+            2000, 2010, 2014, 2016, or 2018
+        geographic_context : GeographicContext "national" or "state", optional
+            svi rankings calculated at the national or state level. use state for intrastate comparisons, by default "national"
+            Note: `state` not supported at this time. will raise NotImplementedError
+
+        Returns
+        -------
+        gpd.GeoDataFrame
+            Dataframe of Social Vulnerability Index values at the census tract or county scale
+
+            columns names:
+                state_name: str
+                state_abbreviation: str
+                county_name: str
+                state_fips: str
+                county_fips: str
+                fips: str
+                theme: str
+                rank: float
+                value: float
+                svi_edition: str
+                geometry: gpd.array.GeometryDtype
+
+
+        Examples
+        --------
+        >>> client = SVIClient()
+        >>> df = client.get("AL", "census_tract", "2018")
+                    state_name state_abbreviation  ... svi_edition                                           geometry
+        0        alabama                 al  ...        2018  POLYGON ((-87.21230 32.83583, -87.20970 32.835...
+        1        alabama                 al  ...        2018  POLYGON ((-86.45640 31.65556, -86.44864 31.655...
+        ...          ...                ...  ...         ...                                                ...
+        29498    alabama                 al  ...        2018  POLYGON ((-85.99487 31.84424, -85.99381 31.844...
+        29499    alabama                 al  ...        2018  POLYGON ((-86.19941 31.80787, -86.19809 31.808...
+
+        """
+        # first ask the feature server only for the number of matching features
+        url_path = url_builders.build_feature_server_url(
+            location=location,
+            geographic_scale=geographic_scale,
+            year=year,
+            geographic_context=geographic_context,
+            count_only=True,
+        )
+
+        # RestClient only allows 200 response code or an aiohttp.client_exceptions.ClientConnectorError is raised
+        # number of features
+        count_request = self._rest_client.get(url_path)
+
+        deserialized_count = count_request.json()
+        count = deserialized_count["properties"]["count"]
+
+        # number of features requested by a single request
+        OFFSET = 1000
+        # ceiling division: an exact multiple of OFFSET must not trigger a trailing request that
+        # returns no features. always issue at least one request so a zero count still yields a
+        # (possibly empty) response to build the dataframe from.
+        n_gets = max(1, -(-count // OFFSET))
+
+        # one paginated url per chunk of OFFSET features
+        urls = [
+            url_builders.build_feature_server_url(
+                location=location,
+                geographic_scale=geographic_scale,
+                year=year,
+                geographic_context=geographic_context,
+                result_offset=i * OFFSET,
+                result_record_count=OFFSET,
+            )
+            for i in range(n_gets)
+        ]
+
+        results = self._rest_client.mget(urls)
+
+        # create geodataframe from geojson response
+        df = pd.concat(
+            [gpd.GeoDataFrame.from_features(r.json()) for r in results],
+            ignore_index=True,
+        )
+
+        # sanity check: the paginated responses must add up to the advertised feature count
+        assert len(df) == count
+
+        fnm = field_name_map.CdcEsriFieldNameMapFactory(geographic_scale, year)
+
+        # map of dataset field names to canonical field names
+        field_names = {
+            v: k
+            for k, v in fnm.dict(exclude_unset=True, exclude={"svi_edition"}).items()
+        }
+
+        df = df.rename(columns=field_names)
+
+        # create missing fields if required
+        df = fnm.create_missing_fields(df)
+
+        df["svi_edition"] = fnm.svi_edition
+
+        # wide to long format
+        rank_col_names = df.columns.str.contains("rank$")
+
+        df = df.melt(
+            id_vars=df.columns[~rank_col_names],
+            value_vars=df.columns[rank_col_names],
+            var_name="rank_theme",
+            value_name="rank",
+        )
+
+        value_col_names = df.columns.str.contains("value$")
+        # some datasources do not include summed theme values
+        # NOTE(review): the rank columns were already melted above, so this second melt repeats
+        # every (feature, rank_theme) row once per value column -- i.e. rank themes and value
+        # themes are cross-joined rather than paired. confirm this is intended.
+        if value_col_names.any():
+            df = df.melt(
+                id_vars=df.columns[~value_col_names],
+                value_vars=df.columns[value_col_names],
+                var_name="value_theme",
+                value_name="value",
+            )
+        # create theme column by removing rank_theme's "_rank" suffix.
+        # an anchored replace is used instead of str.rstrip("_rank"): rstrip removes any trailing
+        # run of the characters "_", "r", "a", "n", "k", which also eats the end of theme names
+        # that happen to finish with one of those characters.
+        df["theme"] = df["rank_theme"].str.replace(r"_rank$", "", regex=True)
+
+        # drop unnecessary cols
+        # value_theme column might not exist, so ignore errors when trying to drop
+        df = df.drop(columns=["rank_theme", "value_theme"], errors="ignore")
+
+        # lowercase and strip all leading and trailing white spaces from str columns for consistent
+        # output and quality control
+        df_dtypes = df.dtypes
+        str_cols = df_dtypes[df_dtypes == "object"].index
+        df[str_cols] = df[str_cols].apply(lambda d: d.str.strip().str.lower())
+
+        df.sort_values("state_name", inplace=True, ignore_index=True)
+
+        output_column_order = [
+            "state_name",
+            "state_abbreviation",
+            "county_name",
+            "state_fips",
+            "county_fips",
+            "fips",
+            "theme",
+            "rank",
+            "value",
+            "svi_edition",
+            "geometry",
+        ]
+
+        # reorder dataframe columns
+        # note, during reindex, if there are columns not present in dataframe, they will be created
+        # with NaN row values
+        df = df.reindex(columns=output_column_order)
+
+        return df
+
+    @staticmethod
+    def svi_documentation_url(year: Year) -> str:
+        """Return the url of the CDC / ATSDR SVI documentation pdf for a given SVI release year.
+
+        Parameters
+        ----------
+        year : Year
+            SVI release year. passed through `utilities.validate_year` before lookup
+            (presumably normalized to a str such as "2018"; verify against `validate_year`)
+
+        Returns
+        -------
+        str
+            url of the documentation pdf
+
+        Raises
+        ------
+        ValueError
+            if the validated year has no documentation url registered here
+        """
+        year = utilities.validate_year(year)
+
+        urls = {
+            "2000": "https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2000Documentation-H.pdf",
+            "2010": "https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI-2010-Documentation-H.pdf",
+            "2014": "https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2014Documentation_01192022.pdf",
+            "2016": "https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2016Documentation_01192022.pdf",
+            "2018": "https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2018Documentation_01192022_1.pdf",
+        }
+
+        url = urls.get(year)
+
+        # raise error if valid year not in urls.
+        # when new svi releases are added, this will purposefully break.
+        if url is None:
+            error_message = (
+                f"documentation for year: {year} has not been added to SVIClient."
+            )
+            raise ValueError(error_message)
+
+        return url
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		include LICENSE
		include src/hydrotools/svi_client/data/*