diff --git a/.github/workflows/deploy-gh-pages.yml b/.github/workflows/deploy-gh-pages.yml index e9bd9c6c..76c23dab 100644 --- a/.github/workflows/deploy-gh-pages.yml +++ b/.github/workflows/deploy-gh-pages.yml @@ -21,12 +21,12 @@ jobs: run: | python -m pip install --upgrade pip Sphinx furo if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - python3 -m pip install --use-feature=in-tree-build ./python/_restclient[develop] - python3 -m pip install --use-feature=in-tree-build ./python/nwis_client[develop] - python3 -m pip install --use-feature=in-tree-build ./python/caches[develop] - python3 -m pip install --use-feature=in-tree-build ./python/nwm_client[develop] - python3 -m pip install --use-feature=in-tree-build ./python/events[develop] - python3 -m pip install --use-feature=in-tree-build ./python/metrics[develop] + python3 -m pip install ./python/_restclient[develop] + python3 -m pip install ./python/nwis_client[develop] + python3 -m pip install ./python/caches[develop] + python3 -m pip install ./python/nwm_client[develop] + python3 -m pip install ./python/events[develop] + python3 -m pip install ./python/metrics[develop] - name: Build Sphinx documentation run: | cd docs/ diff --git a/.github/workflows/run_slow_unit_tests.yml b/.github/workflows/run_slow_unit_tests.yml index 605c56c6..1730e9bd 100644 --- a/.github/workflows/run_slow_unit_tests.yml +++ b/.github/workflows/run_slow_unit_tests.yml @@ -24,13 +24,14 @@ jobs: - name: Install dependencies run: | python3 -m pip install -U pip - python3 -m pip install --use-feature=in-tree-build ./python/_restclient[develop] - python3 -m pip install --use-feature=in-tree-build ./python/nwis_client[develop] - python3 -m pip install --use-feature=in-tree-build ./python/caches[develop] - python3 -m pip install --use-feature=in-tree-build ./python/nwm_client[gcp,develop] - python3 -m pip install --use-feature=in-tree-build ./python/events[develop] - python3 -m pip install --use-feature=in-tree-build 
./python/metrics[develop] - python3 -m pip install --use-feature=in-tree-build ./python/nwm_client_new[develop] + python3 -m pip install ./python/_restclient[develop] + python3 -m pip install ./python/nwis_client[develop] + python3 -m pip install ./python/caches[develop] + python3 -m pip install ./python/nwm_client[gcp,develop] + python3 -m pip install ./python/events[develop] + python3 -m pip install ./python/metrics[develop] + python3 -m pip install ./python/nwm_client_new[develop] + python3 -m pip install ./python/svi_client[develop] - name: Run all unittests run: | python3 -m pytest -s diff --git a/.github/workflows/run_unit_tests.yml b/.github/workflows/run_unit_tests.yml index f746f11b..f4c3e335 100644 --- a/.github/workflows/run_unit_tests.yml +++ b/.github/workflows/run_unit_tests.yml @@ -21,13 +21,14 @@ jobs: - name: Install dependencies run: | python3 -m pip install -U pip - python3 -m pip install --use-feature=in-tree-build ./python/_restclient[develop] - python3 -m pip install --use-feature=in-tree-build ./python/nwis_client[develop] - python3 -m pip install --use-feature=in-tree-build ./python/caches[develop] - python3 -m pip install --use-feature=in-tree-build ./python/nwm_client_new[develop] - python3 -m pip install --use-feature=in-tree-build ./python/nwm_client[develop,gcp] - python3 -m pip install --use-feature=in-tree-build ./python/events[develop] - python3 -m pip install --use-feature=in-tree-build ./python/metrics[develop] + python3 -m pip install ./python/_restclient[develop] + python3 -m pip install ./python/nwis_client[develop] + python3 -m pip install ./python/caches[develop] + python3 -m pip install ./python/nwm_client_new[develop] + python3 -m pip install ./python/nwm_client[develop,gcp] + python3 -m pip install ./python/events[develop] + python3 -m pip install ./python/metrics[develop] + python3 -m pip install ./python/svi_client[develop] - name: Run all unittests run: | python3 -m pytest -s -m "not slow" diff --git 
a/python/svi_client/CONTRIBUTING.md b/python/svi_client/CONTRIBUTING.md new file mode 120000 index 00000000..f939e75f --- /dev/null +++ b/python/svi_client/CONTRIBUTING.md @@ -0,0 +1 @@ +../../CONTRIBUTING.md \ No newline at end of file diff --git a/python/svi_client/LICENSE b/python/svi_client/LICENSE new file mode 120000 index 00000000..30cff740 --- /dev/null +++ b/python/svi_client/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/svi_client/MANIFEST.in b/python/svi_client/MANIFEST.in new file mode 100644 index 00000000..6f8c5322 --- /dev/null +++ b/python/svi_client/MANIFEST.in @@ -0,0 +1,2 @@ +include LICENSE +include src/hydrotools/nwm_client/data/* diff --git a/python/svi_client/README.md b/python/svi_client/README.md new file mode 100644 index 00000000..5054d930 --- /dev/null +++ b/python/svi_client/README.md @@ -0,0 +1,109 @@ +# OWPHydroTools :: SVI Client + + +This subpackage provides programmatic access to the Centers for Disease Control and Prevention's (CDC) Social +Vulnerability Index (SVI). "Social vulnerability refers to the potential negative effects on +communities caused by external stresses on human health. Such stresses include natural or +human-caused disasters, or disease outbreaks. Reducing social vulnerability can decrease both human +suffering and economic loss." [[source](https://www.atsdr.cdc.gov/placeandhealth/svi/index.html)] + +The SVI has been released 5 times (2000, 2010, 2014, 2016, and 2018) and calculates a relative +percentile ranking in four theme categories and an overall ranking at a given _geographic context_ +and _geographic scale_. The themes are: + +- Socioeconomic +- Household Composition & Disability +- Minority Status & Language +- Housing Type & Transportation + +Rankings are calculated relative to a _geographic context_, either a single state or all states (United States). 
+Meaning, for example, a ranking calculated for some location at the United States geographic context +would be relative to all other locations where rankings were calculated in the United States. +Similarly, SVI rankings are calculated at two _geographic scales_, census tract and county scales. +Meaning, the rankings correspond to either a county or a census tract. For completeness, for example, if +you were to retrieve the 2018 SVI at the census tract scale, at the state context for the state of +Alabama, you would receive 1180 records (number of census tracts in AL in 2010 census) where each +ranked percentile is calculated relative to census tracts in Alabama. This tool currently +only supports querying for rankings calculated at the United States geographic context. Future work +will add support for retrieving rankings at the state geographic context. + +Documentation for each yearly release of the SVI is located below: + +- [2000](https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2000Documentation-H.pdf) +- [2010](https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI-2010-Documentation-H.pdf) +- [2014](https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2014Documentation_01192022.pdf) +- [2016](https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2016Documentation_01192022.pdf) +- [2018](https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2018Documentation_01192022_1.pdf) + + +See the [SVI Client Documentation](https://noaa-owp.github.io/hydrotools/hydrotools.svi_client.html) +for a complete list and description of the currently available methods. To report bugs or request +new features, submit an issue through the [OWPHydroTools Issue +Tracker](https://github.com/NOAA-OWP/hydrotools/issues) on GitHub. + +## Installation + +In accordance with the python community, we support and advise the usage of virtual environments in +any workflow using python. 
In the following installation guide, we use python's built-in `venv` +module to create a virtual environment in which the tool will be installed. Note this is just +personal preference; any python virtual environment manager should work just fine (`conda`, +`pipenv`, etc.). + +```bash +# Create and activate python environment, requires python >= 3.8 +$ python3 -m venv venv +$ source venv/bin/activate +$ python3 -m pip install --upgrade pip + +# Install svi_client +$ python3 -m pip install hydrotools.svi_client +``` + + +## Usage + + +### Code + +```python +from hydrotools.svi_client import SVIClient + +client = SVIClient() +df = client.get( + location="AL", # state / nation name (e.g. "alabama" or "United States") also accepted. case insensitive + geographic_scale="census_tract", # "census_tract" or "county" + year="2018", # 2000, 2010, 2014, 2016, or 2018 + geographic_context="national" # only "national" supported. "state" will be supported in the future + ) +print(df) +``` + +### Example output + +```console + state_name state_abbreviation ... svi_edition geometry +0 alabama al ... 2018 POLYGON ((-87.21230 32.83583, -87.20970 32.835... +1 alabama al ... 2018 POLYGON ((-86.45640 31.65556, -86.44864 31.655... +... ... ... ... ... ... +29498 alabama al ... 2018 POLYGON ((-85.99487 31.84424, -85.99381 31.844... +29499 alabama al ... 2018 POLYGON ((-86.19941 31.80787, -86.19809 31.808... +``` +### System Requirements + +## Development + +```bash +$ python3 -m venv env +$ source env/bin/activate +$ python3 -m pip install -U pip +$ python3 -m pip install -U setuptools +$ python3 -m pip install -e ".[develop]" +``` + +To generate a source distribution: +```bash +$ python3 -m pip install -U wheel build +$ python3 -m build +``` + +The packages generated in `dist/` can be installed directly with `pip` or uploaded to PyPI using `twine`. 
diff --git a/python/svi_client/SECURITY.md b/python/svi_client/SECURITY.md new file mode 120000 index 00000000..42cce94f --- /dev/null +++ b/python/svi_client/SECURITY.md @@ -0,0 +1 @@ +../../SECURITY.md \ No newline at end of file diff --git a/python/svi_client/TERMS.md b/python/svi_client/TERMS.md new file mode 120000 index 00000000..873e35b9 --- /dev/null +++ b/python/svi_client/TERMS.md @@ -0,0 +1 @@ +../../TERMS.md \ No newline at end of file diff --git a/python/svi_client/pyproject.toml b/python/svi_client/pyproject.toml new file mode 100644 index 00000000..7b52b9ba --- /dev/null +++ b/python/svi_client/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools>=42", + "wheel", +] diff --git a/python/svi_client/pytest.ini b/python/svi_client/pytest.ini new file mode 100644 index 00000000..81cf57a0 --- /dev/null +++ b/python/svi_client/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +markers = + slow: marks tests as slow (deselect with '-m "not slow"') + \ No newline at end of file diff --git a/python/svi_client/setup.cfg b/python/svi_client/setup.cfg new file mode 100644 index 00000000..8c26047c --- /dev/null +++ b/python/svi_client/setup.cfg @@ -0,0 +1,49 @@ +[metadata] +name = hydrotools.svi_client +version = attr: hydrotools.svi_client._version.__version__ +author = Austin Raney +author_email = aaraney@protonmail.com +description = Retrieve Social Vulnerability Index data from The Center for Disease Control / The Agency for Toxic Substances and Disease Registry. 
+long_description = file: README.md +long_description_content_type = text/markdown +charset = UTF-8 +license = USDOC +license_files = + LICENSE +url = https://github.com/NOAA-OWP/hydrotools +project_urls = + Documentation = https://noaa-owp.github.io/hydrotools/hydrotools.svi_client.html + Source = https://github.com/NOAA-OWP/hydrotools/tree/main/python/svi_client + Tracker = https://github.com/NOAA-OWP/hydrotools/issues +classifiers = + Development Status :: 3 - Alpha + Intended Audience :: Education + Intended Audience :: Science/Research + License :: Free To Use But Restricted + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Topic :: Scientific/Engineering + Topic :: Sociology + Intended Audience :: Science/Research + Operating System :: OS Independent + +[options] +packages = find_namespace: +package_dir = + =src +install_requires = + hydrotools._restclient + numpy >=1.20.0 + pandas + geopandas + pydantic + typing_extensions +python_requires = >=3.7 + +[options.packages.find] +where = src + +[options.extras_require] +develop = + pytest diff --git a/python/svi_client/src/hydrotools/svi_client/__init__.py b/python/svi_client/src/hydrotools/svi_client/__init__.py new file mode 100644 index 00000000..5dfeadc3 --- /dev/null +++ b/python/svi_client/src/hydrotools/svi_client/__init__.py @@ -0,0 +1,4 @@ +# removing __version__ import will cause build to fail. 
class SVIClient:
    """Client for retrieving CDC Social Vulnerability Index (SVI) data.

    Requests are issued through a ``hydrotools._restclient.RestClient``. The url of each request is
    produced by ``url_builders.build_feature_server_url``.

    Parameters
    ----------
    enable_cache : bool, optional
        forwarded to ``RestClient``, by default True
    cache_filename : Union[str, Path], optional
        forwarded to ``RestClient``, by default "svi_client_cache"
    """

    # number of features requested by a single paged request
    _PAGE_SIZE = 1000

    def __init__(
        self,
        enable_cache: bool = True,
        cache_filename: Union[str, Path] = "svi_client_cache",
    ) -> None:
        self._rest_client = RestClient(
            cache_filename=cache_filename,
            enable_cache=enable_cache,
        )

    def get(
        self,
        location: str,
        geographic_scale: GeographicScale,
        year: Year,
        geographic_context: GeographicContext = "national",
    ) -> gpd.GeoDataFrame:
        """Retrieve social vulnerability index thematic rankings and values for a given state or the
        U.S.

        SVI values are available for the following years: 2000, 2010, 2014, 2016, and 2018. The CDC
        calculates the SVI at the census tract or county geographic scale. Likewise, the CDC
        calculates SVI rankings in two geographic contexts: (1) relative to a given state's SVI
        values or (2) relative to the U.S. (1) permits intrastate comparison and (2) permits
        national comparison.

        Note: `state` geographic_context is not supported at this time.

        Parameters
        ----------
        location : str
            state / national name or abbreviation (e.g. "AL", "US", "Wyoming", "new york")
        geographic_scale : GeographicScale "census_tract" or "county"
            geographic scale at which theme values were calculated
        year : Year
            2000, 2010, 2014, 2016, or 2018
        geographic_context : GeographicContext "national" or "state", optional
            svi rankings calculated at the national or state level. use state for intrastate
            comparisons, by default "national"
            Note: `state` not supported at this time. will raise NotImplementedError

        Returns
        -------
        gpd.GeoDataFrame
            Social Vulnerability Index values at the census tract or county scale, one row per
            feature and theme.
            NOTE(review): the final frame is assembled with ``pd.concat``; confirm the runtime type
            matches the annotation.

            columns names:
                state_name: str
                state_abbreviation: str
                county_name: str
                state_fips: str
                county_fips: str
                fips: str
                theme: str
                rank: float
                value: float
                svi_edition: str
                geometry: gpd.array.GeometryDtype

        Raises
        ------
        RuntimeError
            if the number of features retrieved differs from the count reported by the server

        Examples
        --------
        >>> client = SVIClient()
        >>> df = client.get("AL", "census_tract", "2018")
        >>> print(df)
                state_name state_abbreviation  ... svi_edition                                           geometry
        0        alabama                 al  ...        2018  POLYGON ((-87.21230 32.83583, -87.20970 32.835...
        1        alabama                 al  ...        2018  POLYGON ((-86.45640 31.65556, -86.44864 31.655...
        ...          ...                ...  ...         ...                                                ...
        29498    alabama                 al  ...        2018  POLYGON ((-85.99487 31.84424, -85.99381 31.844...
        29499    alabama                 al  ...        2018  POLYGON ((-86.19941 31.80787, -86.19809 31.808...

        """
        count_url = url_builders.build_feature_server_url(
            location=location,
            geographic_scale=geographic_scale,
            year=year,
            geographic_context=geographic_context,
            count_only=True,
        )

        # RestClient only allows 200 response code or an aiohttp.client_exceptions.ClientConnectorError is raised
        count_response = self._rest_client.get(count_url)

        # number of features matching the query
        count = count_response.json()["properties"]["count"]

        # one url per page of at most `_PAGE_SIZE` features
        urls = [
            url_builders.build_feature_server_url(
                location=location,
                geographic_scale=geographic_scale,
                year=year,
                geographic_context=geographic_context,
                result_offset=offset,
                result_record_count=self._PAGE_SIZE,
            )
            for offset in self._page_offsets(count, self._PAGE_SIZE)
        ]

        results = self._rest_client.mget(urls)

        # create geodataframe from geojson response
        df = pd.concat(
            [gpd.GeoDataFrame.from_features(r.json()) for r in results],
            ignore_index=True,
        )

        # explicit check instead of `assert`, which is stripped when python runs with -O
        if len(df) != count:
            raise RuntimeError(
                f"expected {count} features but retrieved {len(df)}; response is incomplete."
            )

        fnm = field_name_map.CdcEsriFieldNameMapFactory(geographic_scale, year)

        # map of dataset field names to canonical field names
        field_names = {
            v: k
            for k, v in fnm.dict(exclude_unset=True, exclude={"svi_edition"}).items()
        }

        df = df.rename(columns=field_names)

        str_cols = df.select_dtypes(include=object).columns
        # lowercase and strip all leading and trailing white spaces from str columns for consistent
        # output and quality control
        df[str_cols] = df[str_cols].apply(lambda d: d.str.strip().str.lower())

        # cast str columns to category type to reduce the memory footprint
        df[str_cols] = df[str_cols].astype("category")

        # create missing fields if required
        df = fnm.create_missing_fields(df)

        df["svi_edition"] = fnm.svi_edition

        # create column for rejoining dataframes
        df["geometry_idx"] = df.index

        # set the geometry column aside; it is re-joined after the reshape below
        geometry = df["geometry"]
        df = df.drop(columns=["geometry"])

        # mask of column names that end in rank or value
        rank_value_cols_mask = df.columns.str.contains(r"(?:rank|value)$")

        rank_value_col_names = df.columns[rank_value_cols_mask]

        rank_col_names = rank_value_col_names[
            rank_value_col_names.str.contains("rank$")
        ].tolist()

        value_col_names = rank_value_col_names[
            rank_value_col_names.str.contains("value$")
        ].tolist()

        non_rank_value_col_names = df.columns[~rank_value_cols_mask].tolist()

        # wide -> long: one row per feature and theme
        ranks_df = df.melt(
            id_vars=non_rank_value_col_names,
            value_vars=rank_col_names,
            var_name="rank_theme",
            value_name="rank",
        )

        # some data sources do not include the svi theme values, they only include their rank.
        if value_col_names:
            values_df = df.melt(
                id_vars=non_rank_value_col_names,
                value_vars=value_col_names,
                var_name="value_theme",
                value_name="value",
            )

            # index both frames by the id columns plus a per-group counter so that the n-th rank
            # row of a feature lines up with its n-th value row
            ranks_df = ranks_df.set_index(
                non_rank_value_col_names
                + [ranks_df.groupby(non_rank_value_col_names).cumcount()]
            )
            values_df = values_df.set_index(
                non_rank_value_col_names
                + [values_df.groupby(non_rank_value_col_names).cumcount()]
            )

            df = (
                pd.concat([ranks_df, values_df], axis=1)
                # drop groupby cumcount level
                .pipe(
                    lambda d: d.reset_index(level=d.index.nlevels - 1, drop=True)
                ).reset_index()
            )
        else:
            df = ranks_df

        # re-join the geometry column using a shared index and reset the index
        df = pd.concat([df.set_index("geometry_idx"), geometry], axis=1).reset_index(
            drop=True
        )

        # create theme column by removing rank_theme's _rank suffix
        df["theme"] = self._theme_names(df["rank_theme"])

        # drop unnecessary cols
        # value_theme column might not exist, so ignore errors when trying to drop
        df = df.drop(columns=["rank_theme", "value_theme"], errors="ignore")

        df = df.sort_values("state_name", ignore_index=True)

        output_column_order = [
            "state_name",
            "state_abbreviation",
            "county_name",
            "state_fips",
            "county_fips",
            "fips",
            "theme",
            "rank",
            "value",
            "svi_edition",
            "geometry",
        ]

        # reorder dataframe columns
        # note, during reindex, if there are columns not present in dataframe, they will be created
        # with NaN row values
        return df.reindex(columns=output_column_order)

    @staticmethod
    def _page_offsets(count: int, page_size: int) -> list:
        """Return the result offset of each page needed to retrieve `count` features.

        Always returns at least one offset, so a single request is still issued when `count` is 0.
        """
        # ceiling division; `count // page_size + 1` would add an empty trailing page whenever
        # count is an exact multiple of page_size
        n_pages = max(1, -(-count // page_size))
        return [page * page_size for page in range(n_pages)]

    @staticmethod
    def _theme_names(rank_column_names: pd.Series) -> pd.Series:
        """Return `rank_column_names` with the trailing "_rank" suffix removed.

        `str.rstrip("_rank")` is not used because it strips any trailing run of the characters
        "_", "r", "a", "n" and "k" rather than the suffix itself.
        """
        return rank_column_names.str.replace(r"_rank$", "", regex=True)

    @staticmethod
    def svi_documentation_url(year: Year) -> str:
        """Return the url of the CDC documentation for the SVI release of a given year.

        Parameters
        ----------
        year : Year
            2000, 2010, 2014, 2016, or 2018; validated with `utilities.validate_year`

        Returns
        -------
        str
            url of the documentation pdf

        Raises
        ------
        ValueError
            if `year` passes validation but no documentation url has been registered for it
        """
        year = utilities.validate_year(year)

        urls = {
            "2000": "https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2000Documentation-H.pdf",
            "2010": "https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI-2010-Documentation-H.pdf",
            "2014": "https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2014Documentation_01192022.pdf",
            "2016": "https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2016Documentation_01192022.pdf",
            "2018": "https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2018Documentation_01192022_1.pdf",
        }

        url = urls.get(year)

        # raise error if valid year not in urls.
        # when new svi releases are added, this will purposefully break.
        if url is None:
            error_message = (
                f"documentation for year: {year} has not been added to SVIClient."
            )
            raise ValueError(error_message)

        return url
+# counties: +# US: Overall_2014_Counties/FeatureServer/1 +# tracts: +# US: Overall_2014_Tracts/FeatureServer/1 +# # NOTE: all STATE and COUNTY fields are prefixed with a space +# # use STATE LIKE '%%' to get around this + +# 2016: +# counties: +# US: Overall_2016_Counties/FeatureServer/0 +# tracts: +# US: Overall_2016_Tracts/FeatureServer/0 + +# 2018: +# counties: +# US: CDC_Social_Vulnerability_Index_2018/FeatureServer/1 +# tracts: +# US: CDC_Social_Vulnerability_Index_2018/FeatureServer/2 + +# cols to support: +# state_fips: State FIPS code +# state_name: State name +# county_name: County name +# fips: Census tract or county fips code +# svi_edition: year corresponding to svi release (this assumes 2 SVI's will not be release in a given year in the future) +# geometry: County or census tract simple features geometry +# rank_theme_1: Socioeconomic +# rank_theme_2: Household Composition / Disability +# rank_theme_3: Minority Status / Language +# rank_theme_4: Housing Type / Transportation +# rank_svi: aggregated overall percentile ranking +# value_theme_1: Socioeconomic +# value_theme_2: Household Composition / Disability +# value_theme_3: Minority Status / Language +# value_theme_4: Housing Type / Transportation +# value_svi: aggregated overall value; sum of values from themes 1, 2, 3, 4. 
+ +# state_fips: +# counties: STATE_FIPS, FIRST_STATE_FIPS, ST, ST, ST +# tracts: STATE_FIPS, STATE_FIPS, ST, ST, ST +# county_fips: +# counties: CNTY_FIPS, FIRST_CNTY_FIPS, FIPS[len(ST):], FIPS[len(ST):], FIPS[len(ST):] +# tracts: CNTY_FIPS, CNTY_FIPS, STCNTY[len(ST):], STCNTY[len(ST):], STCNTY[len(ST):] +# fips: +# counties: STCOFIPS, STCOFIPS, FIPS, FIPS, FIPS +# tracts: FIPS, FIPS, FIPS, FIPS, FIPS +# state_name: +# counties: STATE_NAME, FIRST_STATE_NAME, STATE, STATE, STATE +# tracts: STATE_NAME, STATE_NAME, STATE.strip(), STATE, STATE +# county_name: +# counties: COUNTY, FIRST_COUNTY, COUNTY, COUNTY, COUNTY +# tracts: COUNTY, COUNTY, COUNTY.strip(), COUNTY, COUNTY +# svi_edition: +# counties: 2000, 2010, 2014, 2016, 2018 +# tracts: 2000, 2010, 2014, 2016, 2018 +# rank_theme_1 +# counties: USG1TP, R_PL_THEME1, RPL_THEME1, RPL_THEME1, RPL_THEME1 +# tracts: USG1TP, R_PL_THEME1, RPL_THEME1, RPL_THEME1, RPL_THEME1 +# rank_theme_2 +# counties: USG2TP, R_PL_THEME2, RPL_THEME2, RPL_THEME2, RPL_THEME2 +# tracts: USG2TP, R_PL_THEME2, RPL_THEME2, RPL_THEME2, RPL_THEME2 +# rank_theme_3 +# counties: USG3TP, R_PL_THEME3, RPL_THEME3, RPL_THEME3, RPL_THEME3 +# tracts: USG3TP, R_PL_THEME3, RPL_THEME3, RPL_THEME3, RPL_THEMES3 +# rank_theme_4 +# counties: USG4TP, R_PL_THEME4, RPL_THEME4, RPL_THEME4, RPL_THEME4 +# tracts: USG4TP, R_PL_THEME4, RPL_THEME4, RPL_THEME4, RPL_THEME4 +# rank_svi +# counties: USTP, R_PL_THEMES, RPL_THEMES, RPL_THEMES, RPL_THEMES +# tracts: USTP, R_PL_THEMES, RPL_THEMES, RPL_THEMES, RPL_THEMES +# value_theme_1 +# counties: NA, S_PL_THEME1, SPL_THEME1, SPL_THEME1, SPL_THEME1 +# tracts: NA, S_PL_THEME1, SPL_THEME1, SPL_THEME1, SPL_THEME1 +# value_theme_2 +# counties: NA, S_PL_THEME2, SPL_THEME2, SPL_THEME2, SPL_THEME2 +# tracts: NA, S_PL_THEME2, SPL_THEME2, SPL_THEME2, SPL_THEME2 +# value_theme_3 +# counties: NA, S_PL_THEME3, SPL_THEME3, SPL_THEME3, SPL_THEME3 +# tracts: NA, S_PL_THEME3, SPL_THEME3, SPL_THEMES3, SPL_THEME3 +# value_theme_4 +# counties: NA, 
S_PL_THEME4, SPL_THEME4, SPL_THEME4, SPL_THEME4 +# tracts: NA, S_PL_THEME4, SPL_THEME4, SPL_THEME4, SPL_THEME4 +# value_svi +# counties: NA, S_PL_THEMES, SPL_THEMES, SPL_THEMES, SPL_THEMES +# tracts: NA, S_PL_THEMES, SPL_THEMES, SPL_THEMES, SPL_THEMES diff --git a/python/svi_client/src/hydrotools/svi_client/types/__init__.py b/python/svi_client/src/hydrotools/svi_client/types/__init__.py new file mode 100644 index 00000000..73ec8706 --- /dev/null +++ b/python/svi_client/src/hydrotools/svi_client/types/__init__.py @@ -0,0 +1,8 @@ +from .type_definitions import ( + LOCATIONS, + GeographicScale, + GeographicContext, + DataFormat, + Year, +) +from . import utilities diff --git a/python/svi_client/src/hydrotools/svi_client/types/field_name_map.py b/python/svi_client/src/hydrotools/svi_client/types/field_name_map.py new file mode 100644 index 00000000..e14da934 --- /dev/null +++ b/python/svi_client/src/hydrotools/svi_client/types/field_name_map.py @@ -0,0 +1,278 @@ +from __future__ import annotations +from pydantic import BaseModel +from functools import partial +from types import MappingProxyType +from typing import Optional, Tuple +import pandas as pd + +# local imports +from . 
import utilities +from .type_definitions import GeographicScale, Year + + +class FieldNameMap(BaseModel): + """Map from canonical hydrotools SVI field names to SVI field names from another provenance. + + Each attribute is a canonical field name; its value is the matching field name in the source + dataset (`svi_edition` holds the release year itself). The `*_value` fields are optional + because some editions do not include theme values. + """ + + state_name: str # State name + state_abbreviation: str # State abbreviation + county_name: str # County name + + state_fips: str # State FIPS code + county_fips: str # County FIPS code + fips: str # Census tract or county FIPS code + + svi_edition: str # year of the SVI release (assumes two SVIs will never be released in the same year) + + socioeconomic_rank: str # theme_1_rank : Socioeconomic + household_comp_and_disability_rank: str # theme_2_rank : Household Composition / Disability + minority_status_and_lang_rank: str # theme_3_rank : Minority Status / Language + housing_type_and_trans_rank: str # theme_4_rank : Housing Type / Transportation + svi_rank: str # aggregated overall percentile ranking + + socioeconomic_value: Optional[str] # theme_1_value : Socioeconomic + household_comp_and_disability_value: Optional[ + str + ] # theme_2_value : Household Composition / Disability + minority_status_and_lang_value: Optional[ + str + ] # theme_3_value : Minority Status / Language + housing_type_and_trans_value: Optional[ + str + ] # theme_4_value : Housing Type / Transportation + + # aggregated overall value; sum of values from themes 1, 2, 3, 4. + svi_value: Optional[str] + + @staticmethod + def create_missing_fields(df: pd.DataFrame) -> pd.DataFrame: + """Create dataframe fields that are missing from the source data using existing fields. + + Subclasses should override this; the default implementation returns the input `df` unchanged. + """ + return df + + +### Mapping types for data sourced from: https://services3.arcgis.com/ZvidGQkLaDJxRSJ2/ArcGIS/rest/services/. 
### + +## SVI Counties Types ## + +# 2000s data does not include theme values +CdcEsri2000CountiesFieldNameMap = FieldNameMap( + state_name="STATE_NAME", + state_abbreviation="STATE_ABBR", + county_name="COUNTY", + state_fips="STATE_FIPS", + county_fips="CNTY_FIPS", + fips="STCOFIPS", + svi_edition="2000", + socioeconomic_rank="USG1TP", + household_comp_and_disability_rank="USG2TP", + minority_status_and_lang_rank="USG3TP", + housing_type_and_trans_rank="USG4TP", + svi_rank="USTP", +) + +CdcEsri2010CountiesFieldNameMap = FieldNameMap( + state_name="FIRST_STATE_NAME", + state_abbreviation="FIRST_STATE_ABBR", + county_name="FIRST_COUNTY", + state_fips="FIRST_STATE_FIPS", + county_fips="FIRST_CNTY_FIPS", + fips="STCOFIPS", + svi_edition="2010", + socioeconomic_rank="R_PL_THEME1", + household_comp_and_disability_rank="R_PL_THEME2", + minority_status_and_lang_rank="R_PL_THEME3", + housing_type_and_trans_rank="R_PL_THEME4", + svi_rank="R_PL_THEMES", + socioeconomic_value="S_PL_THEME1", + household_comp_and_disability_value="S_PL_THEME2", + minority_status_and_lang_value="S_PL_THEME3", + housing_type_and_trans_value="S_PL_THEME4", + svi_value="S_PL_THEMES", +) + + +class _CdcEsriMissingCountyFipsFieldNameMap(FieldNameMap): + @staticmethod + def create_missing_fields(df: pd.DataFrame) -> pd.DataFrame: + """ + derive `county_fips` from `fips`. county fips codes are five digits long, where the first + two digits are the state fips. 
+ source: https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt + """ + return df.assign(county_fips=lambda d: d["fips"].str.slice(2, 5)) + + +# svi_edition is excluded, so it can be parametrized +_CdcEsriCountiesFieldNameMap = partial( + _CdcEsriMissingCountyFipsFieldNameMap, + state_name="STATE", + state_abbreviation="ST_ABBR", + county_name="COUNTY", + state_fips="ST", + county_fips="FIPS", # calculated FIPS[2:5] + fips="FIPS", + socioeconomic_rank="RPL_THEME1", + household_comp_and_disability_rank="RPL_THEME2", + minority_status_and_lang_rank="RPL_THEME3", + housing_type_and_trans_rank="RPL_THEME4", + svi_rank="RPL_THEMES", + socioeconomic_value="SPL_THEME1", + household_comp_and_disability_value="SPL_THEME2", + minority_status_and_lang_value="SPL_THEME3", + housing_type_and_trans_value="SPL_THEME4", + svi_value="SPL_THEMES", +) + +CdcEsri2014CountiesFieldNameMap = _CdcEsriCountiesFieldNameMap(svi_edition="2014") +CdcEsri2016CountiesFieldNameMap = _CdcEsriCountiesFieldNameMap(svi_edition="2016") +CdcEsri2018CountiesFieldNameMap = _CdcEsriCountiesFieldNameMap(svi_edition="2018") + + +## SVI Tract Types ## + +# 2000s data does not include theme values +CdcEsri2000TractsFieldNameMap = FieldNameMap( + state_name="STATE_NAME", + state_abbreviation="STATE_ABBR", + county_name="COUNTY", + state_fips="STATE_FIPS", + county_fips="CNTY_FIPS", + fips="FIPS", + svi_edition="2000", + socioeconomic_rank="USG1TP", + household_comp_and_disability_rank="USG2TP", + minority_status_and_lang_rank="USG3TP", + housing_type_and_trans_rank="USG4TP", + svi_rank="USTP", +) + +CdcEsri2010TractsFieldNameMap = FieldNameMap( + state_name="STATE_NAME", + state_abbreviation="STATE_ABBR", + county_name="COUNTY", + state_fips="STATE_FIPS", + county_fips="CNTY_FIPS", + fips="FIPS", + svi_edition="2010", + socioeconomic_rank="R_PL_THEME1", + household_comp_and_disability_rank="R_PL_THEME2", + minority_status_and_lang_rank="R_PL_THEME3", + housing_type_and_trans_rank="R_PL_THEME4", + 
svi_rank="R_PL_THEMES", + socioeconomic_value="S_PL_THEME1", + household_comp_and_disability_value="S_PL_THEME2", + minority_status_and_lang_value="S_PL_THEME3", + housing_type_and_trans_value="S_PL_THEME4", + svi_value="S_PL_THEMES", +) + +# svi_edition is excluded, so it can be parametrized +_CdcEsriTractFieldNameMap = partial( + _CdcEsriMissingCountyFipsFieldNameMap, + state_name="STATE", + state_abbreviation="ST_ABBR", + county_name="COUNTY", + state_fips="ST", + county_fips="STCNTY", # calculated FIPS[2:5] + fips="FIPS", + socioeconomic_rank="RPL_THEME1", + household_comp_and_disability_rank="RPL_THEME2", + minority_status_and_lang_rank="RPL_THEME3", + housing_type_and_trans_rank="RPL_THEME4", + svi_rank="RPL_THEMES", + socioeconomic_value="SPL_THEME1", + household_comp_and_disability_value="SPL_THEME2", + minority_status_and_lang_value="SPL_THEME3", + housing_type_and_trans_value="SPL_THEME4", + svi_value="SPL_THEMES", +) + +CdcEsri2014TractsFieldNameMap = _CdcEsriTractFieldNameMap(svi_edition="2014") +CdcEsri2016TractsFieldNameMap = _CdcEsriTractFieldNameMap(svi_edition="2016") +CdcEsri2018TractsFieldNameMap = _CdcEsriTractFieldNameMap(svi_edition="2018") + +_CdcEsriFieldNameMapFactory: MappingProxyType[ + Tuple[str, str], FieldNameMap +] = MappingProxyType( + { + # year, geographic scale: FieldNameMap + # counties + ("2000", "county"): CdcEsri2000CountiesFieldNameMap, + ("2010", "county"): CdcEsri2010CountiesFieldNameMap, + ("2014", "county"): CdcEsri2014CountiesFieldNameMap, + ("2016", "county"): CdcEsri2016CountiesFieldNameMap, + ("2018", "county"): CdcEsri2018CountiesFieldNameMap, + # tracts + ("2000", "census_tract"): CdcEsri2000TractsFieldNameMap, + ("2010", "census_tract"): CdcEsri2010TractsFieldNameMap, + ("2014", "census_tract"): CdcEsri2014TractsFieldNameMap, + ("2016", "census_tract"): CdcEsri2016TractsFieldNameMap, + ("2018", "census_tract"): CdcEsri2018TractsFieldNameMap, + } +) + + +def CdcEsriFieldNameMapFactory( + geographic_scale: 
GeographicScale, year: Year +) -> FieldNameMap: + """Return the CDC / ESRI `FieldNameMap` for the given geographic scale and SVI year. + + Both arguments are validated first, so a `ValueError` from the validators propagates for + unsupported values. `year` may be passed as an `int` or a `str`. + """ + geographic_scale = utilities.validate_geographic_scale(geographic_scale) # type: ignore + year = utilities.validate_year(year) # type: ignore + + search_tuple = (year, geographic_scale) + return _CdcEsriFieldNameMapFactory[search_tuple] # type: ignore + + +# Developer notes + +# state_fips: +# counties: STATE_FIPS, FIRST_STATE_FIPS, ST, ST, ST +# tracts: STATE_FIPS, STATE_FIPS, ST, ST, ST +# county_fips: +# counties: CNTY_FIPS, FIRST_CNTY_FIPS, FIPS[len(ST):], FIPS[len(ST):], FIPS[len(ST):] +# tracts: CNTY_FIPS, CNTY_FIPS, STCNTY[len(ST):], STCNTY[len(ST):], STCNTY[len(ST):] +# fips: +# counties: STCOFIPS, STCOFIPS, FIPS, FIPS, FIPS +# tracts: FIPS, FIPS, FIPS, FIPS, FIPS +# state_name: +# counties: STATE_NAME, FIRST_STATE_NAME, STATE, STATE, STATE +# tracts: STATE_NAME, STATE_NAME, STATE.strip(), STATE, STATE +# state_abbreviation: +# counties: STATE_ABBR, FIRST_STATE_ABBR, ST_ABBR, ST_ABBR, ST_ABBR +# tracts: STATE_ABBR, STATE_ABBR, ST_ABBR, ST_ABBR, ST_ABBR +# county_name: +# counties: COUNTY, FIRST_COUNTY, COUNTY, COUNTY, COUNTY +# tracts: COUNTY, COUNTY, COUNTY.strip(), COUNTY, COUNTY +# svi_edition: +# counties: 2000, 2010, 2014, 2016, 2018 +# tracts: 2000, 2010, 2014, 2016, 2018 +# rank_theme_1 +# counties: USG1TP, R_PL_THEME1, RPL_THEME1, RPL_THEME1, RPL_THEME1 +# tracts: USG1TP, R_PL_THEME1, RPL_THEME1, RPL_THEME1, RPL_THEME1 +# rank_theme_2 +# counties: USG2TP, R_PL_THEME2, RPL_THEME2, RPL_THEME2, RPL_THEME2 +# tracts: USG2TP, R_PL_THEME2, RPL_THEME2, RPL_THEME2, RPL_THEME2 +# rank_theme_3 +# counties: USG3TP, R_PL_THEME3, RPL_THEME3, RPL_THEME3, RPL_THEME3 +# tracts: USG3TP, R_PL_THEME3, RPL_THEME3, RPL_THEME3, RPL_THEME3 +# rank_theme_4 +# counties: USG4TP, R_PL_THEME4, RPL_THEME4, RPL_THEME4, RPL_THEME4 +# tracts: USG4TP, R_PL_THEME4, RPL_THEME4, RPL_THEME4, RPL_THEME4 +# rank_svi +# counties: USTP, R_PL_THEMES, RPL_THEMES, RPL_THEMES, RPL_THEMES +# tracts: USTP, R_PL_THEMES, RPL_THEMES, RPL_THEMES, 
RPL_THEMES +# value_theme_1 +# counties: NA, S_PL_THEME1, SPL_THEME1, SPL_THEME1, SPL_THEME1 +# tracts: NA, S_PL_THEME1, SPL_THEME1, SPL_THEME1, SPL_THEME1 +# value_theme_2 +# counties: NA, S_PL_THEME2, SPL_THEME2, SPL_THEME2, SPL_THEME2 +# tracts: NA, S_PL_THEME2, SPL_THEME2, SPL_THEME2, SPL_THEME2 +# value_theme_3 +# counties: NA, S_PL_THEME3, SPL_THEME3, SPL_THEME3, SPL_THEME3 +# tracts: NA, S_PL_THEME3, SPL_THEME3, SPL_THEME3, SPL_THEME3 +# value_theme_4 +# counties: NA, S_PL_THEME4, SPL_THEME4, SPL_THEME4, SPL_THEME4 +# tracts: NA, S_PL_THEME4, SPL_THEME4, SPL_THEME4, SPL_THEME4 +# value_svi +# counties: NA, S_PL_THEMES, SPL_THEMES, SPL_THEMES, SPL_THEMES +# tracts: NA, S_PL_THEMES, SPL_THEMES, SPL_THEMES, SPL_THEMES diff --git a/python/svi_client/src/hydrotools/svi_client/types/type_definitions.py b/python/svi_client/src/hydrotools/svi_client/types/type_definitions.py new file mode 100644 index 00000000..f2753551 --- /dev/null +++ b/python/svi_client/src/hydrotools/svi_client/types/type_definitions.py @@ -0,0 +1,130 @@ +from hydrotools._restclient import Alias, AliasGroup + +# typing imports +# NOTE: use typing.Literal when minimum python version is 3.8 +from typing_extensions import Literal + +GeographicScale = Literal["census_tract", "county"] +GeographicContext = Literal["national", "state"] +DataFormat = Literal["csv", "shp"] +Year = Literal["2000", "2010", "2014", "2016", "2018", 2000, 2010, 2014, 2016, 2018] + +# Geography options: +# source https://svi.cdc.gov/xmldata/SVI_StatesForDownload.xml +# used by form on https://svi.cdc.gov/data-download-wcms.html + +# TODO: add "PuertoRico" and "States". 
will need to consider what datasets are available for these locations +# LOCATIONS = AliasGroup( +# [ +# Alias("US", ["us", "united states"]), +# Alias("Alabama", ["al", "alabama"]), +# Alias("Alaska", ["ak", "alaska"]), +# Alias("Arizona", ["az", "arizona"]), +# Alias("Arkansas", ["ar", "arkansas"]), +# Alias("California", ["ca", "california"]), +# Alias("Colorado", ["co", "colorado"]), +# Alias("Connecticut", ["ct", "connecticut"]), +# Alias("Delaware", ["de", "delaware"]), +# Alias("DistrictofColumbia", ["dc", "district of columbia"]), +# Alias("Florida", ["fl", "florida"]), +# Alias("Georgia", ["ga", "georgia"]), +# Alias("Hawaii", ["hi", "hawaii"]), +# Alias("Idaho", ["id", "idaho"]), +# Alias("Illinois", ["il", "illinois"]), +# Alias("Indiana", ["in", "indiana"]), +# Alias("Iowa", ["ia", "iowa"]), +# Alias("Kansas", ["ks", "kansas"]), +# Alias("Kentucky", ["ky", "kentucky"]), +# Alias("Louisiana", ["la", "louisiana"]), +# Alias("Maine", ["me", "maine"]), +# Alias("Maryland", ["md", "maryland"]), +# Alias("Massachusetts", ["ma", "massachusetts"]), +# Alias("Michigan", ["mi", "michigan"]), +# Alias("Minnesota", ["mn", "minnesota"]), +# Alias("Mississippi", ["ms", "mississippi"]), +# Alias("Missouri", ["mo", "missouri"]), +# Alias("Montana", ["mt", "montana"]), +# Alias("Nebraska", ["ne", "nebraska"]), +# Alias("Nevada", ["nv", "nevada"]), +# Alias("NewHampshire", ["nh", "new hampshire"]), +# Alias("NewJersey", ["nj", "new jersey"]), +# Alias("NewMexico", ["nm", "new mexico"]), +# Alias("NewYork", ["ny", "new york"]), +# Alias("NorthCarolina", ["nc", "north carolina"]), +# Alias("NorthDakota", ["nd", "north dakota"]), +# Alias("Ohio", ["oh", "ohio"]), +# Alias("Oklahoma", ["ok", "oklahoma"]), +# Alias("Oregon", ["or", "oregon"]), +# Alias("Pennsylvania", ["pa", "pennsylvania"]), +# Alias("RhodeIsland", ["ri", "rhode island"]), +# Alias("SouthCarolina", ["sc", "south carolina"]), +# Alias("SouthDakota", ["sd", "south dakota"]), +# Alias("Tennessee", ["tn", 
"tennessee"]), +# Alias("Texas", ["tx", "texas"]), +# Alias("Utah", ["ut", "utah"]), +# Alias("Vermont", ["vt", "vermont"]), +# Alias("Virginia", ["va", "virginia"]), +# Alias("Washington", ["wa", "washington"]), +# Alias("WestVirginia", ["wv", "west virginia"]), +# Alias("Wisconsin", ["wi", "wisconsin"]), +# Alias("Wyoming", ["wy", "wyoming"]), +# ] +# ) + +# TODO: remove self refereing alias once bug in AliasGroup.get has been resolved +LOCATIONS = AliasGroup( + [ + Alias("us", ["us", "united states"]), + Alias("al", ["al", "alabama"]), + Alias("ak", ["ak", "alaska"]), + Alias("az", ["az", "arizona"]), + Alias("ar", ["ar", "arkansas"]), + Alias("ca", ["ca", "california"]), + Alias("co", ["co", "colorado"]), + Alias("ct", ["ct", "connecticut"]), + Alias("de", ["de", "delaware"]), + Alias("dc", ["dc", "district of columbia"]), + Alias("fl", ["fl", "florida"]), + Alias("ga", ["ga", "georgia"]), + Alias("hi", ["hi", "hawaii"]), + Alias("id", ["id", "idaho"]), + Alias("il", ["il", "illinois"]), + Alias("in", ["in", "indiana"]), + Alias("ia", ["ia", "iowa"]), + Alias("ks", ["ks", "kansas"]), + Alias("ky", ["ky", "kentucky"]), + Alias("la", ["la", "louisiana"]), + Alias("me", ["me", "maine"]), + Alias("md", ["md", "maryland"]), + Alias("ma", ["ma", "massachusetts"]), + Alias("mi", ["mi", "michigan"]), + Alias("mn", ["mn", "minnesota"]), + Alias("ms", ["ms", "mississippi"]), + Alias("mo", ["mo", "missouri"]), + Alias("mt", ["mt", "montana"]), + Alias("ne", ["ne", "nebraska"]), + Alias("nv", ["nv", "nevada"]), + Alias("nh", ["nh", "new hampshire"]), + Alias("nj", ["nj", "new jersey"]), + Alias("nm", ["nm", "new mexico"]), + Alias("ny", ["ny", "new york"]), + Alias("nc", ["nc", "north carolina"]), + Alias("nd", ["nd", "north dakota"]), + Alias("oh", ["oh", "ohio"]), + Alias("ok", ["ok", "oklahoma"]), + Alias("or", ["or", "oregon"]), + Alias("pa", ["pa", "pennsylvania"]), + Alias("ri", ["ri", "rhode island"]), + Alias("sc", ["sc", "south carolina"]), + Alias("sd", ["sd", 
"south dakota"]), + Alias("tn", ["tn", "tennessee"]), + Alias("tx", ["tx", "texas"]), + Alias("ut", ["ut", "utah"]), + Alias("vt", ["vt", "vermont"]), + Alias("va", ["va", "virginia"]), + Alias("wa", ["wa", "washington"]), + Alias("wv", ["wv", "west virginia"]), + Alias("wi", ["wi", "wisconsin"]), + Alias("wy", ["wy", "wyoming"]), + ] +) diff --git a/python/svi_client/src/hydrotools/svi_client/types/utilities.py b/python/svi_client/src/hydrotools/svi_client/types/utilities.py new file mode 100644 index 00000000..7c20361b --- /dev/null +++ b/python/svi_client/src/hydrotools/svi_client/types/utilities.py @@ -0,0 +1,55 @@ +try: + # get_args was added in 3.8 + from typing import get_args + +except ImportError: + # to support python 3.7 + from typing_extensions import get_args + + +# local imports +from .type_definitions import LOCATIONS, GeographicScale, GeographicContext, Year + + +def validate_location(location: str) -> str: + location_key = LOCATIONS.get(location.lower()) # noqa + + if location_key is None: + valid_locations = sorted(list(LOCATIONS.keys.union(LOCATIONS.values))) + error_message = f"Invalid location: {location}. Valid location values are\n{valid_locations}" + raise ValueError(error_message) + + return location_key + + +def validate_geographic_scale(geographic_scale: GeographicScale) -> str: + valid_geo_scales = get_args(GeographicScale) + + if geographic_scale not in valid_geo_scales: + valid_geo_scales = sorted(valid_geo_scales) + error_message = f"Invalid geographic scale: {geographic_scale}. Valid geographic scale values are\n{valid_geo_scales}" + raise ValueError(error_message) + + return geographic_scale + + +def validate_geographic_context(geographic_context: GeographicContext) -> str: + valid_geo_contexts = get_args(GeographicContext) + + if geographic_context not in valid_geo_contexts: + valid_geo_scales = sorted(valid_geo_contexts) + error_message = f"Invalid geographic context: {geographic_context}. 
Valid geographic context values are\n{valid_geo_scales}" + raise ValueError(error_message) + + return geographic_context + + +def validate_year(year: Year) -> str: + """Return `year` as a `str` if it is a supported SVI year; otherwise raise `ValueError`.""" + year_str = str(year) + + valid_years = get_args(Year) + if year_str not in valid_years: + error_message = f"Invalid year: {year}. Valid year values are\n{valid_years}" + raise ValueError(error_message) + + return year_str diff --git a/python/svi_client/src/hydrotools/svi_client/url_builders.py b/python/svi_client/src/hydrotools/svi_client/url_builders.py new file mode 100644 index 00000000..d8cae454 --- /dev/null +++ b/python/svi_client/src/hydrotools/svi_client/url_builders.py @@ -0,0 +1,246 @@ +from hydrotools._restclient import Url + +# local imports +from .types import utilities, field_name_map +from .types import GeographicScale, GeographicContext, Year +from .consts import US_COUNTY_FEATURE_SERVER_URLS, US_TRACT_FEATURE_SERVERS_URLS + +# typing imports +from typing import Optional + + +def build_csv_url(location: str, geographic_scale: GeographicScale, year: Year) -> str: + """Build the relative path of an SVI CSV file for a location, geographic scale, and year. + + `location` is normalized by `utilities.validate_location`, which returns lowercase keys, so + the entire-US case is detected by comparing against "us". + + Raises `ValueError` for invalid arguments, and for the county scale in years that + `get_county_token` does not support. + + NOTE(review): state paths are built from the lowercase location key; confirm this matches + the published file names. + """ + location = utilities.validate_location(location) + geographic_scale = utilities.validate_geographic_scale(geographic_scale) + year = utilities.validate_year(year) + + base_path = f"Documents/Data/{year}_SVI_Data/CSV" + + if geographic_scale == "county": + county_token = get_county_token(year) + if location == "us": + location_path = f"SVI{year}_US_{county_token}.csv" + else: + location_path = f"States_Counties/{location}_{county_token}.csv" + + else: + if location == "us": + location_path = f"SVI{year}_US.csv" + else: + location_path = f"States/{location}.csv" + + return f"{base_path}/{location_path}" + + +def build_feature_server_url( + location: str, + geographic_scale: GeographicScale, + year: Year, + geographic_context: GeographicContext, + result_offset: Optional[int] = None, + result_record_count: Optional[int] = None, + count_only: bool = False, +) -> str: + """Build the quoted feature server `query` URL for a location, geographic scale, and SVI year. + + Only the `national` geographic context is currently supported. `result_offset`, + `result_record_count`, and `count_only` map to the `resultOffset`, `resultRecordCount`, and + `returnCountOnly` query parameters. + """ + location = utilities.validate_location(location) + 
geographic_scale = utilities.validate_geographic_scale(geographic_scale) + year = utilities.validate_year(year) + context = utilities.validate_geographic_context(geographic_context) + + if context == "state": + error_message = ( + "the `state` geographic context has not yet been implimented. " + "only svi ranked at the `national` context are currently supported." + ) + # NotImplemented is a comparison sentinel, not an exception; raise the exception type + raise NotImplementedError(error_message) + + path = ( + US_COUNTY_FEATURE_SERVER_URLS[year] + if geographic_scale == "county" + else US_TRACT_FEATURE_SERVERS_URLS[year] + ) + path = f"{path}/query" + + fnm = field_name_map.CdcEsriFieldNameMapFactory(geographic_scale, year) + + params = { + # for entire US, use 1=1 where clause + "where": f"{fnm.state_abbreviation} = '{location}'" + if location != "us" + else "1=1", + "outFields": ",".join( + fnm.dict(exclude_unset=True, exclude={"svi_edition"}).values() + ), + "returnGeometry": "true", + "returnExceededLimitFeatures": "true", + "returnCountOnly": "true" if count_only else "false", + "resultOffset": "" if result_offset is None else result_offset, + "resultRecordCount": "" if result_record_count is None else result_record_count, + "f": "pgeojson", + } + + o: Url = Url(path, safe="/'") + params + return o.quote_url + + +def build_shp_url(location: str, geographic_scale: GeographicScale, year: Year) -> str: + """Build the relative path of an SVI shapefile download. + + NOTE(review): this currently builds the same CSV path as `build_csv_url`; shapefile (.zip) + paths are not implemented yet. + """ + location = utilities.validate_location(location) + geographic_scale = utilities.validate_geographic_scale(geographic_scale) + year = utilities.validate_year(year) + + # shp: Documents/Data/2000_SVI_Data/{geo_option}_2000_SVI.zip + # US + + base_path = f"Documents/Data/{year}_SVI_Data/CSV" + + # validate_location returns lowercase keys, so the entire-US key is "us" + if geographic_scale == "county": + county_token = get_county_token(year) + if location == "us": + location_path = f"SVI{year}_US_{county_token}.csv" + else: + location_path = f"States_Counties/{location}_{county_token}.csv" + + else: + if location == "us": + location_path = f"SVI{year}_US.csv" + else: + location_path = f"States/{location}.csv" + + return 
f"{base_path}/{location_path}" + + +# helper functions + + +def get_county_token(year: Year) -> str: + """Return the county file name token for an SVI year: "CNTY" for 2014, "COUNTY" for 2016 and 2018. + + Raises `ValueError` for any other year. + """ + year = utilities.validate_year(year) + + if year not in ("2014", "2016", "2018"): + error_message = "County geographic scale only available for SVI years: 2014, 2016, and 2018." + raise ValueError(error_message) + + return "CNTY" if year == "2014" else "COUNTY" + + +# Developer Notes: + +# crosswalk urls: +# 2014: https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/csv/SVIDocumentation_Table_Crosswalk_2014.csv +# 2016: https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/csv/SVIDocumentation_Table_Crosswalk_2016.csv + +# data dictionary urls: +# 2000: https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/csv/SVIDocumentation_Table_DataDictionary_2000.csv +# 2010: https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/csv/SVIDocumentation_Table_DataDictionary_2010.csv +# 2014: https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/csv/SVIDocumentation_Table_DataDictionary_2014.csv +# 2016: https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/csv/SVIDocumentation_Table_DataDictionary_2016.csv +# 2018: https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/csv/SVIDocumentation_Table_DataDictionary_2018.csv + +# documentation page urls: +# base_url_plus_path: https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/SVI_documentation_{year}.html + +# data dictionary urls: +# 2000: https://svi.cdc.gov/Documents/Data/2000_SVI_Data/SVI2000DataDictionary.pdf +# NOTE: does not appear that 2010 follows the above url convention + +# no data value = -999 +# +# 14 / 16 / 18 have same col names (in csv case) + +# CSVs +# 2000: +# census_tracts: +# ✔️ US: Documents/Data/2000_SVI_Data/CSV/SVI2000_US.csv +# ✔️ single_state: Documents/Data/2000_SVI_Data/CSV/States/{geo_option}.csv +# 2010: +# census_tracts: +# ✔️ US: Documents/Data/2010_SVI_Data/CSV/SVI2010_US.csv +# ✔ single_state: Documents/Data/2010_SVI_Data/CSV/States/{geo_option}.csv +# 
2014: +# census_tracts: +# US: Documents/Data/2014_SVI_Data/CSV/SVI2014_US.csv +# single_state: Documents/Data/2014_SVI_Data/CSV/States/{geo_option}.csv +# counties: +# US: Documents/Data/2014_SVI_Data/CSV/SVI2014_US_CNTY.csv +# single_state: Documents/Data/2014_SVI_Data/CSV/States_Counties/{geo_option}_CNTY.csv +# 2016/2018: +# census_tracts: +# US: Documents/Data/{year}_SVI_Data/CSV/SVI{year}_US.csv +# single_state: Documents/Data/{year}_SVI_Data/CSV/States/{geo_option}.csv +# counties: +# US: Documents/Data/{year}_SVI_Data/CSV/SVI{year}_US_COUNTY.csv +# single_state: Documents/Data/{year}_SVI_Data/CSV/States_Counties/{geo_option}_COUNTY.csv + + +# SHP / GDBs +# 2000s case: +# census_tracts: +# US: Documents/Data/2000_SVI_Data/US_2000_SVI.zip # ESRI gdb +# single_state: https://svi.cdc.gov/Documents/Data/2000_SVI_Data/State_2000_SVI.zip # ESRI gdb + +# 2010s case: +# census_tracts: +# US: Documents/Data/2010_SVI_Data/SVI2010_US_03242014.zip +# States: Documents/Data/2010_SVI_Data/SVI2010_States_03242014.zip +# AllStates: Documents/Data/2010_SVI_Data/SVI2010_States_03242014.zip +# counties: +# US: Documents/Data/2010_SVI_Data/SVI2010_Counties.zip +# unknown_spatial_resolution: +# single_state: Documents/Data/2010_SVI_Data/States/{geo_option}_03242014.zip + +# csv: +# census_tracts: +# ✔️ US: Documents/Data/2010_SVI_Data/CSV/SVI2010_US.csv +# ✔ single_state: Documents/Data/2010_SVI_Data/CSV/States/{geo_option}.csv + +# 2014s case: +# shp: +# census_tracts: +# US: Documents/Data/2014_SVI_Data/SVI2014_US.zip +# single_state: Documents/Data/2014_SVI_Data/States/{geo_option).zip +# counties: +# US: Documents/Data/2014_SVI_Data/SVI2014_US_CNTY.zip +# single_state: Documents/Data/2014_SVI_Data/States_Counties/{geo_option}_CNTY.zip + +# csv: +# census_tracts: +# US: Documents/Data/2014_SVI_Data/CSV/SVI2014_US.csv +# single_state: Documents/Data/2014_SVI_Data/CSV/States/{geo_option}.csv +# counties: +# US: Documents/Data/2014_SVI_Data/CSV/SVI2014_US_CNTY.csv +# 
single_state: Documents/Data/2014_SVI_Data/CSV/States_Counties/{geo_option}_CNTY.csv + +# 2016s case: +# 2018s case: +# shp: +# census_tracts: +# US: Documents/Data/{year}_SVI_Data/SVI{year}_US.zip +# single_state: Documents/Data/{year}_SVI_Data/States/{geo_option).zip +# counties: +# US: Documents/Data/{year}_SVI_Data/SVI{year}_US_COUNTY.zip +# single_state: Documents/Data/{year}_SVI_Data/States_Counties/{geo_option}_CNTY.zip + +# csv: +# census_tracts: +# US: Documents/Data/{year}_SVI_Data/CSV/SVI{year}_US.csv +# single_state: Documents/Data/{year}_SVI_Data/CSV/States/{geo_option}.csv +# counties: +# US: Documents/Data/{year}_SVI_Data/CSV/SVI{year}_US_COUNTY.csv +# single_state: Documents/Data/{year}_SVI_Data/CSV/States_Counties/{geo_option}_COUNTY.csv + +# Get counties for a given SVI year +# https://svi.cdc.gov/xmldata/SVI{year}_Counties.xml + +# County level maps +# base url: https://svi.cdc.gov/Documents/CountyMaps/ +# +# example: +# https://svi.cdc.gov/Documents/CountyMaps/2014/Alabama/Alabama2014_Tuscaloosa.pdf + +# 2000: +# {state}{year}_{county}.pdf + +# 2010: +# {state}{year}_v2_{date-filename | county}.pdf + +# 2014 | 2016 | 2018: +# {state}{year}_{date-filename | county}.pdf +# https://svi.cdc.gov/Documents/CountyMaps/{year}/{state}/{state}{year}_{data-filename | county}.pdf diff --git a/python/svi_client/tests/test_integration.py b/python/svi_client/tests/test_integration.py new file mode 100644 index 00000000..85deae1c --- /dev/null +++ b/python/svi_client/tests/test_integration.py @@ -0,0 +1,78 @@ +import pytest +from hydrotools.svi_client import SVIClient + +LOCATIONS = ( + "al", + "ak", + "az", + "ar", + "ca", + "co", + "ct", + "de", + "dc", + "fl", + "ga", + "hi", + "id", + "il", + "in", + "ia", + "ks", + "ky", + "la", + "me", + "md", + "ma", + "mi", + "mn", + "ms", + "mo", + "mt", + "ne", + "nv", + "nh", + "nj", + "nm", + "ny", + "nc", + "nd", + "oh", + "ok", + "or", + "pa", + "ri", + "sc", + "sd", + "tn", + "tx", + "ut", + "vt", + "va", + "wa", 
+ "wv", + "wi", + "wy", +) + +YEARS = ("2000", "2010", "2014", "2016", "2018") +GEOGRAPHIC_SCALES = ("county", "census_tract") + + +@pytest.mark.slow +@pytest.mark.parametrize("location", LOCATIONS) +@pytest.mark.parametrize("year", YEARS) +@pytest.mark.parametrize("scale", GEOGRAPHIC_SCALES) +def test_svi_client_get_integration(location, year, scale): + client = SVIClient(enable_cache=False) + df = client.get(location, scale, year) + assert df.loc[0, "state_abbreviation"] == location + + +@pytest.mark.slow +@pytest.mark.parametrize("year", YEARS) +@pytest.mark.parametrize("scale", GEOGRAPHIC_SCALES) +def test_svi_client_get_integration_us(year, scale): + client = SVIClient(enable_cache=False) + df = client.get("us", scale, year) + assert df.state_abbreviation.isin(LOCATIONS).all() diff --git a/python/svi_client/tests/test_svi_utilities.py b/python/svi_client/tests/test_svi_utilities.py new file mode 100644 index 00000000..d4781f4b --- /dev/null +++ b/python/svi_client/tests/test_svi_utilities.py @@ -0,0 +1,81 @@ +import pytest +from hydrotools.svi_client.types import utilities + +# test, validation +LOCATION_TESTS = ( + ("al", "al"), + ("ALABAMA", "al"), + ("alabama", "al"), + ("AlAbAmA", "al"), +) + + +@pytest.mark.parametrize("test,val", LOCATION_TESTS) +def test_validate_locations(test: str, val: str): + assert utilities.validate_location(test) == val + + +GEOGRAPHIC_SCALE_TESTS = (("census_tract", "census_tract"), ("county", "county")) + + +@pytest.mark.parametrize("test,val", GEOGRAPHIC_SCALE_TESTS) +def test_validate_geographic_scale(test, val): + assert utilities.validate_geographic_scale(test) == val + + +GEOGRAPHIC_CONTEXT_TESTS = (("national", "national"), ("state", "state")) + + +@pytest.mark.parametrize("test,val", GEOGRAPHIC_CONTEXT_TESTS) +def test_validate_geographic_context(test, val): + assert utilities.validate_geographic_context(test) == val + + +YEAR_TESTS = ( + (2000, "2000"), + (2010, "2010"), + (2014, "2014"), + (2016, "2016"), + (2018, 
"2018"), + ("2000", "2000"), + ("2010", "2010"), + ("2014", "2014"), + ("2016", "2016"), + ("2018", "2018"), +) + + +@pytest.mark.parametrize("test,val", YEAR_TESTS) +def test_validate_year(test, val): + assert utilities.validate_year(test) == val + + +# Tests for raising error + + +def test_validate_locations_raises_ValueError(): + test = "canada" + + with pytest.raises(ValueError): + utilities.validate_location(test) + + +def test_validate_geographic_scale_raises_ValueError(): + test = "some fake scale" + + with pytest.raises(ValueError): + utilities.validate_geographic_scale(test) + + +def test_validate_geographic_context_raises_ValueError(): + test = "some fake context" + + with pytest.raises(ValueError): + utilities.validate_geographic_context(test) + + +def test_validate_year_raises_ValueError(): + test = 1999 + + with pytest.raises(ValueError): + utilities.validate_year(test)