Optionally include additional metadata with nwis_client get requests #214

Merged · 6 commits · May 19, 2023
2 changes: 1 addition & 1 deletion python/nwis_client/src/hydrotools/nwis_client/_version.py
@@ -1 +1 @@
-__version__ = "3.2.1"
+__version__ = "3.3.1"
75 changes: 60 additions & 15 deletions python/nwis_client/src/hydrotools/nwis_client/iv.py
@@ -159,6 +159,7 @@ def get(
] = None,
period: Union[str, None] = None,
siteStatus: str = "all",
+include_expanded_metadata: bool = False,
**params,
):
"""Return Pandas DataFrame of NWIS IV data.
@@ -189,6 +190,9 @@ def get(
siteStatus: str, optional, default 'all'
Site status in string format.
Options: 'all', 'active', 'inactive'
+include_expanded_metadata: bool, default False
+    Setting to True adds site_type_code, huc_code, county_code, state_code,
+    site_name, latitude, and longitude columns to the returned dataframe.
params:
Additional parameters passed directly to service.

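A minimal usage sketch of the new flag (the site and parameter codes are illustrative; assumes the package is installed and the NWIS service is reachable):

from hydrotools.nwis_client.iv import IVDataService

service = IVDataService()
df = service.get(
    sites="01646500",     # USGS site code (Potomac River at Little Falls)
    parameterCd="00060",  # discharge
    include_expanded_metadata=True,
)
# The expanded columns ride along with the standard output
print(df[["usgs_site_code", "site_name", "huc_code", "latitude", "longitude"]].head())
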
@@ -244,6 +248,7 @@ def get(
endDT=endDT,
period=period,
siteStatus=siteStatus,
+include_expanded_metadata=include_expanded_metadata,
**params,
)

@@ -284,8 +289,7 @@ def empty_df_warning_helper():

# Convert all times to UTC
dfs[self.value_time_label] = pd.to_datetime(
dfs["dateTime"], utc=True, infer_datetime_format=True
).dt.tz_localize(None)
dfs["dateTime"], utc=True).dt.tz_localize(None)

# Simplify variable name
dfs["variable_name"] = dfs["variableName"].apply(self.simplify_variable_name)
@@ -304,8 +308,18 @@ def empty_df_warning_helper():
"usgs_site_code",
"measurement_unit",
"qualifiers",
"series",
"series"
]
+if include_expanded_metadata:
+    expanded_columns = [
+        "siteTypeCd",
+        "hucCd",
+        "countyCd",
+        "stateCd",
+        "siteName",
+        "srs"
+    ]
+    cols += expanded_columns
dfs[cols] = dfs[cols].astype(str)
dfs[cols] = dfs[cols].astype(dtype="category")

@@ -315,17 +329,26 @@ def empty_df_warning_helper():
dfs[converted_float.columns] = converted_float

# DataFrame in semi-WRES compatible format
-return dfs[
-    [
-        self.value_time_label,
-        "variable_name",
-        "usgs_site_code",
-        "measurement_unit",
-        "value",
-        "qualifiers",
-        "series",
-    ]
-]
+output_columns = [
+    self.value_time_label,
+    "variable_name",
+    "usgs_site_code",
+    "measurement_unit",
+    "value",
+    "qualifiers",
+    "series",
+]
+if include_expanded_metadata:
+    expanded_column_mapping = {
+        "siteTypeCd": "site_type_code",
+        "hucCd": "huc_code",
+        "countyCd": "county_code",
+        "stateCd": "state_code",
+        "siteName": "site_name"
+    }
+    dfs = dfs.rename(columns=expanded_column_mapping)
+    output_columns += list(expanded_column_mapping.values()) + ["latitude", "longitude"]
+return dfs[output_columns]

def get_raw(
self,
@@ -368,6 +391,7 @@ def get_raw(
period: Union[str, None] = None,
siteStatus: str = "all",
max_sites_per_request: int = 20,
+include_expanded_metadata: bool = False,
**params,
) -> List[aiohttp.ClientResponse]:
"""
@@ -446,7 +470,8 @@ def get_raw(
results = self._restclient.mget(parameters=query_params, headers=self._headers)

# flatten list of lists
-return [item for r in results for item in self._handle_response(r)]
+return [item for r in results for item in self._handle_response(r,
+    include_expanded_metadata=include_expanded_metadata)]
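Here, results holds one list of parsed site dictionaries per batched request (sites are chunked by max_sites_per_request), and the comprehension flattens them into a single list. A toy illustration of the pattern:

results = [[{"site": "A"}], [{"site": "B"}, {"site": "C"}]]
flat = [item for r in results for item in r]
# -> [{"site": "A"}, {"site": "B"}, {"site": "C"}]
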

def _handle_start_end_period_url_params(
self, startDT=None, endDT=None, period=None
@@ -556,7 +581,10 @@ def is_string_or_non_string_iterable(x) -> bool:
return params

@staticmethod
-def _handle_response(raw_response: aiohttp.ClientResponse) -> List[dict]:
+def _handle_response(
+    raw_response: aiohttp.ClientResponse,
+    include_expanded_metadata: bool = False
+) -> List[dict]:
"""From a raw response, return a list of extracted sites in dictionary form.
Relevant dictionary keys are:

@@ -593,6 +621,19 @@ def extract_metadata(json_time_series):
"measurement_unit": json_time_series["variable"]["unit"]["unitCode"],
}

+def extract_expanded_metadata(json_time_series):
+    # Add site type code, huc, county, and state codes
+    d = {s["name"]: s["value"] for s in json_time_series["sourceInfo"]["siteProperty"]}
+
+    # Add site name
+    d["siteName"] = json_time_series["sourceInfo"]["siteName"]
+
+    # Add spatial reference system and geographic coordinates
+    d["srs"] = json_time_series["sourceInfo"]["geoLocation"]["geogLocation"]["srs"]
+    d["latitude"] = json_time_series["sourceInfo"]["geoLocation"]["geogLocation"]["latitude"]
+    d["longitude"] = json_time_series["sourceInfo"]["geoLocation"]["geogLocation"]["longitude"]
+    return d

flattened_data = []

for response_value_timeSeries in deserialized_response["value"]["timeSeries"]:
@@ -602,6 +643,10 @@ def extract_metadata(json_time_series):
# Create general site metadata dictionary
site_metadata = extract_metadata(response_value_timeSeries)

+# Add expanded metadata
+if include_expanded_metadata:
+    site_metadata.update(extract_expanded_metadata(response_value_timeSeries))

# Add site time series values and its index number
site_metadata.update({"values": site_data["value"], "series": indicies})
flattened_data.append(site_metadata)
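For context, a trimmed sketch of the WaterML-style timeSeries JSON that extract_expanded_metadata reads (field values are illustrative, not taken from a live response):

json_time_series = {
    "sourceInfo": {
        "siteName": "EXAMPLE RIVER AT EXAMPLE CITY",
        "siteProperty": [
            {"name": "siteTypeCd", "value": "ST"},
            {"name": "hucCd", "value": "02070008"},
            {"name": "stateCd", "value": "24"},
            {"name": "countyCd", "value": "24031"},
        ],
        "geoLocation": {
            "geogLocation": {"srs": "EPSG:4326", "latitude": 38.9, "longitude": -77.1}
        },
    }
}

# Mirroring the helper above:
d = {s["name"]: s["value"] for s in json_time_series["sourceInfo"]["siteProperty"]}
d["siteName"] = json_time_series["sourceInfo"]["siteName"]
d.update(json_time_series["sourceInfo"]["geoLocation"]["geogLocation"])
# d now holds siteTypeCd, hucCd, stateCd, countyCd, siteName, srs, latitude, longitude
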
53 changes: 53 additions & 0 deletions python/nwis_client/tests/test_nwis.py
@@ -184,6 +184,13 @@ def test_get_value_time(setup_iv_value_time, sites, validation):
assert df["usgs_site_code"].isin(validation).all()
assert "value_time" in df


+@pytest.mark.slow
+def test_get_with_expanded_metadata(setup_iv):
+    """Test expanded metadata option"""
+    df = setup_iv.get(sites=["01646500"], parameterCd="00060", include_expanded_metadata=True)
+    assert (df["huc_code"] == "02070008").all()

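Note that test_get_with_expanded_metadata queries the live NWIS service, hence the slow marker; assuming the slow marker is registered in the project's pytest configuration, it can be deselected locally:

pytest python/nwis_client/tests/test_nwis.py -m "not slow"
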
def test_get_raw_with_mock(setup_iv, monkeypatch):
"""Test data retrieval and parsing"""
import json
@@ -202,6 +209,52 @@ def requests_mock(*args, **kwargs):
data = setup_iv.get_raw(sites="01646500", parameterCd="00060,00065")
assert data[0]["usgs_site_code"] == "01646500"

+def test_expanded_metadata(setup_iv, monkeypatch):
+    """Test that get_raw extracts the expanded site metadata fields"""
+    import json
+    from pathlib import Path
+    from hydrotools._restclient import RestClient
+
+    def requests_mock(*args, **kwargs):
+        json_text = json.loads(
+            (Path(__file__).resolve().parent / "nwis_test_data.json").read_text()
+        )
+        return MockRequests(_json=json_text)
+
+    monkeypatch.setattr(RestClient, "get", requests_mock)
+
+    data = setup_iv.get_raw(sites="01646500", parameterCd="00060,00065", include_expanded_metadata=True)
+    assert data[0]["hucCd"] == "02070008"

+def test_expanded_metadata_columns(setup_iv, monkeypatch):
+    """Test that get renames and returns the expanded metadata columns"""
+    import json
+    from pathlib import Path
+    from hydrotools._restclient import RestClient
+
+    def requests_mock(*args, **kwargs):
+        json_text = json.loads(
+            (Path(__file__).resolve().parent / "nwis_test_data.json").read_text()
+        )
+        return MockRequests(_json=json_text)
+
+    monkeypatch.setattr(RestClient, "get", requests_mock)
+
+    data = setup_iv.get(sites="01646500", parameterCd="00060,00065", include_expanded_metadata=True)
+    extra_columns = [
+        "site_type_code",
+        "huc_code",
+        "county_code",
+        "state_code",
+        "site_name",
+        "latitude",
+        "longitude",
+    ]
+    for c in extra_columns:
+        assert c in data.columns


def test_handle_response(setup_iv, monkeypatch):
import json