Skip to content

Commit

Permalink
feat(nhdplus_utils.read_nhdplus_hr): support reading multiple geodata…
Browse files Browse the repository at this point in the history
…bases; add test
  • Loading branch information
aleaf committed Oct 16, 2024
1 parent 3e131bc commit 3307021
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 40 deletions.
115 changes: 81 additions & 34 deletions sfrmaker/nhdplus_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path
import time
import warnings
import numpy as np
import pandas as pd
import geopandas as gpd
from gisutils import shp2df, get_shapefile_crs
Expand Down Expand Up @@ -260,44 +261,90 @@ def read_nhdplus(shapefiles, bbox_filter=None,
return df


def read_nhdplus_hr(NHDPlusHR_path, bbox_filter=None, drop_fcodes=None):
def read_nhdplus_hr(NHDPlusHR_paths, bbox_filter=None, drop_fcodes=None):
"""_summary_
Parameters
----------
NHDPlusHR_paths : str, pathlike or sequence of paths
NHDPlus High Resolution (HR) geodatabases.
bbox_filter : tuple or polygon shapefile, optional
Only read NHDPlus HR content within this area,
by default None
drop_fcodes : int or sequence of ints, optional
Exclude NHDPlus data with these FCodes,
by default None
Returns
-------
fl : pandas DataFrame
NHDPlus HR Flowlines including the following attributes:
========== ===========================================================================
NHDPlusID Unique identifier for each flowline and it's attributes
ArbolateSu Total length of upstream drainage, in km (from NHDPlusFlowlineVAA table)
StreamOrde Strahler stream order (from NHDPlusFlowlineVAA table)
MaxElevSmo Starting elevation for each flowline, in cm (from NHDPlusFlowlineVAA table)
MinElevSmo Ending elevation for each flowline, in cm (from NHDPlusFlowlineVAA table)
Divergence Distributary number (from NHDPlusFlowlineVAA table)
ToNHDPID Next downstream NHDPlusID (from NHDPlusFlow table)
========== ===========================================================================
Notes
-----
* At divergences where there are multiple downstream flowlines,
those with Divergence == 2 (minor distributaries) are removed,
so that there is only one downstream NHDPlusID.
"""
ta = time.time()
print('reading {}...'.format(NHDPlusHR_path))
# read NHDFLowlines from NHDPlusHR_path (NHDPlus HR OpenFileGDB)
fl = gpd.read_file(NHDPlusHR_path, driver='OpenFileGDB', layer='NHDFlowline')
# get crs information from flowlines
fl_crs = fl.crs

if filter is not None:
print('filtering flowlines...')

# ensure that filter bbox is in same crs as flowlines
# get filters from shapefiles, shapley Polygons or GeoJSON polygons
if bbox_filter is not None:
if bbox_filter is not isinstance(bbox_filter, tuple):
bbox_filter = get_bbox(bbox_filter, dest_crs=fl_crs)
if isinstance(NHDPlusHR_paths, str) or isinstance(NHDPlusHR_paths, Path):
NHDPlusHR_paths = [NHDPlusHR_paths]
fls = []
for f in NHDPlusHR_paths:
print(f'reading {f}...')
fl = gpd.read_file(f, driver='OpenFileGDB', layer='NHDFlowline')
# get crs information from flowlines
fl_crs = fl.crs

# filter to bbox using geopandas spatial indexing
fl = fl.cx[bbox_filter[0]:bbox_filter[2], bbox_filter[1]:bbox_filter[3]]
if filter is not None:
print('filtering flowlines...')

# read NHDPlusFlowlineVAA file from NHDPlusHR_path (NHDPlus HR OpenFileGDB) and merge with flowlines
flvaa = gpd.read_file(NHDPlusHR_path, driver='OpenFileGDB', layer='NHDPlusFlowlineVAA')
fl = fl.merge(flvaa[['NHDPlusID', 'ArbolateSu','StreamOrde', 'MaxElevSmo', 'MinElevSmo', 'Divergence']],
on='NHDPlusID', how='left'
)

# read NHDPlusFlow file from NHDPlusHR_path (NHDPlus HR OpenFileGDB)
pf = gpd.read_file(NHDPlusHR_path, driver='OpenFileGDB', layer='NHDPlusFlow')

# Remove features classified as minor divergence pathways (Divergence == 2)
# from PlusFlow table
pf_routing_dict = get_hr_routing(pf, fl)

# Add routing information from PlusFlow table.
# Set any remaining comids not in fromcomid_list to zero
# (outlets or inlets from outside model)
fl['ToNHDPID'] = [pf_routing_dict[i] if i in pf_routing_dict else 0.0 for i in fl.NHDPlusID]
print("finished in {:.2f}s\n".format(time.time() - ta))
# ensure that filter bbox is in same crs as flowlines
# get filters from shapefiles, shapley Polygons or GeoJSON polygons
if bbox_filter is not None:
if not isinstance(bbox_filter, tuple):
bbox_filter = get_bbox(bbox_filter, dest_crs=fl_crs)

# filter to bbox using geopandas spatial indexing
fl = fl.cx[bbox_filter[0]:bbox_filter[2], bbox_filter[1]:bbox_filter[3]]

# read NHDPlusFlowlineVAA file from NHDPlusHR_path (NHDPlus HR OpenFileGDB) and merge with flowlines
flvaa = gpd.read_file(f, driver='OpenFileGDB', layer='NHDPlusFlowlineVAA')
fl = fl.merge(flvaa[['NHDPlusID', 'ArbolateSu','StreamOrde', 'MaxElevSmo', 'MinElevSmo', 'Divergence']],
on='NHDPlusID', how='left'
)

# read NHDPlusFlow file from NHDPlusHR_path (NHDPlus HR OpenFileGDB)
pf = gpd.read_file(f, driver='OpenFileGDB', layer='NHDPlusFlow')

# Remove features classified as minor divergence pathways (Divergence == 2)
# from PlusFlow table
pf_routing_dict = get_hr_routing(pf, fl)

# Add routing information from PlusFlow table.
# Set any remaining comids not in fromcomid_list to zero
# (outlets or inlets from outside model)
fl['ToNHDPID'] = [pf_routing_dict[i] if i in pf_routing_dict else 0.0 for i in fl.NHDPlusID]
print("finished in {:.2f}s\n".format(time.time() - ta))
fls.append(fl)
fl = pd.concat(fls, axis=0)
if drop_fcodes is not None:
if np.isscalar(drop_fcodes):
keep = fl['FCode'] != drop_fcodes
else:
keep = ~fl['FCode'].isin(drop_fcodes)
fl = fl.loc[keep].copy()
return fl


Expand Down
9 changes: 4 additions & 5 deletions sfrmaker/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,12 @@ def test_data_path():

@pytest.fixture(scope="session", autouse=True)
def outdir(project_root_path):
folder = os.path.join(project_root_path, 'sfrmaker/test/temp/')
folder = project_root_path / 'sfrmaker/test/temp/'
reset = True
if reset:
if os.path.isdir(folder):
shutil.rmtree(folder)
os.makedirs(folder)
return Path(folder)
shutil.rmtree(folder, ignore_errors=True)
folder.mkdir()
return folder


@pytest.fixture(scope="session")
Expand Down
33 changes: 32 additions & 1 deletion sfrmaker/test/test_nhdplus_utils.py
Original file line number Diff line number Diff line change
@@ -1 +1,32 @@
# TODO: add unit tests for test_nhdplus_utils.py
"""Unit tests for test_nhdplus_utils.py"""
import pytest
from sfrmaker.nhdplus_utils import (
read_nhdplus_hr
)


@pytest.mark.parametrize('nhdplus_files,bbox_filter,expected_nrows,drop_fcodes', (
('neversink_rondout/NHDPlus_HR_1.gdb',
(-74.68061523310072, 41.74556802490179, -74.44311875132166, 41.848862965337595),
132, None),
('neversink_rondout/NHDPlus_HR_1.gdb',
'examples/neversink_rondout/Model_Extent.shp',
75, 55800),
(['neversink_rondout/NHDPlus_HR_1.gdb', 'neversink_rondout/NHDPlus_HR_2.gdb'],
'examples/neversink_rondout/Model_Extent.shp', 386, None
)
)
)
def test_read_nhdplus_hr(nhdplus_files, bbox_filter, expected_nrows, drop_fcodes,
datapath):

if not isinstance(nhdplus_files, str):
NHDPlusHR_paths = [datapath / f for f in nhdplus_files]
else:
NHDPlusHR_paths = datapath / nhdplus_files
results = read_nhdplus_hr(NHDPlusHR_paths, bbox_filter=bbox_filter,
drop_fcodes=drop_fcodes)
expected_cols = ['NHDPlusID', 'ArbolateSu','StreamOrde', 'MaxElevSmo',
'MinElevSmo', 'Divergence', 'ToNHDPID']
assert not set(expected_cols).difference(results.columns)
assert len(results) == expected_nrows

0 comments on commit 3307021

Please sign in to comment.