Skip to content

Commit

Permalink
894 escapes for polyfiles with multiple polylines (#906)
Browse files Browse the repository at this point in the history
* add separate boundary forcing block to the ext file for each polyline in the polyfile, both for tide and cmems

* deprecated `read_polyfile_as_gdf_points()` and `interp_regularnc_to_plipoints()`

* removed phyc from modelbuilder example and updated modelbuilder notebook

* added `validate_polyline_names()` for polyline name checking

* removed nPoints arguments

* repaired tests and added testcases

* updated whatsnew
  • Loading branch information
veenstrajelmer authored Jul 12, 2024
1 parent 62a1c8c commit 024c0db
Show file tree
Hide file tree
Showing 9 changed files with 309 additions and 269 deletions.
34 changes: 17 additions & 17 deletions dfm_tools/hydrolib_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,18 @@ def DataFrame_to_PolyObject(poly_pd,name,content=None):
return polyobject


def validate_polyline_names(polyfile_obj):
# TODO: not allowed to have empty or duplicated polyline names in a polyfile, this is not
# catched by hydrolib-core: https://github.com/Deltares/HYDROLIB-core/issues/483
# therefore, we check it here
names = [x.metadata.name for x in polyfile_obj.objects]
if len(set(names)) != len(names):
raise ValueError(f'duplicate polyline names found in polyfile: {names}')
first_alpha = [x[0].isalpha() for x in names]
if not all(first_alpha):
raise ValueError(f'names in polyfile do not all start with a letter: {names}')


def geodataframe_to_PolyFile(poly_gdf, name="L"):
"""
convert a geopandas geodataframe with x/y columns (and optional others like z/data/comment) to a hydrolib PolyFile
Expand All @@ -235,10 +247,6 @@ def geodataframe_to_PolyFile(poly_gdf, name="L"):
# catch some invalid occurences of name
if not isinstance(name, str):
raise TypeError("name should be a string")
if name == "":
raise ValueError("name is not allowed to be an empty string")
if not name[0].isalpha():
raise ValueError("name should start with a letter")

# add name column if not present
if 'name' not in poly_gdf.columns:
Expand All @@ -263,21 +271,13 @@ def geodataframe_to_PolyFile(poly_gdf, name="L"):
pointsobj_list = poly_geom_df.T.apply(dict).tolist()
for pnt in pointsobj_list:
pnt['data'] = []
polyobject = hcdfm.PolyObject(metadata={'name':name_str,'n_rows':poly_geom_np.shape[0],'n_columns':poly_geom_np.shape[1]}, points=pointsobj_list)
#if content is not None: # TODO: add support for content
# polyobject.description = {'content':content}
polyobject = hcdfm.PolyObject(metadata={'name':name_str,
'n_rows':poly_geom_np.shape[0],
'n_columns':poly_geom_np.shape[1]},
points=pointsobj_list)
polyfile_obj.objects.append(polyobject)

# TODO: not allowed to have empty or duplicated polyline names in a polyfile, this is not
# catched by hydrolib-core: https://github.com/Deltares/HYDROLIB-core/issues/483
# therefore, we check it here
names = [x.metadata.name for x in polyfile_obj.objects]
if len(set(names)) != len(names):
raise ValueError(f'duplicate polyline names found in polyfile: {names}')
first_alpha = [x[0].isalpha() for x in names]
if not all(first_alpha):
raise ValueError(f'names in polyfile do not all start with a letter: {names}')

validate_polyline_names(polyfile_obj)
return polyfile_obj


Expand Down
95 changes: 50 additions & 45 deletions dfm_tools/interpolate_grid2bnd.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import glob
import datetime as dt
import numpy as np
Expand All @@ -16,14 +17,14 @@
PolyFile_to_geodataframe_points,
get_ncbnd_construct,
da_from_gdf_points,
maybe_convert_fews_to_dfmt)
maybe_convert_fews_to_dfmt,
validate_polyline_names)
from dfm_tools.errors import OutOfRangeError

__all__ = ["get_conversion_dict",
"interpolate_tide_to_bc",
"interpolate_tide_to_plipoints",
"open_dataset_extra",
"interp_regularnc_to_plipoints",
"interp_regularnc_to_plipointsDataset",
"interp_uds_to_plipoints",
"interp_hisnc_to_plipoints",
Expand Down Expand Up @@ -115,13 +116,56 @@ def get_conversion_dict(ncvarname_updates={}):
return conversion_dict


def interpolate_tide_to_bc(tidemodel, file_pli, component_list=None, nPoints=None, load=True):
gdf_points = read_polyfile_as_gdf_points(file_pli, nPoints=nPoints)
def ext_add_boundary_object_per_polyline(ext_new:hcdfm.ExtModel, boundary_object:hcdfm.Boundary):
"""
If the polyfile contains multiple polylines, only the first
one will be used by DFLOW-FM for the boundary conditions.
This function will save each polyline as a separate polyfile
and add duplicate boundary entries for each polyline to the extfile.
"""
file_pli_main = boundary_object.locationfile.filepath
polyfile_obj = hcdfm.PolyFile(file_pli_main)
validate_polyline_names(polyfile_obj)
dir_output = os.path.dirname(file_pli_main)
for polyline_obj in polyfile_obj.objects:
if len(polyfile_obj.objects) > 1:
# copy to avoid location file to be the same for all duplicated boundary objects
boundary_object = boundary_object.copy()
# create a polyfile with a single plifile
polyfile_oneline_obj = hcdfm.PolyFile(objects=[polyline_obj])
file_pli_oneline = os.path.join(dir_output, f"{polyline_obj.metadata.name}.pli")
if str(file_pli_oneline) == str(file_pli_main):
raise ValueError("The names of one of the polylines in the polyfile is the "
"same as the polyfilename, resulting in the same filename")
polyfile_oneline_obj.save(file_pli_oneline)
# update locationfile to the plifile with a single polyline
boundary_object.locationfile = file_pli_oneline
# add boundary to extmodel
ext_new.boundary.append(boundary_object)


def interpolate_tide_to_bc(ext_new: hcdfm.ExtModel, tidemodel, file_pli, component_list=None, load=True):
# read polyfile as geodataframe
polyfile_object = hcdfm.PolyFile(file_pli)
gdf_points = PolyFile_to_geodataframe_points(polyfile_object)

# interpolate tidal components to plipoints
data_interp = interpolate_tide_to_plipoints(tidemodel=tidemodel, gdf_points=gdf_points,
component_list=component_list, load=load)

# convert interpolated xarray.Dataset to hydrolib ForcingModel and save as bc file
ForcingModel_object = plipointsDataset_to_ForcingModel(plipointsDataset=data_interp)
dir_output = os.path.dirname(file_pli)
file_bc_out = os.path.join(dir_output,f'tide_{tidemodel}.bc')
ForcingModel_object.save(filepath=file_bc_out)

return ForcingModel_object
# generate hydrolib-core Boundary object to be appended to the ext file
boundary_object = hcdfm.Boundary(quantity='waterlevelbnd', #the FM quantity for tide is also waterlevelbnd
locationfile=file_pli,
forcingfile=ForcingModel_object)

# add the boundary object to the ext file for each polyline in the polyfile
ext_add_boundary_object_per_polyline(ext_new=ext_new, boundary_object=boundary_object)


def tidemodel_componentlist(tidemodel:str, convention:bool):
Expand Down Expand Up @@ -368,42 +412,6 @@ def open_dataset_extra(dir_pattern, quantity, tstart, tstop, conversion_dict=Non
return data_xr_vars


def read_polyfile_as_gdf_points(file_pli, nPoints=None):
# read polyfile
polyfile_object = hcdfm.PolyFile(file_pli)

# warn if the polyfile contains multiple polylines
if len(polyfile_object.objects) > 1:
logger.warning(f"The polyfile {file_pli} contains multiple polylines. "
"Only the first one will be used by DFLOW-FM for the boundary conditions.")
#TODO after issue UNST-7012 is properly solved, remove this warning

# check if polyobj names in plifile are unique
polynames_pd = pd.Series([polyobj.metadata.name for polyobj in polyfile_object.objects])
if polynames_pd.duplicated().any():
raise ValueError(f'Duplicate polyobject names in polyfile {file_pli}, this is not allowed:\n{polynames_pd}')

gdf_points = PolyFile_to_geodataframe_points(polyfile_object)

# only use testset of n first points of polyfile
gdf_points = gdf_points.iloc[:nPoints]
return gdf_points


def interp_regularnc_to_plipoints(data_xr_reg, file_pli, nPoints=None, load=True):

"""
load: interpolation errors are only raised upon loading, so do this per default
"""
# TODO: consider phasing out, this function is probably only used in
# tests/examples/preprocess_interpolate_nc_to_bc.py and dfm_tools/modelbuilder.py

gdf_points = read_polyfile_as_gdf_points(file_pli, nPoints=nPoints)

data_interp = interp_regularnc_to_plipointsDataset(data_xr_reg, gdf_points=gdf_points, load=load)
return data_interp


def interp_regularnc_to_plipointsDataset(data_xr_reg, gdf_points, load=True):

ncbnd_construct = get_ncbnd_construct()
Expand Down Expand Up @@ -449,7 +457,7 @@ def interp_regularnc_to_plipointsDataset(data_xr_reg, gdf_points, load=True):
return data_interp_loaded


def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame, nPoints:int=None) -> xr.Dataset:
def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame) -> xr.Dataset:
"""
To interpolate an unstructured dataset (like a _map.nc file) read with xugrid to plipoint locations
Expand All @@ -459,8 +467,6 @@ def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame, nPo
dfm model output read using dfm_tools. Dims: mesh2d_nLayers, mesh2d_nInterfaces, time, mesh2d_nNodes, mesh2d_nFaces, mesh2d_nMax_face_nodes, mesh2d_nEdges.
gdf : geopandas.GeoDataFrame (str/path is also supported)
gdf with location, geometry (Point) and crs corresponding to model crs.
nPoints : int, optional
amount of points (None gives all). The default is None.
Raises
------
Expand All @@ -481,7 +487,6 @@ def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame, nPo
if isinstance(gdf,(str,Path)): #TODO: align plipoints/gdf with other functions, now three input types are supported, but the interp_regularnc_to_plipoints requires paths to plifiles (and others also)
gdf = PolyFile_to_geodataframe_points(hcdfm.PolyFile(gdf))

gdf = gdf.iloc[:nPoints]
ds = uds.ugrid.sel_points(x=gdf.geometry.x, y=gdf.geometry.y)
#TODO: drop mesh2d_face_x and mesh2d_face_y variables

Expand Down
24 changes: 13 additions & 11 deletions dfm_tools/modelbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from hydrolib.core.dimr.models import DIMR, FMComponent, Start
from hydrolib.core.utils import get_path_style_for_current_operating_system
from dfm_tools.hydrolib_helpers import get_ncbnd_construct
from dfm_tools.interpolate_grid2bnd import ext_add_boundary_object_per_polyline

__all__ = [
"cmems_nc_to_bc",
Expand All @@ -22,11 +23,10 @@

def cmems_nc_to_bc(ext_bnd, list_quantities, tstart, tstop, file_pli, dir_pattern, dir_output, conversion_dict=None, refdate_str=None):
#input examples in https://github.com/Deltares/dfm_tools/blob/main/tests/examples/preprocess_interpolate_nc_to_bc.py

# TODO: rename ext_bnd to ext_new for consistency
if conversion_dict is None:
conversion_dict = dfmt.get_conversion_dict()

file_bc_basename = os.path.basename(file_pli).replace('.pli','')
for quantity in list_quantities:
print(f'processing quantity: {quantity}')

Expand All @@ -39,22 +39,24 @@ def cmems_nc_to_bc(ext_bnd, list_quantities, tstart, tstop, file_pli, dir_patter
tstart=tstart, tstop=tstop,
conversion_dict=conversion_dict,
refdate_str=refdate_str)
#interpolate regulargridDataset to plipointsDataset
data_interp = dfmt.interp_regularnc_to_plipoints(data_xr_reg=data_xr_vars, file_pli=file_pli)
# interpolate regulargridDataset to plipointsDataset
polyfile_obj = hcdfm.PolyFile(file_pli)
gdf_points = dfmt.PolyFile_to_geodataframe_points(polyfile_object=polyfile_obj)
data_interp = dfmt.interp_regularnc_to_plipointsDataset(data_xr_reg=data_xr_vars, gdf_points=gdf_points, load=True)

#convert plipointsDataset to hydrolib ForcingModel
ForcingModel_object = dfmt.plipointsDataset_to_ForcingModel(plipointsDataset=data_interp)

file_bc_out = os.path.join(dir_output,f'{quantity}_{file_bc_basename}_CMEMS.bc')

# generate boundary object for the ext file (quantity, pli-filename, bc-filename)
file_bc_out = os.path.join(dir_output,f'{quantity}_CMEMS.bc')
ForcingModel_object.save(filepath=file_bc_out)

#generate boundary object for the ext file (quantity, pli-filename, bc-filename)
boundary_object = hcdfm.Boundary(quantity=quantity,
locationfile=file_pli,
locationfile=file_pli, #placeholder, will be replaced later on
forcingfile=ForcingModel_object)
ext_bnd.boundary.append(boundary_object)


# add the boundary object to the ext file for each polyline in the polyfile
ext_add_boundary_object_per_polyline(ext_new=ext_bnd, boundary_object=boundary_object)

return ext_bnd


Expand Down
286 changes: 121 additions & 165 deletions docs/notebooks/modelbuilder_example.ipynb

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion docs/whats-new.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
- improved usability of `dfmt.LineBuilder()` in [#854](https://github.com/Deltares/dfm_tools/pull/854)
- added workaround for grids that are not orthogonal after cutting the land with `dfmt.meshkernel_get_illegalcells()` in [#866](https://github.com/Deltares/dfm_tools/pull/866)
- updated CMEMS bcg multiyear dataset name in [#880](https://github.com/Deltares/dfm_tools/pull/880)
- added CMEMS reananalysis-interim (myint) datasets to `dfmt.download_CMEMS()` in [#883](https://github.com/Deltares/dfm_tools/pull/883)
- added CMEMS reananalysis-interim (myint) datasets to `dfmt.download_CMEMS()` in [#883](https://github.com/Deltares/dfm_tools/pull/883) and [#903](https://github.com/Deltares/dfm_tools/pull/903)
- avoid duplicate and empty polyline names in `dfmt.geodataframe_to_PolyFile()` in [#896](https://github.com/Deltares/dfm_tools/pull/896)
- support for multiple polylines per polyfile in `interpolate_tide_to_bc()` and `cmems_nc_to_bc()` in [#906](https://github.com/Deltares/dfm_tools/pull/906)
- add to ext as part of `interpolate_tide_to_bc()`, it now requires `ext_new` and has no return value anymore in [#906](https://github.com/Deltares/dfm_tools/pull/906)

### Fix
- cleanups for datasets retrieved with `dfmt.ssh_retrieve_data()` in [#867](https://github.com/Deltares/dfm_tools/pull/867)
Expand Down
11 changes: 7 additions & 4 deletions tests/examples/preprocess_interpolate_nc_to_bc.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,13 @@
tstart=tstart, tstop=tstop,
conversion_dict=conversion_dict,
refdate_str=refdate_str)
#interpolate regulargridDataset to plipointsDataset
data_interp = dfmt.interp_regularnc_to_plipoints(data_xr_reg=data_xr_vars, file_pli=file_pli, #TODO: difference in .interp() with float vs da arguments: https://github.com/Deltares/dfm_tools/issues/287
nPoints=nPoints) #argument for testing
#data_interp = data_interp.ffill(dim="node").bfill(dim="node") #to fill allnan plipoints with values from the neighbour point #TODO: this also fills the belowbed layers from one point onto another, so should be done after ffill/bfill in depth dimension. Currently all-nan arrays are replaced with .fillna(0)
# read polyfile as geodataframe
polyfile_object = hcdfm.PolyFile(file_pli)
gdf_points_all = dfmt.PolyFile_to_geodataframe_points(polyfile_object)
gdf_points = gdf_points_all.iloc[:nPoints]
# interpolate regulargridDataset to plipointsDataset
data_interp = dfmt.interp_regularnc_to_plipointsDataset(data_xr_reg=data_xr_vars, gdf_points=gdf_points, load=True)
# data_interp = data_interp.ffill(dim="node").bfill(dim="node") #to fill allnan plipoints with values from the neighbour point #TODO: this also fills the belowbed layers from one point onto another, so should be done after ffill/bfill in depth dimension. Currently all-nan arrays are replaced with .fillna(0)

#convert plipointsDataset to hydrolib ForcingModel
ForcingModel_object = dfmt.plipointsDataset_to_ForcingModel(plipointsDataset=data_interp)
Expand Down
13 changes: 3 additions & 10 deletions tests/examples/preprocess_modelbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,22 +117,15 @@
ext_new = hcdfm.ExtModel()

# FES2014 tidal components bc file
file_bc_basename = os.path.basename(poly_file).replace('.pli','')
tidemodel = 'EOT20' # tidemodel: FES2014, FES2012, EOT20, GTSMv4.1, GTSMv4.1_opendap
ForcingModel_object = dfmt.interpolate_tide_to_bc(tidemodel=tidemodel, file_pli=poly_file, component_list=None)
file_bc_out = os.path.join(dir_output,f'tide_{file_bc_basename}_{tidemodel}.bc')
ForcingModel_object.save(filepath=file_bc_out)
boundary_object = hcdfm.Boundary(quantity='waterlevelbnd', #the FM quantity for tide is also waterlevelbnd
locationfile=poly_file,
forcingfile=ForcingModel_object)
ext_new.boundary.append(boundary_object)
dfmt.interpolate_tide_to_bc(ext_new=ext_new, tidemodel=tidemodel, file_pli=poly_file, component_list=None)

# CMEMS - download
# you can also add WAQ variables like 'no3' and 'phyc'
# check dfmt.get_conversion_dict() for an overview of parameter/quantity names
dir_output_data_cmems = os.path.join(dir_output_data, 'cmems')
os.makedirs(dir_output_data_cmems, exist_ok=True)
for varkey in ['zos','so','thetao','uo','vo','no3','phyc']:
for varkey in ['zos','so','thetao','uo','vo','no3']:#,'phyc']: # TODO: phyc not available in reanalysis: https://github.com/Deltares/dfm_tools/issues/847
dfmt.download_CMEMS(varkey=varkey,
longitude_min=lon_min, longitude_max=lon_max, latitude_min=lat_min, latitude_max=lat_max,
date_min=date_min, date_max=date_max,
Expand All @@ -144,7 +137,7 @@
# when supplying two waterlevelbnds to FM (tide and steric) with other quantities in between, dimrset>=2.24.00 is required
# or else "ERROR : update_ghostboundvals: not all ghost boundary flowlinks are being updated" is raised (https://issuetracker.deltares.nl/browse/UNST-7011).
# Two waterlevelbnds need to share same physical plifile in order to be appended (https://issuetracker.deltares.nl/browse/UNST-5320).
list_quantities = ['waterlevelbnd','salinitybnd','temperaturebnd','uxuyadvectionvelocitybnd','tracerbndNO3','tracerbndPON1']
list_quantities = ['waterlevelbnd','salinitybnd','temperaturebnd','uxuyadvectionvelocitybnd','tracerbndNO3']#,'tracerbndPON1']
dir_pattern = os.path.join(dir_output_data_cmems,'cmems_{ncvarname}_*.nc')
ext_new = dfmt.cmems_nc_to_bc(ext_bnd=ext_new,
refdate_str=f'minutes since {ref_date} 00:00:00 +00:00',
Expand Down
6 changes: 3 additions & 3 deletions tests/test_hydrolib_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,15 @@ def test_geodataframe_to_PolyFile_name_invalidtype(bnd_gdf):
def test_geodataframe_to_PolyFile_name_incorrect(bnd_gdf):
with pytest.raises(ValueError) as e:
dfmt.geodataframe_to_PolyFile(bnd_gdf, name='1')
assert 'name should start with a letter' in str(e.value)
assert 'names in polyfile do not all start with a letter' in str(e.value)

with pytest.raises(ValueError) as e:
dfmt.geodataframe_to_PolyFile(bnd_gdf, name='-')
assert 'name should start with a letter' in str(e.value)
assert 'names in polyfile do not all start with a letter' in str(e.value)

with pytest.raises(ValueError) as e:
dfmt.geodataframe_to_PolyFile(bnd_gdf, name='')
assert 'name is not allowed to be an empty string' in str(e.value)
assert 'names in polyfile do not all start with a letter' in str(e.value)


@pytest.mark.unittest
Expand Down
Loading

0 comments on commit 024c0db

Please sign in to comment.