894 escapes for polyfiles with multiple polylines (#906)

* add separate boundary forcing block to the ext file for each polyline in the polyfile, both for tide and cmems * deprecated `read_polyfile_as_gdf_points()` and `interp_regularnc_to_plipoints()` * removed phyc from modelbuilder example and updated modelbuilder notebook * added `validate_polyline_names()` for polyline name checking * removed nPoints arguments * repaired tests and added testcases * updated whatsnew
Deltares · Jul 12, 2024 · 024c0db · 024c0db
1 parent 62a1c8c
commit 024c0db
Show file tree

Hide file tree

Showing 9 changed files with 309 additions and 269 deletions.
diff --git a/dfm_tools/hydrolib_helpers.py b/dfm_tools/hydrolib_helpers.py
@@ -227,6 +227,18 @@ def DataFrame_to_PolyObject(poly_pd,name,content=None):
     return polyobject
 
 
+def validate_polyline_names(polyfile_obj):
+    # TODO: not allowed to have empty or duplicated polyline names in a polyfile, this is not 
+    # catched by hydrolib-core: https://github.com/Deltares/HYDROLIB-core/issues/483
+    # therefore, we check it here
+    names = [x.metadata.name for x in polyfile_obj.objects]
+    if len(set(names)) != len(names):
+        raise ValueError(f'duplicate polyline names found in polyfile: {names}')
+    first_alpha = [x[0].isalpha() for x in names]
+    if not all(first_alpha):
+        raise ValueError(f'names in polyfile do not all start with a letter: {names}')
+
+
 def geodataframe_to_PolyFile(poly_gdf, name="L"):
     """
     convert a geopandas geodataframe with x/y columns (and optional others like z/data/comment) to a hydrolib PolyFile
@@ -235,10 +247,6 @@ def geodataframe_to_PolyFile(poly_gdf, name="L"):
     # catch some invalid occurences of name
     if not isinstance(name, str):
         raise TypeError("name should be a string")
-    if name == "":
-        raise ValueError("name is not allowed to be an empty string")
-    if not name[0].isalpha():
-        raise ValueError("name should start with a letter")
 
     # add name column if not present
     if 'name' not in poly_gdf.columns:
@@ -263,21 +271,13 @@ def geodataframe_to_PolyFile(poly_gdf, name="L"):
         pointsobj_list = poly_geom_df.T.apply(dict).tolist()
         for pnt in pointsobj_list:
             pnt['data'] = []
-        polyobject = hcdfm.PolyObject(metadata={'name':name_str,'n_rows':poly_geom_np.shape[0],'n_columns':poly_geom_np.shape[1]}, points=pointsobj_list)
-        #if content is not None: # TODO: add support for content
-        #    polyobject.description = {'content':content}
+        polyobject = hcdfm.PolyObject(metadata={'name':name_str,
+                                                'n_rows':poly_geom_np.shape[0],
+                                                'n_columns':poly_geom_np.shape[1]}, 
+                                      points=pointsobj_list)
         polyfile_obj.objects.append(polyobject)
 
-    # TODO: not allowed to have empty or duplicated polyline names in a polyfile, this is not 
-    # catched by hydrolib-core: https://github.com/Deltares/HYDROLIB-core/issues/483
-    # therefore, we check it here
-    names = [x.metadata.name for x in polyfile_obj.objects]
-    if len(set(names)) != len(names):
-        raise ValueError(f'duplicate polyline names found in polyfile: {names}')
-    first_alpha = [x[0].isalpha() for x in names]
-    if not all(first_alpha):
-        raise ValueError(f'names in polyfile do not all start with a letter: {names}')
-
+    validate_polyline_names(polyfile_obj)
     return polyfile_obj
 
 

diff --git a/dfm_tools/interpolate_grid2bnd.py b/dfm_tools/interpolate_grid2bnd.py
@@ -1,3 +1,4 @@
+import os
 import glob
 import datetime as dt
 import numpy as np
@@ -16,14 +17,14 @@
                                         PolyFile_to_geodataframe_points, 
                                         get_ncbnd_construct,
                                         da_from_gdf_points,
-                                        maybe_convert_fews_to_dfmt)
+                                        maybe_convert_fews_to_dfmt,
+                                        validate_polyline_names)
 from dfm_tools.errors import OutOfRangeError
 
 __all__ = ["get_conversion_dict",
            "interpolate_tide_to_bc",
            "interpolate_tide_to_plipoints",
            "open_dataset_extra",
-           "interp_regularnc_to_plipoints",
            "interp_regularnc_to_plipointsDataset",
            "interp_uds_to_plipoints",
            "interp_hisnc_to_plipoints",
@@ -115,13 +116,56 @@ def get_conversion_dict(ncvarname_updates={}):
     return conversion_dict
 
 
-def interpolate_tide_to_bc(tidemodel, file_pli, component_list=None, nPoints=None, load=True):
-    gdf_points = read_polyfile_as_gdf_points(file_pli, nPoints=nPoints)
+def ext_add_boundary_object_per_polyline(ext_new:hcdfm.ExtModel, boundary_object:hcdfm.Boundary):
+    """
+    If the polyfile contains multiple polylines, only the first 
+    one will be used by DFLOW-FM for the boundary conditions.
+    This function will save each polyline as a separate polyfile 
+    and add duplicate boundary entries for each polyline to the extfile.
+    """
+    file_pli_main = boundary_object.locationfile.filepath
+    polyfile_obj = hcdfm.PolyFile(file_pli_main)
+    validate_polyline_names(polyfile_obj)
+    dir_output = os.path.dirname(file_pli_main)
+    for polyline_obj in polyfile_obj.objects:
+        if len(polyfile_obj.objects) > 1:
+            # copy to avoid location file to be the same for all duplicated boundary objects
+            boundary_object = boundary_object.copy()
+            # create a polyfile with a single plifile
+            polyfile_oneline_obj = hcdfm.PolyFile(objects=[polyline_obj])
+            file_pli_oneline = os.path.join(dir_output, f"{polyline_obj.metadata.name}.pli")
+            if str(file_pli_oneline) == str(file_pli_main):
+                raise ValueError("The names of one of the polylines in the polyfile is the "
+                                 "same as the polyfilename, resulting in the same filename")
+            polyfile_oneline_obj.save(file_pli_oneline)
+            # update locationfile to the plifile with a single polyline
+            boundary_object.locationfile = file_pli_oneline
+        # add boundary to extmodel
+        ext_new.boundary.append(boundary_object)
+
+
+def interpolate_tide_to_bc(ext_new: hcdfm.ExtModel, tidemodel, file_pli, component_list=None, load=True):
+    # read polyfile as geodataframe
+    polyfile_object = hcdfm.PolyFile(file_pli)
+    gdf_points = PolyFile_to_geodataframe_points(polyfile_object)
+
+    # interpolate tidal components to plipoints
     data_interp = interpolate_tide_to_plipoints(tidemodel=tidemodel, gdf_points=gdf_points, 
                                                 component_list=component_list, load=load)
+
+    # convert interpolated xarray.Dataset to hydrolib ForcingModel and save as bc file
     ForcingModel_object = plipointsDataset_to_ForcingModel(plipointsDataset=data_interp)
+    dir_output = os.path.dirname(file_pli)
+    file_bc_out = os.path.join(dir_output,f'tide_{tidemodel}.bc')
+    ForcingModel_object.save(filepath=file_bc_out)
 
-    return ForcingModel_object
+    # generate hydrolib-core Boundary object to be appended to the ext file
+    boundary_object = hcdfm.Boundary(quantity='waterlevelbnd', #the FM quantity for tide is also waterlevelbnd
+                                      locationfile=file_pli,
+                                      forcingfile=ForcingModel_object)
+
+    # add the boundary object to the ext file for each polyline in the polyfile
+    ext_add_boundary_object_per_polyline(ext_new=ext_new, boundary_object=boundary_object)
 
 
 def tidemodel_componentlist(tidemodel:str, convention:bool):
@@ -368,42 +412,6 @@ def open_dataset_extra(dir_pattern, quantity, tstart, tstop, conversion_dict=Non
     return data_xr_vars
 
 
-def read_polyfile_as_gdf_points(file_pli, nPoints=None):
-    # read polyfile
-    polyfile_object = hcdfm.PolyFile(file_pli)
-
-    # warn if the polyfile contains multiple polylines
-    if len(polyfile_object.objects) > 1:
-        logger.warning(f"The polyfile {file_pli} contains multiple polylines. "
-                       "Only the first one will be used by DFLOW-FM for the boundary conditions.")
-        #TODO after issue UNST-7012 is properly solved, remove this warning
-
-    # check if polyobj names in plifile are unique
-    polynames_pd = pd.Series([polyobj.metadata.name for polyobj in polyfile_object.objects])
-    if polynames_pd.duplicated().any():
-        raise ValueError(f'Duplicate polyobject names in polyfile {file_pli}, this is not allowed:\n{polynames_pd}')
-
-    gdf_points = PolyFile_to_geodataframe_points(polyfile_object)
-
-    # only use testset of n first points of polyfile
-    gdf_points = gdf_points.iloc[:nPoints]
-    return gdf_points
-
-
-def interp_regularnc_to_plipoints(data_xr_reg, file_pli, nPoints=None, load=True):
-
-    """
-    load: interpolation errors are only raised upon loading, so do this per default
-    """
-    # TODO: consider phasing out, this function is probably only used in 
-    # tests/examples/preprocess_interpolate_nc_to_bc.py and dfm_tools/modelbuilder.py
-
-    gdf_points = read_polyfile_as_gdf_points(file_pli, nPoints=nPoints)
-
-    data_interp = interp_regularnc_to_plipointsDataset(data_xr_reg, gdf_points=gdf_points, load=load)
-    return data_interp
-
-
 def interp_regularnc_to_plipointsDataset(data_xr_reg, gdf_points, load=True):
 
     ncbnd_construct = get_ncbnd_construct()
@@ -449,7 +457,7 @@ def interp_regularnc_to_plipointsDataset(data_xr_reg, gdf_points, load=True):
     return data_interp_loaded
 
 
-def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame, nPoints:int=None) -> xr.Dataset:
+def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame) -> xr.Dataset:
     """
     To interpolate an unstructured dataset (like a _map.nc file) read with xugrid to plipoint locations
     
@@ -459,8 +467,6 @@ def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame, nPo
         dfm model output read using dfm_tools. Dims: mesh2d_nLayers, mesh2d_nInterfaces, time, mesh2d_nNodes, mesh2d_nFaces, mesh2d_nMax_face_nodes, mesh2d_nEdges.
     gdf : geopandas.GeoDataFrame (str/path is also supported)
         gdf with location, geometry (Point) and crs corresponding to model crs.
-    nPoints : int, optional
-        amount of points (None gives all). The default is None.
 
     Raises
     ------
@@ -481,7 +487,6 @@ def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame, nPo
     if isinstance(gdf,(str,Path)): #TODO: align plipoints/gdf with other functions, now three input types are supported, but the interp_regularnc_to_plipoints requires paths to plifiles (and others also)
         gdf = PolyFile_to_geodataframe_points(hcdfm.PolyFile(gdf))
 
-    gdf = gdf.iloc[:nPoints]
     ds = uds.ugrid.sel_points(x=gdf.geometry.x, y=gdf.geometry.y)
     #TODO: drop mesh2d_face_x and mesh2d_face_y variables
 

diff --git a/dfm_tools/modelbuilder.py b/dfm_tools/modelbuilder.py
@@ -7,6 +7,7 @@
 from hydrolib.core.dimr.models import DIMR, FMComponent, Start
 from hydrolib.core.utils import get_path_style_for_current_operating_system
 from dfm_tools.hydrolib_helpers import get_ncbnd_construct
+from dfm_tools.interpolate_grid2bnd import ext_add_boundary_object_per_polyline
 
 __all__ = [
     "cmems_nc_to_bc",
@@ -22,11 +23,10 @@
 
 def cmems_nc_to_bc(ext_bnd, list_quantities, tstart, tstop, file_pli, dir_pattern, dir_output, conversion_dict=None, refdate_str=None):
     #input examples in https://github.com/Deltares/dfm_tools/blob/main/tests/examples/preprocess_interpolate_nc_to_bc.py
-
+    # TODO: rename ext_bnd to ext_new for consistency
     if conversion_dict is None:
         conversion_dict = dfmt.get_conversion_dict()
 
-    file_bc_basename = os.path.basename(file_pli).replace('.pli','')
     for quantity in list_quantities:
         print(f'processing quantity: {quantity}')
 
@@ -39,22 +39,24 @@ def cmems_nc_to_bc(ext_bnd, list_quantities, tstart, tstop, file_pli, dir_patter
                                                tstart=tstart, tstop=tstop,
                                                conversion_dict=conversion_dict,
                                                refdate_str=refdate_str)
-        #interpolate regulargridDataset to plipointsDataset
-        data_interp = dfmt.interp_regularnc_to_plipoints(data_xr_reg=data_xr_vars, file_pli=file_pli)
+        # interpolate regulargridDataset to plipointsDataset
+        polyfile_obj = hcdfm.PolyFile(file_pli)
+        gdf_points = dfmt.PolyFile_to_geodataframe_points(polyfile_object=polyfile_obj)
+        data_interp = dfmt.interp_regularnc_to_plipointsDataset(data_xr_reg=data_xr_vars, gdf_points=gdf_points, load=True)
 
         #convert plipointsDataset to hydrolib ForcingModel
         ForcingModel_object = dfmt.plipointsDataset_to_ForcingModel(plipointsDataset=data_interp)
 
-        file_bc_out = os.path.join(dir_output,f'{quantity}_{file_bc_basename}_CMEMS.bc')
-
+        # generate boundary object for the ext file (quantity, pli-filename, bc-filename)
+        file_bc_out = os.path.join(dir_output,f'{quantity}_CMEMS.bc')
         ForcingModel_object.save(filepath=file_bc_out)
-
-        #generate boundary object for the ext file (quantity, pli-filename, bc-filename)
         boundary_object = hcdfm.Boundary(quantity=quantity,
-                                         locationfile=file_pli,
+                                         locationfile=file_pli, #placeholder, will be replaced later on
                                          forcingfile=ForcingModel_object)
-        ext_bnd.boundary.append(boundary_object)
-
+
+        # add the boundary object to the ext file for each polyline in the polyfile
+        ext_add_boundary_object_per_polyline(ext_new=ext_bnd, boundary_object=boundary_object)
+
     return ext_bnd
 
 

diff --git a/docs/notebooks/modelbuilder_example.ipynb b/docs/notebooks/modelbuilder_example.ipynb
diff --git a/docs/whats-new.md b/docs/whats-new.md
@@ -6,8 +6,10 @@
 - improved usability of `dfmt.LineBuilder()` in [#854](https://github.com/Deltares/dfm_tools/pull/854)
 - added workaround for grids that are not orthogonal after cutting the land with `dfmt.meshkernel_get_illegalcells()` in [#866](https://github.com/Deltares/dfm_tools/pull/866)
 - updated CMEMS bcg multiyear dataset name in [#880](https://github.com/Deltares/dfm_tools/pull/880)
-- added CMEMS reananalysis-interim (myint) datasets to `dfmt.download_CMEMS()` in [#883](https://github.com/Deltares/dfm_tools/pull/883)
+- added CMEMS reananalysis-interim (myint) datasets to `dfmt.download_CMEMS()` in [#883](https://github.com/Deltares/dfm_tools/pull/883) and [#903](https://github.com/Deltares/dfm_tools/pull/903)
 - avoid duplicate and empty polyline names in `dfmt.geodataframe_to_PolyFile()` in [#896](https://github.com/Deltares/dfm_tools/pull/896)
+- support for multiple polylines per polyfile in `interpolate_tide_to_bc()` and `cmems_nc_to_bc()` in [#906](https://github.com/Deltares/dfm_tools/pull/906)
+- add to ext as part of `interpolate_tide_to_bc()`, it now requires `ext_new` and has no return value anymore in [#906](https://github.com/Deltares/dfm_tools/pull/906)
 
 ### Fix
 - cleanups for datasets retrieved with `dfmt.ssh_retrieve_data()` in [#867](https://github.com/Deltares/dfm_tools/pull/867)

diff --git a/tests/examples/preprocess_interpolate_nc_to_bc.py b/tests/examples/preprocess_interpolate_nc_to_bc.py
@@ -91,10 +91,13 @@
                                                    tstart=tstart, tstop=tstop,
                                                    conversion_dict=conversion_dict,
                                                    refdate_str=refdate_str)
-            #interpolate regulargridDataset to plipointsDataset
-            data_interp = dfmt.interp_regularnc_to_plipoints(data_xr_reg=data_xr_vars, file_pli=file_pli, #TODO: difference in .interp() with float vs da arguments: https://github.com/Deltares/dfm_tools/issues/287
-                                                             nPoints=nPoints) #argument for testing
-            #data_interp = data_interp.ffill(dim="node").bfill(dim="node") #to fill allnan plipoints with values from the neighbour point #TODO: this also fills the belowbed layers from one point onto another, so should be done after ffill/bfill in depth dimension. Currently all-nan arrays are replaced with .fillna(0)
+            # read polyfile as geodataframe
+            polyfile_object = hcdfm.PolyFile(file_pli)
+            gdf_points_all = dfmt.PolyFile_to_geodataframe_points(polyfile_object)
+            gdf_points = gdf_points_all.iloc[:nPoints]
+            # interpolate regulargridDataset to plipointsDataset
+            data_interp = dfmt.interp_regularnc_to_plipointsDataset(data_xr_reg=data_xr_vars, gdf_points=gdf_points, load=True)
+            # data_interp = data_interp.ffill(dim="node").bfill(dim="node") #to fill allnan plipoints with values from the neighbour point #TODO: this also fills the belowbed layers from one point onto another, so should be done after ffill/bfill in depth dimension. Currently all-nan arrays are replaced with .fillna(0)
 
             #convert plipointsDataset to hydrolib ForcingModel
             ForcingModel_object = dfmt.plipointsDataset_to_ForcingModel(plipointsDataset=data_interp)

diff --git a/tests/examples/preprocess_modelbuilder.py b/tests/examples/preprocess_modelbuilder.py
@@ -117,22 +117,15 @@
 ext_new = hcdfm.ExtModel()
 
 # FES2014 tidal components bc file
-file_bc_basename = os.path.basename(poly_file).replace('.pli','')
 tidemodel = 'EOT20' # tidemodel: FES2014, FES2012, EOT20, GTSMv4.1, GTSMv4.1_opendap
-ForcingModel_object = dfmt.interpolate_tide_to_bc(tidemodel=tidemodel, file_pli=poly_file, component_list=None)
-file_bc_out = os.path.join(dir_output,f'tide_{file_bc_basename}_{tidemodel}.bc')
-ForcingModel_object.save(filepath=file_bc_out)
-boundary_object = hcdfm.Boundary(quantity='waterlevelbnd', #the FM quantity for tide is also waterlevelbnd
-                                 locationfile=poly_file,
-                                 forcingfile=ForcingModel_object)
-ext_new.boundary.append(boundary_object)
+dfmt.interpolate_tide_to_bc(ext_new=ext_new, tidemodel=tidemodel, file_pli=poly_file, component_list=None)
 
 # CMEMS - download
 # you can also add WAQ variables like 'no3' and 'phyc'
 # check dfmt.get_conversion_dict() for an overview of parameter/quantity names
 dir_output_data_cmems = os.path.join(dir_output_data, 'cmems')
 os.makedirs(dir_output_data_cmems, exist_ok=True)
-for varkey in ['zos','so','thetao','uo','vo','no3','phyc']:
+for varkey in ['zos','so','thetao','uo','vo','no3']:#,'phyc']: # TODO: phyc not available in reanalysis: https://github.com/Deltares/dfm_tools/issues/847
     dfmt.download_CMEMS(varkey=varkey,
                         longitude_min=lon_min, longitude_max=lon_max, latitude_min=lat_min, latitude_max=lat_max,
                         date_min=date_min, date_max=date_max,
@@ -144,7 +137,7 @@
 # when supplying two waterlevelbnds to FM (tide and steric) with other quantities in between, dimrset>=2.24.00 is required
 # or else "ERROR  : update_ghostboundvals: not all ghost boundary flowlinks are being updated" is raised (https://issuetracker.deltares.nl/browse/UNST-7011).
 # Two waterlevelbnds need to share same physical plifile in order to be appended (https://issuetracker.deltares.nl/browse/UNST-5320).
-list_quantities = ['waterlevelbnd','salinitybnd','temperaturebnd','uxuyadvectionvelocitybnd','tracerbndNO3','tracerbndPON1']
+list_quantities = ['waterlevelbnd','salinitybnd','temperaturebnd','uxuyadvectionvelocitybnd','tracerbndNO3']#,'tracerbndPON1']
 dir_pattern = os.path.join(dir_output_data_cmems,'cmems_{ncvarname}_*.nc')
 ext_new = dfmt.cmems_nc_to_bc(ext_bnd=ext_new,
                               refdate_str=f'minutes since {ref_date} 00:00:00 +00:00',

diff --git a/tests/test_hydrolib_helpers.py b/tests/test_hydrolib_helpers.py
@@ -83,15 +83,15 @@ def test_geodataframe_to_PolyFile_name_invalidtype(bnd_gdf):
 def test_geodataframe_to_PolyFile_name_incorrect(bnd_gdf):
     with pytest.raises(ValueError) as e:
         dfmt.geodataframe_to_PolyFile(bnd_gdf, name='1')
-    assert 'name should start with a letter' in str(e.value)
+    assert 'names in polyfile do not all start with a letter' in str(e.value)
 
     with pytest.raises(ValueError) as e:
         dfmt.geodataframe_to_PolyFile(bnd_gdf, name='-')
-    assert 'name should start with a letter' in str(e.value)
+    assert 'names in polyfile do not all start with a letter' in str(e.value)
 
     with pytest.raises(ValueError) as e:
         dfmt.geodataframe_to_PolyFile(bnd_gdf, name='')
-    assert 'name is not allowed to be an empty string' in str(e.value)
+    assert 'names in polyfile do not all start with a letter' in str(e.value)
 
 
 @pytest.mark.unittest